Internet Firewall Data Data Set
github link : https://github.com/amirshnll/Internet-Firewall
dataset link : http://archive.ics.uci.edu/ml/datasets/Internet+Firewall+Data
# Author : Amir Shokri
# github link : https://github.com/amirshnll/Internet-Firewall
# dataset link : http://archive.ics.uci.edu/ml/datasets/Internet+Firewall+Data
# email : amirsh.nll@gmail.com
# Third-party libraries used throughout the notebook.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the firewall records; the path is resolved relative to the current
# working directory.
df = pd.read_csv('firewall.csv')
df  # notebook display of the loaded frame (no effect when run as a script)
# Print the column names, non-null counts, dtypes and memory usage of df.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65532 entries, 0 to 65531
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Source Port 65532 non-null int64
1 Destination Port 65532 non-null int64
2 NAT Source Port 65532 non-null int64
3 NAT Destination Port 65532 non-null int64
4 Action 65532 non-null object
5 Bytes 65532 non-null int64
6 Bytes Sent 65532 non-null int64
7 Bytes Received 65532 non-null int64
8 Packets 65532 non-null int64
9 Elapsed Time (sec) 65532 non-null int64
10 pkts_sent 65532 non-null int64
11 pkts_received 65532 non-null int64
dtypes: int64(11), object(1)
memory usage: 6.0+ MB
# Separate the target column from the predictors.
# The labels are kept as a single-column 2-D array of shape (n_samples, 1).
y = df['Action'].values.reshape(-1, 1)
x_data = df.drop(columns=['Action'])
print(x_data)
# Bar chart of how many records fall into each value of the 'Action' column.
sns.countplot(data=df, x='Action', palette='hls')
plt.show()
# Min-max normalise every feature column to the [0, 1] range:
#     x = (x_data - min) / (max - min)
# The previous denominator was max / min, which is not a range and divides by
# zero for any column whose minimum is 0.
# Column-wise DataFrame reductions are used explicitly because np.min/np.max
# on a DataFrame may reduce over the whole frame rather than per column,
# depending on the installed pandas version.
col_min = x_data.min()
col_range = x_data.max() - col_min
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than NaN.
col_range = col_range.replace(0, 1)
x = (x_data - col_min) / col_range
x.head()
# Train / test split: 30% of the rows are held out for testing, and the fixed
# random_state keeps the split reproducible between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=300
)

# Report the shape of each partition.
for label, part in (
    ("x_train", x_train),
    ("x_test", x_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(label, part.shape)
x_train (45872, 11)
x_test (19660, 11)
y_train (45872, 1)
y_test (19660, 1)
# Decision tree classifier
from sklearn import metrics
from sklearn import preprocessing
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()
dt = dt.fit(x_train, y_train)

# Predict once and reuse the result for the report.
y_forecast = dt.predict(x_test)
print(classification_report(y_test, y_forecast))
print('accuracy:{:.4f}'.format(dt.score(x_test, y_test)))

# Draw the tree that was evaluated above. Refitting on the full data set here
# would replace the evaluated model with one that has also seen the test rows.
plt.figure(figsize=(30, 30))
temp = tree.plot_tree(dt, fontsize=10)
plt.show()
              precision    recall  f1-score   support

       allow       0.99      0.96      0.98     11381
        deny       0.75      0.98      0.85      4456
        drop       0.88      0.67      0.76      3808
  reset-both       0.83      0.33      0.48        15

    accuracy                           0.91     19660
   macro avg       0.87      0.74      0.77     19660
weighted avg       0.92      0.91      0.91     19660

accuracy:0.9074
# Naive Bayes classifier
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
# ravel() flattens the (n_samples, 1) label array to the 1-D form passed to fit.
nb = nb.fit(x_train, y_train.ravel())

# Predict once and reuse the result for the report.
y_forecast = nb.predict(x_test)
print(classification_report(y_test, y_forecast))
print('Nave Bayes Classifier{:.4f}'.format(nb.score(x_test, y_test)))
              precision    recall  f1-score   support

       allow       1.00      0.36      0.53     11381
        deny       0.11      0.14      0.12      4456
        drop       0.37      1.00      0.54      3808
  reset-both       0.00      0.00      0.00        15

    accuracy                           0.43     19660
   macro avg       0.37      0.37      0.30     19660
weighted avg       0.68      0.43      0.44     19660

Nave Bayes Classifier0.4311
# Logistic regression classifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

lr = LogisticRegression(solver='lbfgs')
# ravel() flattens the (n_samples, 1) label array to the 1-D form passed to fit.
lr = lr.fit(x_train, y_train.ravel())

# Predict once and reuse the result for the report.
y_forecast = lr.predict(x_test)
print(classification_report(y_test, y_forecast))
print('accuracy:{:.4f}'.format(lr.score(x_test, y_test)))
              precision    recall  f1-score   support

       allow       0.58      1.00      0.73     11381
        deny       0.00      0.00      0.00      4456
        drop       0.00      0.00      0.00      3808
  reset-both       0.00      0.00      0.00        15

    accuracy                           0.58     19660
   macro avg       0.14      0.25      0.18     19660
weighted avg       0.34      0.58      0.42     19660

accuracy:0.5789
# k-nearest-neighbours classifier
from sklearn.neighbors import KNeighborsClassifier

# Baseline: a single neighbour.
K = 1
knn = KNeighborsClassifier(n_neighbors=K)
knn = knn.fit(x_train, y_train.ravel())
print("k = {}neighbors , knn test:{}".format(K, knn.score(x_test, y_test)))
print("knn = {}neighbors , knn train:{}".format(K, knn.score(x_train, y_train)))

# Sweep k over 1..39 and record the test and train accuracy for each value.
ran = np.arange(1, 40)
train_list = []
test_list = []
for each in ran:
    knn = KNeighborsClassifier(n_neighbors=each)
    knn = knn.fit(x_train, y_train.ravel())
    test_list.append(knn.score(x_test, y_test))
    train_list.append(knn.score(x_train, y_train))

# Look the best k up in `ran` itself instead of assuming the sweep starts at 1.
# np.argmax returns the first maximum, matching list.index on the max value.
best_test = int(np.argmax(test_list))
best_train = int(np.argmax(train_list))
print("best test {} , k={}".format(test_list[best_test], ran[best_test]))
print("best train {} , k={}".format(train_list[best_train], ran[best_train]))
k = 1neighbors , knn test:0.8978128179043744
knn = 1neighbors , knn train:0.8984129752354377
# MLP classifier
# Imported here so the cell does not depend on an earlier cell having run.
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier

# One hidden layer of 5 units, with up to 1000 training iterations.
clfm = MLPClassifier(hidden_layer_sizes=(5,), max_iter=1000)
clfm.fit(x_train, y_train.ravel())

# Predict once and reuse the result for both the accuracy and the report.
y_predm = clfm.predict(x_test)
# Fixed: the function is accuracy_score, and the fitted model is clfm; the
# previous names (accurecy_score, clfk) do not exist.
print("ACCTURACY:", metrics.accuracy_score(y_test, y_predm))
print(classification_report(y_test, y_predm))
print("mlp:", clfm.score(x_test, y_test))
other:
- Vicon Physical Action Data Set
- Internet Firewall Data Data Set
- Lung Cancer Data Set
- Covertype Data Set
- Iris Data Set
- dermatology Data Set
- Guitar Chords finger positions Data Set
1 Comment
Add Yours →[…] Internet Firewall Data Data Set […]