-
Notifications
You must be signed in to change notification settings - Fork 4
/
getDataset.py
49 lines (44 loc) · 1.12 KB
/
getDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import csv
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
# Build a per-packet DataFrame from the tab-separated export '16-09-27.csv'.
#
# Every data row is expected to carry two pairs of port fields: indices 7/8
# and indices 9/10.  When a field of the first pair is empty, the matching
# field of the second pair is used instead; the second pair is then dropped,
# leaving the 12 fields named in `columns`.
# NOTE(review): the 7/8 vs 9/10 pairs look like TCP vs UDP ports -- confirm
# against the tool that produced the CSV.

arra = []
# newline='' is what the csv module asks for, so that it (not the text layer)
# interprets line endings, including any inside quoted fields.
with open('16-09-27.csv', newline='') as file:
    reader = csv.reader(file, delimiter='\t')
    next(reader, None)  # skip the header row; an empty file is tolerated
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to
            # index in it, so skip it instead of raising IndexError.
            continue
        # Fall back to the alternate port fields when the primary are empty.
        if row[7] == "":
            row[7] = row[9]
        if row[8] == "":
            row[8] = row[10]
        # Drop the alternate port fields (indices 9 and 10).
        del row[9:11]
        arra.append(row)
# The `with` statement has already closed the file here.

columns = [
    'frameNumber', 'timeRelative', 'frame.len', 'protocolNumber',
    'protocolName', 'ipSrc', 'ipDst', 'srcPort', 'dstPort', 'ipDSCP',
    'ethsrc', 'ethdst',
]
df = pd.DataFrame(data=arra, columns=columns)

# Feature matrix: every column except the two Ethernet address columns.
features = [
    'frameNumber', 'timeRelative', 'frame.len', 'protocolNumber',
    'protocolName', 'ipSrc', 'ipDst', 'srcPort', 'dstPort', 'ipDSCP',
]
X = df[features]

# Two separate target series, one per Ethernet address column.
y1 = df['ethsrc']
y2 = df['ethdst']

# Rows keep their file order: no shuffling is applied in this script.

# Preview of the first 15 rows; the result is only displayed when this runs
# in an interactive session or notebook cell.
df.head(15)