Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
ysngrmz authored Oct 23, 2022
1 parent 920ed54 commit 3a184d1
Show file tree
Hide file tree
Showing 4 changed files with 587 additions and 0 deletions.
152 changes: 152 additions & 0 deletions scripts/combine_files_with_respect_to_user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from datetime import datetime
import numpy as np
# Month stems ("YYYY-MM") of the LDAP snapshot files, January 2010 through
# May 2011; processLDAP() opens "<dataFileDir>LDAP\<stem>.csv" for each one.
ldapFileList = [
    "%d-%02d" % (year, month)
    for year, lastMonth in ((2010, 12), (2011, 5))
    for month in range(1, lastMonth + 1)
]
# Root directory of the input CSV files, relative to the working directory
# (Windows-style separators, trailing separator included).
dataFileDir = "..\\datasets\\CERT\\r4.2\\"

def processLDAP():
    """Compare consecutive monthly LDAP snapshots and report user turnover.

    For every month stem in ``ldapFileList`` the file
    ``<dataFileDir>LDAP\\<stem>.csv`` is read, its header line is skipped and
    the second comma-separated field of each row is taken as the user id.

    Returns:
        A list with one ``(month, newUser, leavingEmployees)`` tuple per
        snapshot, in ``ldapFileList`` order. ``newUser`` holds the ids found
        in this snapshot but not in the previous one; ``leavingEmployees``
        holds the ids found in the previous snapshot but not in this one.
        For the first snapshot every id is reported as new and nobody as
        leaving. (Previously these lists were computed and then discarded.)

    Raises:
        OSError: if a snapshot file cannot be opened.
        IndexError: if a non-blank row has fewer than two fields.
    """
    turnover = []
    previousIds = []
    for month in ldapFileList:
        currentIds = []
        # `with` guarantees the snapshot is closed even if a row is malformed.
        with open(dataFileDir + "LDAP\\" + month + ".csv", "r") as snapshot:
            snapshot.readline()  # skip the header line
            for row in snapshot:
                row = row.rstrip("\r\n")
                if not row:
                    continue  # tolerate blank lines
                currentIds.append(row.split(",")[1])

        # Sets make each membership test O(1); the list comprehensions keep
        # the ids in file order.
        previousSet = set(previousIds)
        currentSet = set(currentIds)
        newUser = [userId for userId in currentIds if userId not in previousSet]
        leavingEmployees = [userId for userId in previousIds if userId not in currentSet]
        turnover.append((month, newUser, leavingEmployees))
        previousIds = currentIds
    return turnover
def extractUsers():
    """Return the distinct user ids listed in the 2010-01 LDAP snapshot.

    Reads ``<dataFileDir>LDAP\\2010-01.csv``, skips its header line and
    collects the second comma-separated field of every non-blank row.

    Returns:
        A list of unique user ids. The order is unspecified because the ids
        are de-duplicated through a set.

    Raises:
        OSError: if the snapshot file cannot be opened.
        IndexError: if a non-blank row has fewer than two fields.
    """
    uniqueIds = set()
    # `with` closes the file even when a malformed row raises.
    with open(dataFileDir + "LDAP\\2010-01.csv", "r") as ldapFile:
        ldapFile.readline()  # skip the header line
        for row in ldapFile:
            row = row.rstrip("\r\n")
            if not row:
                continue  # tolerate blank lines
            uniqueIds.add(row.split(",")[1])
    return list(uniqueIds)

def createEmptyUserIdDict():
    """Return a dict that maps every id from extractUsers() to its own empty list."""
    return {knownUser: [] for knownUser in extractUsers()}


def combine_action_squence_for_user():
    """Collect the actions from all five log files into one per-user dict.

    Starts from the empty per-user dict built by createEmptyUserIdDict(),
    then runs the device, file, logon, email and http extractors in that
    order, printing a progress message before each one.

    Returns:
        The populated dict, user id -> list of action records.
    """
    actionsByUser = createEmptyUserIdDict()
    print("Dictionary Created.")

    # (progress message, extractor) pairs, executed top to bottom.
    extractionSteps = (
        ("Device file is extracting...", exract_action_sequence_device),
        ("File file is extracting...", exract_action_sequence_file),
        ("Logon file is extracting...", exract_action_sequence_logon),
        ("Email file is extracting...", exract_action_sequence_email),
        ("Http file is extracting...", exract_action_sequence_http),
    )
    for progressMessage, extractor in extractionSteps:
        print(progressMessage)
        extractor(actionsByUser)

    print("Extraction completed")
    return actionsByUser

def exract_action_sequence_device(userActionDictionary):
    """Append every row of device.csv to the matching user's action list.

    Reads ``<dataFileDir>device.csv``, skips the header line and, for each
    non-blank row, appends ``[actionId, date, actionPc, actionType]`` to
    ``userActionDictionary[userId]``. The fields are taken by position from
    the comma-split row (0: id, 1: date, 2: user, 3: pc, 4: action type) and
    the date is parsed with the format ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to a list of action
            records; it is modified in place.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row names a user id that is not a key of the dict.
        ValueError: if a date field does not match the expected format.
        IndexError: if a non-blank row has fewer than five fields.
    """
    # `with` closes the file even if a row fails to parse.
    with open(dataFileDir + "device.csv", "r") as deviceFile:
        deviceFile.readline()  # skip the header line
        for action in deviceFile:
            action = action.rstrip("\r\n")
            if not action:
                continue  # tolerate blank lines
            # Only the first five fields are used, so stop splitting there.
            actionsp = action.split(",", 5)
            actionId = actionsp[0]
            date = datetime.strptime(actionsp[1], '%m/%d/%Y %H:%M:%S')
            userId = actionsp[2]
            actionPc = actionsp[3]
            actionType = actionsp[4]
            userActionDictionary[userId].append([actionId, date, actionPc, actionType])

def exract_action_sequence_file(userActionDictionary):
    """Append every row of file.csv to the matching user's action list.

    Reads ``<dataFileDir>file.csv``, skips the header line and, for each
    non-blank row, appends ``[actionId, date, actionPc, "file"]`` to
    ``userActionDictionary[userId]``. The fields are taken by position from
    the comma-split row (0: id, 1: date, 2: user, 3: pc) and the date is
    parsed with the format ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to a list of action
            records; it is modified in place.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row names a user id that is not a key of the dict.
        ValueError: if a date field does not match the expected format.
        IndexError: if a non-blank row has fewer than four fields.
    """
    # `with` closes the file even if a row fails to parse.
    with open(dataFileDir + "file.csv", "r") as fileFile:
        fileFile.readline()  # skip the header line
        for action in fileFile:
            action = action.rstrip("\r\n")
            if not action:
                continue  # tolerate blank lines
            # Only the first four fields are used; a bounded split avoids
            # cutting up whatever follows them.
            actionsp = action.split(",", 4)
            actionId = actionsp[0]
            date = datetime.strptime(actionsp[1], '%m/%d/%Y %H:%M:%S')
            userId = actionsp[2]
            actionPc = actionsp[3]
            userActionDictionary[userId].append([actionId, date, actionPc, "file"])



def exract_action_sequence_logon(userActionDictionary):
    """Append every row of logon.csv to the matching user's action list.

    Reads ``<dataFileDir>logon.csv``, skips the header line and, for each
    non-blank row, appends ``[actionId, date, actionPc, actionType]`` to
    ``userActionDictionary[userId]``. The fields are taken by position from
    the comma-split row (0: id, 1: date, 2: user, 3: pc, 4: action type) and
    the date is parsed with the format ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to a list of action
            records; it is modified in place.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row names a user id that is not a key of the dict.
        ValueError: if a date field does not match the expected format.
        IndexError: if a non-blank row has fewer than five fields.
    """
    # `with` closes the file even if a row fails to parse.
    with open(dataFileDir + "logon.csv", "r") as logonFile:
        logonFile.readline()  # skip the header line
        for action in logonFile:
            action = action.rstrip("\r\n")
            if not action:
                continue  # tolerate blank lines
            # Only the first five fields are used, so stop splitting there.
            actionsp = action.split(",", 5)
            actionId = actionsp[0]
            date = datetime.strptime(actionsp[1], '%m/%d/%Y %H:%M:%S')
            userId = actionsp[2]
            actionPc = actionsp[3]
            actionType = actionsp[4]
            userActionDictionary[userId].append([actionId, date, actionPc, actionType])

def exract_action_sequence_email(userActionDictionary):
    """Append every row of email.csv to the matching user's action list.

    Reads ``<dataFileDir>email.csv``, skips the header line and, for each
    non-blank row, appends ``[actionId, date, actionPc, "email"]`` to
    ``userActionDictionary[userId]``. The fields are taken by position from
    the comma-split row (0: id, 1: date, 2: user, 3: pc) and the date is
    parsed with the format ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to a list of action
            records; it is modified in place.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row names a user id that is not a key of the dict.
        ValueError: if a date field does not match the expected format.
        IndexError: if a non-blank row has fewer than four fields.
    """
    # `with` closes the file even if a row fails to parse.
    with open(dataFileDir + "email.csv", "r") as emailFile:
        emailFile.readline()  # skip the header line
        for action in emailFile:
            action = action.rstrip("\r\n")
            if not action:
                continue  # tolerate blank lines
            # Only the first four fields are used; a bounded split avoids
            # cutting up whatever follows them.
            actionsp = action.split(",", 4)
            actionId = actionsp[0]
            date = datetime.strptime(actionsp[1], '%m/%d/%Y %H:%M:%S')
            userId = actionsp[2]
            actionPc = actionsp[3]
            userActionDictionary[userId].append([actionId, date, actionPc, "email"])

def exract_action_sequence_http(userActionDictionary):
    """Append every row of http.csv to the matching user's action list.

    Reads ``<dataFileDir>http.csv``, skips the header line and, for each
    non-blank row, appends ``[actionId, date, actionPc, "http"]`` to
    ``userActionDictionary[userId]``. The fields are taken by position from
    the comma-split row (0: id, 1: date, 2: user, 3: pc) and the date is
    parsed with the format ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to a list of action
            records; it is modified in place.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a row names a user id that is not a key of the dict.
        ValueError: if a date field does not match the expected format.
        IndexError: if a non-blank row has fewer than four fields.
    """
    # `with` closes the file even if a row fails to parse.
    with open(dataFileDir + "http.csv", "r") as httpFile:
        httpFile.readline()  # skip the header line
        for action in httpFile:
            action = action.rstrip("\r\n")
            if not action:
                continue  # tolerate blank lines
            # Only the first four fields are used; a bounded split avoids
            # cutting up whatever follows them.
            actionsp = action.split(",", 4)
            actionId = actionsp[0]
            date = datetime.strptime(actionsp[1], '%m/%d/%Y %H:%M:%S')
            userId = actionsp[2]
            actionPc = actionsp[3]
            userActionDictionary[userId].append([actionId, date, actionPc, "http"])

def sort_dictionary_by_user(userActionDictionary):
    """Replace each user's action list with an array sorted by column 1.

    Every value of the dict is converted to a 2-D NumPy object array and its
    rows are reordered by ascending value of column 1 (the parsed date in the
    records built by the extractors in this file). The dict is modified in
    place.

    A user with no actions gets an empty ``(0, 4)`` object array instead of
    triggering an ``IndexError`` on the column lookup; 4 is the record width
    used by every extractor in this file. The sort is stable, so rows with
    equal keys keep their original relative order.

    Args:
        userActionDictionary: dict mapping user id to a sequence of
            equal-length action records.

    Returns:
        None.
    """
    # Only existing keys are reassigned, so iterating the dict is safe.
    for userId in userActionDictionary:
        rows = np.array(userActionDictionary[userId], dtype=object)
        if rows.size == 0:
            # np.array([]) is 1-D, so rows[:, 1] would raise IndexError.
            userActionDictionary[userId] = np.empty((0, 4), dtype=object)
            continue
        order = np.argsort(rows[:, 1], kind="stable")
        userActionDictionary[userId] = rows[order]
    print("Dictionary sorted")

def create_user_action_squence_file(userActionDictionary):
    """Write one CSV file per user containing that user's action records.

    For every key of the dict the user id is printed and the file
    ``<dataFileDir>combined_filed_with_respect_to_user\\<userId>.csv`` is
    (re)created. Each record becomes one line
    ``<field 0>,<date>,<field 2>,<field 3>`` where the date (field 1) is
    formatted as ``%m/%d/%Y %H:%M:%S``.

    Args:
        userActionDictionary: dict mapping user id to an iterable of records
            (for example the arrays produced by sort_dictionary_by_user).
            Fields 0, 2 and 3 must be strings and field 1 must provide
            ``strftime``.

    Raises:
        OSError: if an output file cannot be created; the target directory
            is not created by this function.
    """
    for userId, actions in userActionDictionary.items():
        print(userId)
        outputPath = dataFileDir + "combined_filed_with_respect_to_user\\" + userId + ".csv"
        # `with` closes the file even if a record fails to format.
        with open(outputPath, "w") as userFile:
            for record in actions:
                stringDate = record[1].strftime("%m/%d/%Y %H:%M:%S")
                userFile.write(record[0] + "," + stringDate + "," + record[2] + "," + record[3] + "\n")




#userActionDictionary = combine_action_squence_for_user()
#sort_dictionary_by_user(userActionDictionary)
#create_user_action_squence_file(userActionDictionary)
29 changes: 29 additions & 0 deletions scripts/divide_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

import numpy as np

#Seq = 0-7, 18,19
#WoSeq = 0-17, 19

# Column indices copied into extractedDatasetSeq.csv.
SeqInd = [0,1,2,3,4,5,6,7,18,19]
# Column indices copied into extractedDatasetWoSeq.csv.
WoSeqInd = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19]
extractedFileReadDir = "..\\datasets\\CERT\\r4.2\\featureExtractedDataset\\"

# Split extractedDataset.csv column-wise into the two files above.
# `with` closes all three files even if a row is malformed.
with open(extractedFileReadDir + "extractedDataset.csv", "r") as extractedFileRead, \
        open(extractedFileReadDir + "extractedDatasetWoSeq.csv", "w") as extractedFileWriteWoSeq, \
        open(extractedFileReadDir + "extractedDatasetSeq.csv", "w") as extractedFileWriteSeq:
    for row in extractedFileRead:
        # str.replace returns a new string; the old code discarded it, so the
        # newline stayed glued to the last field and the output only had line
        # breaks because column 19 happened to be selected. Strip it here and
        # write the terminator explicitly instead.
        row = row.rstrip("\r\n")
        if not row:
            continue  # tolerate blank lines
        rowsp = row.split(",")

        extractedFileWriteWoSeq.write(",".join([rowsp[i] for i in WoSeqInd]) + "\n")
        extractedFileWriteSeq.write(",".join([rowsp[i] for i in SeqInd]) + "\n")
Loading

0 comments on commit 3a184d1

Please sign in to comment.