forked from Huydatnguyen/LSDMLab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
question4-Pandas.py
43 lines (31 loc) · 1.63 KB
/
question4-Pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import sys
import pandas as pd
import time
import glob
# Question 4 Pandas____________________________________________________________start
# start timer
start = time.time()
# getting csv files from the folder
task_path = ".\Task_events"
# refer to the path containing all the files in job/task events table with extension .csv
task_files = glob.glob(task_path + "\*.csv")
# read all the files in task events table with specific necessary columns
task_df = pd.concat((pd.read_csv(f, names=["timeStamp","missingInfo","jobID","taskIndex","machineID","eventType"
,"userName","schedulingClass","priority","cpuRequest","ramRequest","diskRequest","constraint"] , usecols=["eventType","schedulingClass"]) for f in task_files), axis = 0, ignore_index=True)
# new dataframe with filtering evicted tasks
task_evicted_df = task_df[task_df['eventType'] == 2]
# list contains scheduling class values with removing duplicates
list_scheduling_class = task_evicted_df['schedulingClass'].unique()
# iterate each element in list
for elem in list_scheduling_class:
# empty element is not valid
if elem != '':
# new dataframe with filtering tasks with corresponding scheduling class
scheduling_class_df = task_evicted_df[task_evicted_df['schedulingClass'] == elem]
# compute the result and show
print("Percentage of tasks having scheduling class: " , elem, "which were evicted is: " ,round(scheduling_class_df.count()/task_evicted_df.count()*100,2))
# end timer
end = time.time()
print("elapsed time: " , end-start)
# Question 4 Pandas______________________________________________________________end
input("Press Enter to continnnue...")