-
Notifications
You must be signed in to change notification settings - Fork 96
/
plot_dataset_distribution.py
100 lines (74 loc) · 2.59 KB
/
plot_dataset_distribution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import argparse
import sys
import matplotlib.pyplot as plt
import re
# Per-dataset sizes drawn in the first ("Dataset ratio") panel, listed in the
# fixed left-to-right order used for the bars of every panel.
# NOTE(review): the values are presumably image counts per dataset -- confirm.
_DATASET_SIZES = (
    ('caltech-pedestrian', 14302),
    ('ETH', 3429),
    ('INRIAPerson', 900),
    ('MOT17Det', 5316),
    ('tud-brussels-motionpairs', 507),
    ('tud-crossing-sequence', 200),
    ('voc_person', 6095),
)

# First token of a line that announces a new iteration, e.g. "120:".
_ITERATION_TOKEN = re.compile(r'(\d+):$')


def _count_datasets_per_window(lines, begin_iter, end_iter, width):
    """Count 'grp:' entries per dataset for each iteration window.

    The range [begin_iter, end_iter) is cut into consecutive half-open
    windows of ``width`` iterations (the last one is clipped at
    ``end_iter``).  A line whose first space-separated token is digits
    followed by ':' sets the current iteration.  A line starting with
    'grp:' is attributed to the current iteration; its second
    space-separated token is split on backslashes and the fourth component
    is taken as the dataset name.

    Args:
        lines: iterable of log lines (a trailing newline is tolerated).
        begin_iter: first iteration to include.
        end_iter: first iteration to exclude.
        width: positive number of iterations per window.

    Returns:
        A list with one ``{dataset_name: count}`` dict per window, in
        window order.  The list is empty when ``begin_iter >= end_iter``.
    """
    names = [name for name, _ in _DATASET_SIZES]
    window_starts = range(begin_iter, end_iter, width)
    windows = [dict.fromkeys(names, 0) for _ in window_starts]

    current_iter = None  # no iteration marker seen yet
    for raw_line in lines:
        line = raw_line.rstrip("\n")
        tokens = line.split(' ')

        marker = _ITERATION_TOKEN.match(tokens[0])
        if marker:
            current_iter = int(marker.group(1))
            continue

        if not line.startswith('grp:'):
            continue
        # Entries before the first marker, or outside the requested range,
        # do not belong to any window.
        if current_iter is None or not begin_iter <= current_iter < end_iter:
            continue

        components = tokens[1].split('\\') if len(tokens) > 1 else []
        dataset = components[3] if len(components) > 3 else None
        counts = windows[(current_iter - begin_iter) // width]
        if dataset not in counts:
            # Report and skip instead of aborting the whole plot on one
            # malformed or unexpected entry.
            sys.stderr.write('warning: skipping unrecognised entry: %r\n' % line)
            continue
        counts[dataset] += 1
    return windows


def main(argv):
    """Plot how often each dataset was sampled, per iteration window.

    Parses ``log_file begin_iter end_iter width`` from ``argv`` (program
    name first, as in ``sys.argv``; ``None`` falls back to ``sys.argv``),
    reads the log, and shows one figure: the first panel holds the fixed
    dataset sizes, followed by one bar chart per window of ``width``
    iterations in [begin_iter, end_iter).  The counts of every window are
    also printed to stdout.

    The body runs unchanged under Python 2 and Python 3.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("log_file", help="Log file")
    parser.add_argument("begin_iter", type=int,
                        help="begin iteration of the selection")
    parser.add_argument("end_iter", type=int, help="end of iteration")
    parser.add_argument("width", type=int, help="length of interval")
    # Honour the argv handed in by the caller instead of silently reading
    # sys.argv again.
    args = parser.parse_args(argv[1:] if argv is not None else None)
    if args.width <= 0:
        parser.error("width must be a positive integer")

    # The context manager closes the log even if parsing fails.
    with open(args.log_file) as log:
        windows = _count_datasets_per_window(
            log, args.begin_iter, args.end_iter, args.width)

    names = [name for name, _ in _DATASET_SIZES]
    positions = list(range(1, len(names) + 1))

    # One panel for the dataset sizes plus one per window, laid out
    # row-major on a two-column grid with exactly as many rows as needed.
    num_panels = len(windows) + 1
    num_rows = (num_panels + 1) // 2

    plt.figure()
    ratio_axes = plt.subplot(num_rows, 2, 1)
    ratio_axes.bar(positions, [size for _, size in _DATASET_SIZES], color='g')
    # Title the panel that actually holds the bars.
    ratio_axes.set_title('Dataset ratio')

    for panel, counts in enumerate(windows, 2):
        values = [counts[name] for name in names]
        print('len of the images =  %d' % sum(values))
        print(counts)
        print(names)
        print(values)
        plt.subplot(num_rows, 2, panel)
        plt.bar(positions, values)
        print('')
    plt.show()
# Run the command-line tool only when this file is executed as a script;
# the full sys.argv (program name included) is handed to main().
if __name__ == "__main__":
    main(sys.argv)