-
Notifications
You must be signed in to change notification settings - Fork 0
/
fiologparser.py
executable file
·224 lines (194 loc) · 7.08 KB
/
fiologparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/usr/bin/env python3
# Note: this script is python2 and python 3 compatible.
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interaval
# statistics even when samples are non-uniform. For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.
from __future__ import absolute_import
from __future__ import print_function
import argparse
import math
from functools import reduce
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
help='print all stats for each interval.')
parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
args = parser.parse_args()
return args
def get_ftime(series):
ftime = 0
for ts in series:
if ftime == 0 or ts.last.end < ftime:
ftime = ts.last.end
return ftime
def print_full(ctx, series):
ftime = get_ftime(series)
start = 0
end = ctx.interval
while (start < ftime):
end = ftime if ftime < end else end
results = [ts.get_value(start, end) for ts in series]
print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
start += ctx.interval
end += ctx.interval
def print_sums(ctx, series):
ftime = get_ftime(series)
start = 0
end = ctx.interval
while (start < ftime):
end = ftime if ftime < end else end
results = [ts.get_value(start, end) for ts in series]
print("%s, %0.3f" % (end, sum(results)))
start += ctx.interval
end += ctx.interval
def print_averages(ctx, series):
ftime = get_ftime(series)
start = 0
end = ctx.interval
while (start < ftime):
end = ftime if ftime < end else end
results = [ts.get_value(start, end) for ts in series]
print("%s, %0.3f" % (end, float(sum(results))/len(results)))
start += ctx.interval
end += ctx.interval
# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
# # sort -n -t ',' -k 2 small.log
# on your input.
def my_extend( vlist, val ):
vlist.extend(val)
return vlist
array_collapser = lambda vlist, val: my_extend(vlist, val)
def print_all_stats(ctx, series):
ftime = get_ftime(series)
start = 0
end = ctx.interval
print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
while (start < ftime): # for each time interval
end = ftime if ftime < end else end
sample_arrays = [ s.get_samples(start, end) for s in series ]
samplevalue_arrays = []
for sample_array in sample_arrays:
samplevalue_arrays.append(
[ sample.value for sample in sample_array ] )
# collapse list of lists of sample values into list of sample values
samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
# compute all stats and print them
mymin = min(samplevalues)
myavg = sum(samplevalues) / float(len(samplevalues))
mymedian = median(samplevalues)
my90th = percentile(samplevalues, 0.90)
my95th = percentile(samplevalues, 0.95)
my99th = percentile(samplevalues, 0.99)
mymax = max(samplevalues)
print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
start, len(samplevalues),
mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
# advance to next interval
start += ctx.interval
end += ctx.interval
def median(values):
s=sorted(values)
return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
def percentile(values, p):
s = sorted(values)
k = (len(s)-1) * p
f = math.floor(k)
c = math.ceil(k)
if f == c:
return s[int(k)]
return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
def print_default(ctx, series):
ftime = get_ftime(series)
start = 0
end = ctx.interval
averages = []
weights = []
while (start < ftime):
end = ftime if ftime < end else end
results = [ts.get_value(start, end) for ts in series]
averages.append(sum(results))
weights.append(end-start)
start += ctx.interval
end += ctx.interval
total = 0
for i in range(0, len(averages)):
total += averages[i]*weights[i]
print('%0.3f' % (total/sum(weights)))
class TimeSeries(object):
def __init__(self, ctx, fn):
self.ctx = ctx
self.last = None
self.samples = []
self.read_data(fn)
def read_data(self, fn):
f = open(fn, 'r')
p_time = 0
for line in f:
(time, value) = line.rstrip('\r\n').rsplit(', ')[:2]
self.add_sample(p_time, int(time), int(value))
p_time = int(time)
def add_sample(self, start, end, value):
sample = Sample(ctx, start, end, value)
if not self.last or self.last.end < end:
self.last = sample
self.samples.append(sample)
def get_samples(self, start, end):
sample_list = []
for s in self.samples:
if s.start >= start and s.end <= end:
sample_list.append(s)
return sample_list
def get_value(self, start, end):
value = 0
for sample in self.samples:
value += sample.get_contribution(start, end)
return value
class Sample(object):
def __init__(self, ctx, start, end, value):
self.ctx = ctx
self.start = start
self.end = end
self.value = value
def get_contribution(self, start, end):
# short circuit if not within the bound
if (end < self.start or start > self.end):
return 0
sbound = self.start if start < self.start else start
ebound = self.end if end > self.end else end
ratio = float(ebound-sbound) / (end-start)
return self.value*ratio/ctx.divisor
if __name__ == '__main__':
ctx = parse_args()
series = []
for fn in ctx.FILE:
series.append(TimeSeries(ctx, fn))
if ctx.sum:
print_sums(ctx, series)
elif ctx.average:
print_averages(ctx, series)
elif ctx.full:
print_full(ctx, series)
elif ctx.allstats:
print_all_stats(ctx, series)
else:
print_default(ctx, series)