forked from shaka-project/shaka-player
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats.py
executable file
·771 lines (633 loc) · 22.8 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
#!/usr/bin/python
#
# Copyright 2015 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A program for analyzing the Shaka compiled sources to find areas that can
be removed if not needed. This uses the source map
(i.e. shaka-player.compiled.debug.map) to find the compiled code
size, see:
https://github.com/mattrobenolt/python-sourcemap
http://www.html5rocks.com/en/tutorials/developertools/sourcemaps/
This script can output four different stats in two different formats:
- The size of functions and namespaces.
- The dependencies between types (in plain or DOT format).
- The dependencies between functions (in plain or DOT format).
- All tokens in the source map.
The dependencies can be outputted in DOT format which can be used with graph
programs to display a visual layout of the dependencies.
"""
import json
import math
import string
import sys
import os
def fromVlqSigned(value):
"""Converts a VLQ number to a normal signed number.
Arguments:
value - A number decoded from a VLQ string.
Returns:
an integer.
"""
negative = (value & 1) == 1
value >>= 1
return -value if negative else value
class Segment:
"""Defines an entry in the source map.
Members:
dstColOffset - The offset of the destination column from the previous
segment.
nameOffset - If not None, the offset of the name index from the previous
segment.
"""
def __init__(self, data):
self.dstColOffset = data[0]
self.nameOffset = data[4] if len(data) > 4 else None
def decodeSegment(segment):
"""Decodes VLQ values from the given segment.
Arguments:
segment - A string containing the encoded segment text.
Returns:
the parsed Segment.
"""
# A Base64 VLQ digit can represent 5 bits, so it is Base32.
VLQ_BASE_SHIFT = 5
VLQ_BASE = 1 << VLQ_BASE_SHIFT
# A mask of bits for a VLQ digit (11111), 31 decimal
VLQ_BASE_MASK = VLQ_BASE - 1
# The continuation bit is the 6th bit
VLQ_CONTINUATION_BIT = VLQ_BASE
# Don't use Base64 lib since it is not a real Base64 string; it simply
# decodes each character to a single Base64 number.
B64 = dict((c, i) for i, c in
enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
'0123456789+/'))
values = []
cur, shift = 0, 0
for c in segment:
digit = B64[c]
cont = (digit & VLQ_CONTINUATION_BIT) != 0
digit &= VLQ_BASE_MASK
cur += digit << shift
shift += VLQ_BASE_SHIFT
if not cont:
values.append(fromVlqSigned(cur))
cur, shift = 0, 0
# A valid VLQ string should not have dangling bits.
assert cur == 0
assert shift == 0
return Segment(values)
class Token:
"""A Token represents one JavaScript symbol. For example, this can be a
variable or an equals sign. If this is a variable or the keyword 'function'
it will usually have a name which indicates what it originally was defined.
But there are also tokens such as ; and ( which appear as tokens in the
map but to not have explicit name (see isFunction).
Members:
dstLine - Line index in compiled code
dstCol - Column index in compiled code
name - Name of the token; or None
"""
def __init__(self, dstLine, dstCol, name=None):
self.dstLine = dstLine
self.dstCol = dstCol
self.name = name
def __str__(self):
return str(self.name)
def decodeMappings(lineData, names):
"""Decodes a mappings line of text.
Arguments:
lineData - A string containing the mapping line.
names - An array of strings containing the names of the objects.
Returns:
a list of Tokens
"""
tokens = []
lines = lineData.split(';')
nameId = 0
for dstLine, line in enumerate(lines):
dstCol = 0
segments = line.split(',')
for segment in segments:
if not segment:
continue
segment = decodeSegment(segment)
dstCol += segment.dstColOffset
# segment.dstCol can be negative (more useful in names below); however
# after applying a negative offset, the result must still be positive.
assert dstCol >= 0
name = None
if segment.nameOffset != None:
nameId += segment.nameOffset
assert nameId >= 0
name = names[nameId]
tokens.append(Token(dstLine, dstCol, name))
return tokens
def isFunction(token, lines):
"""Determines if the given token is the start of a function.
All function definitions are assumed to have a name field and the token in
the compiled source is the keyword 'function'. Sometimes the function is
defined on the previous semicolon and sometimes that semicolon appears on
the previous line.
Arguments:
token - The Token to check.
lines - An array of compiled code lines.
Returns:
whether the token is a function.
"""
# All functions have a name.
if not token.name:
return False
# Sometimes a function token starts with the previous ;
# Also sometimes the token starts on the ; that is on the previous
# line.
partialLine = lines[token.dstLine][token.dstCol:]
if partialLine == ';\n':
if len(lines) == token.dstLine + 1:
return False
else:
return lines[token.dstLine + 1].startswith('function')
else:
return (partialLine.startswith('function') or
partialLine.startswith(';function'))
def readFunction(tokenIter, prev, prevIndex, lines, callback):
"""Reads a function from the token stream. The function token should
already be consumed.
Arguments:
tokenIter - An iterator of the tokens.
prev - The token containing the function definition.
prevIndex - The index of the previous token.
lines - An array of compiled code lines.
callback - A callback type used to create the data. See traverseTokens.
Returns:
an array of State objects in a format controlled by the callback (see
traverseTokens).
"""
brackets = 0
read = False
ret = []
partialLine = lines[prev.dstLine][prev.dstCol:]
state = callback(prev, prevIndex)
try:
while not read or brackets > 0:
index, token = next(tokenIter)
partialLine = lines[token.dstLine][token.dstCol:]
# Recursively read functions. Sometimes functions are defined nested.
# This doesn't happen that often, and never for Shaka methods, so it does
# not count it twice since the size of this method includes the nested
# function.
if isFunction(token, lines):
ret += readFunction(tokenIter, token, index, lines, callback)
else:
state.add(token, index)
if partialLine.startswith('{}'):
read = True
elif partialLine[0] == '{':
brackets += 1
read = True
elif partialLine[0] == '}':
brackets -= 1
# When we run out of tokens, simply ignore it. A parent call will not see
# this error; but it will continue and the next call to 'next' will fail
# with another StopIteration. This ensures that the last State object
# is included for invalid content.
except StopIteration:
pass
temp = state.build()
if temp:
ret.append(temp)
return ret
def traverseTokens(tokens, lines, callback):
"""Traverses a list of tokens to identify functions. Then uses a callback
to perform some work on the functions. Each function seen gets a new State
object created from the given callback method; there is a single State for
global code which is given None in the constructor. Then, each token seen
is passed to the 'add' method of the State. This is used by the State to
either calculate sizes, print tokens, or detect dependencies. The 'build'
method is called at the end of the function to create a result object that
is returned as an array at the end.
Arguments:
tokens - An array of Tokens.
lines - An array of compiled code lines.
callback - A constructor that returns a state object. It takes a start
token or None if outside a function. It has two member
functions:
add - accepts the current token and the token's index.
build - returns an object to be added to the results.
Returns:
an array of State objects in a format controlled by the callback.
"""
ret = []
state = callback(None, None)
# Create a token iterator. This is used to read tokens from the array. We
# cannot use a for loop because the iterator is passed to readFunction.
tokenIter = enumerate(tokens)
try:
while True:
index, token = next(tokenIter)
if isFunction(token, lines):
ret += readFunction(tokenIter, token, index, lines, callback)
else:
state.add(token, index)
except StopIteration:
pass
temp = state.build()
if temp:
ret.append(temp)
return ret
class FunctionSize:
def __init__(self, name, size):
self.name = name
self.size = size
def printTokens(tokens, lines, funcs):
"""Prints the given tokens.
Arguments:
tokens - An array of Tokens.
lines - An array of compiled code lines.
funcs - An array of FunctionSize.
"""
class State:
def __init__(self, token, index):
# The start of a function, or the global start.
self.name = token.name if token else None
if token:
self._printToken('>', token, index)
def _printToken(self, prefix, token, index):
partialLine = lines[token.dstLine][token.dstCol:]
if len(tokens) > index + 1:
next_ = tokens[index + 1]
if next_.dstLine == token.dstLine:
partialLine = lines[token.dstLine][token.dstCol:next_.dstCol]
tokenText = partialLine[:10].replace('\n', '').rjust(12)
print '%s %4d %4d %12s %s' % (prefix, token.dstLine, token.dstCol,
tokenText, token.name)
def add(self, token, index):
prefix = None
if not self.name:
prefix = '!'
elif lines[token.dstLine][token.dstCol:token.dstCol+2] == '{}':
prefix = ' '
elif lines[token.dstLine][token.dstCol] == '{':
prefix = '+'
elif lines[token.dstLine][token.dstCol] == '}':
prefix = '-'
else:
prefix = ' '
self._printToken(prefix, token, index)
def build(self):
if not self.name:
return
# The end of a function. Print the size of this function.
size = 0
thisFunc = filter(lambda key:key.name == self.name, funcs)
if len(thisFunc) > 0:
size = thisFunc[0].size
print 'X', self.name, size
traverseTokens(tokens, lines, State)
class FunctionDependencies:
def __init__(self, name, deps):
self.name = name
self.deps = deps
def processDeps(tokens, lines, isClass):
"""Processes the tokens into function or class dependencies.
Arguments:
tokens - An array of Tokens.
lines - An array of compiled code lines.
isClass - Whether to create a class graph instead of a function graph.
Returns:
an array of FunctionDependencies.
"""
class State:
def __init__(self, token, index):
self.deps = []
self.name, self.parts = self._createParts(token)
def _createParts(self, token):
if not token or not token.name:
return (None, None)
parts = token.name.split('.')
name = token.name
# Instance methods are the same as static methods.
if len(parts) > 2 and parts[-2] == 'prototype':
del parts[-2]
# Strip function names if class graph; also remove it from the name.
if isClass:
if parts[-1][0] in string.lowercase:
del parts[-1]
name = '.'.join(parts)
return (name, parts)
def add(self, token, index):
# Ignore symbols outside a function. Only care about function
# references and only those that reference our code.
if not self.name or not token.name or not token.name.startswith('shaka.'):
return
name, otherParts = self._createParts(token)
# Get the index of the first different namespace.
count = min(len(self.parts), len(otherParts))
i = 0
while i < count and self.parts[i] == otherParts[i]:
i += 1
# Ignore use of members of the same object:
# OfflineVideoSource.configure and OfflineVideoSource.store
if (i == count - 1 or i == count) and len(self.parts) == len(otherParts):
return
# Ignore use of the constructor of the same type:
# OfflineVideoSource and OfflineVideoSource.store
if i == count and abs(len(self.parts) - len(otherParts)) == 1:
return
# Add the dependency.
if not (name in self.deps):
self.deps.append(name)
def build(self):
return FunctionDependencies(self.name, self.deps) if self.name else None
ret = traverseTokens(tokens, lines, State)
assert len(ret) > 0
ret = sorted(ret, key=lambda key:key.name)
# We need to collapse duplicates.
i = 0
while i + 1 < len(ret):
if ret[i].name == ret[i + 1].name:
for dep in ret[i + 1].deps:
if not dep in ret[i].deps:
ret[i].deps.append(dep)
del ret[i + 1]
else:
i += 1
return ret
def processSizes(tokens, lines):
"""Processes an array of tokens into function lengths.
Arguments:
tokens - An array of Tokens.
lines - An array of compiled code lines.
Returns:
an array of FunctionSizes sorted on name.
"""
class State:
def __init__(self, token, index):
self.name = token.name if token else None
self.size = 0
self.start = token.dstCol if token else None
self.line = token.dstLine if token else None
def add(self, token, index):
# Ignore outside a function
if not self.name:
return
# If we skipped to the next line, include the code to the end of the line.
# If we skipped multiple lines, include the whole line. This will most
# likely never happen since the compiled code usually has new lines on
# function boundaries.
assert token.dstLine >= self.line
while token.dstLine != self.line:
self.size += len(lines[self.line]) - self.start
self.line += 1
self.start = 0
# Keep increasing the size. We can't simply keep the start and measure
# at the end since we are not given the end token in build().
self.size += token.dstCol - self.start
self.start = token.dstCol
def build(self):
return FunctionSize(self.name, self.size) if self.name else None
ret = traverseTokens(tokens, lines, State)
assert len(ret) > 0
ret = filter(lambda key:key.name and
(key.name.startswith('shaka.') or key.name.startswith('goog.')), ret)
ret = sorted(ret, key=lambda key:key.name)
# We need to collapse duplicates.
i = 0
while i + 1 < len(ret):
if ret[i].name == ret[i + 1].name:
ret[i].size += ret[i + 1].size
del ret[i + 1]
else:
i += 1
return ret
def printTree(results, indent, callback, endCallback):
"""Prints the results in an indented format.
Arguments:
results - An array of FunctionSizes sorted on name.
indent - A number to indent.
callback - A callback function to print the data. Accepts a title, an
indentation, and a sublist of the items in that group.
endCallback - A callback function called after a group; can be None.
"""
# This is used both when printing sizes and when printing dependencies in
# DOT format. This recursively creates groups of items with the same prefix.
# e.g.
# shaka
# shaka.util
# shaka.util.FailoverUri
# shaka.util.TypedBind
# shaka.player
# ...
if len(results) <= 1:
callback(None, indent, results)
return
# We want to group-by prefixes for the elements. Since it is sorted, we
# can find the overall prefix length.
first = results[0].name.split('.')
last = results[-1].name.split('.')
prefix = 0
while (prefix < len(first) and prefix < len(last)
and first[prefix] == last[prefix]):
prefix += 1
group = 0
groupItems = first
if prefix == len(first):
# This happens when the group has a first element of a class name and the
# remaining are member functions. Remove the first element from this
# group.
groupItems = results[1].name.split('.')
group = 1
# Start with second element, and go one more so we make sure to process the
# last group.
for i in range(1, len(results) + 1):
items = (results[i].name.split('.') if i != len(results) else
[''] * (prefix + 1))
if items[prefix] != groupItems[prefix]:
title = '.'.join(groupItems[:(prefix + 1)])
callback(title, indent, results[group:i])
printTree(results[group:i], indent + 1, callback, endCallback)
# Set the start of the next group to the current element.
group = i
groupItems = items
if endCallback:
endCallback(indent)
def printSizes(sizes):
"""Prints the sizes in an indented format.
Arguments:
sizes - An array of FunctionSizes sorted on name.
"""
# This callback is used to print the total sizes of each of the sub-groups.
# Using the indent as padding allows to print a tree-like structure to
# show how big each section is.
def callbackFactory(padding):
# Use a factory so we capture the padding.
def callback(title, indent, results):
if title:
size = sum(map(lambda key:key.size, results))
print '%s %*d %s' % (indent * ' ', padding, size, title)
return callback
total = sum(map(lambda key:key.size, sizes))
padding = int(math.ceil(math.log10(total)))
print '%*d %s' % (padding, total, 'TOTAL')
printTree(sizes, 0, callbackFactory(padding), None)
def printDeps(results, inDot):
"""Prints the dependencies.
Arguments:
results - A sorted array of FunctionDependencies.
inDot - Whether to print in DOT format.
"""
if not inDot:
for func in results:
name, deps = func.name, func.deps
# Ignore items with no dependencies.
if len(deps) > 0:
print name
for dep in deps:
print ' ', dep
return
depMap = dict()
# Use the printTree to produce clusters for each namespace and type. This
# will print boxes around each class and show dependencies between types.
print 'digraph {'
def callbackFactory(depMap, temp):
def callback(title, indent, results):
if title:
if len(results) > 1:
print '\t' * indent, 'subgraph', 'cluster' + str(len(temp)), '{'
temp.append(1)
else:
print '\t' * indent, len(depMap), '[', \
'label="' + results[0].name + '"', ']', ';'
depMap[results[0].name] = len(depMap)
return callback
def endCallback(indent):
if indent > 1:
print '\t' * (indent - 1), '}'
printTree(results, 1, callbackFactory(depMap, []), endCallback)
for func in results:
name, deps = func.name, func.deps
# Ignore items with no dependencies.
if len(deps) > 0:
if not name in depMap:
depMap[name] = len(depMap)
print '\t', depMap[name], '[', 'label="' + name + '"', ']', ';'
for dep in deps:
if not dep in depMap:
depMap[dep] = len(depMap)
print '\t', depMap[dep], '[', 'label="' + dep + '"', ']', ';'
print '\t', depMap[name], '->', depMap[dep], ';'
print '}'
class Options:
def __init__(self):
self.printDeps = False
self.printSizes = False
self.printTokens = False
self.inDot = False
self.isClass = False
def process(text, options):
"""Decodes a JSON string containing source map data.
Arguments:
text - A JSON string containing source map data.
options - An object containing the command-line options.
"""
# The spec allows a map file to start with )]} to prevent javascript from
# including it.
if text.startswith(')]}\'\n') or text.startswith(')]}\n'):
_, text = text.split('\n', 1)
# Decode the JSON data and get the parts we need.
data = json.loads(text)
fileLines = open(data['file']).readlines()
names = data['names']
mappings = data['mappings']
tokens = decodeMappings(mappings, names)
sizes = processSizes(tokens, fileLines)
# Print out one of the results.
if options.printTokens:
printTokens(tokens, fileLines, sizes)
elif options.printSizes:
printSizes(sizes)
elif options.printDeps or options.isClass:
temp = processDeps(tokens, fileLines, options.isClass)
printDeps(temp, options.inDot)
def printHelp():
"""Prints the help docs.
"""
print 'Usage:', sys.argv[0], """[options] [--] [source_map]
Types(must include exactly one):
-c --class-deps : Prints the class dependencies
-f --function-deps : Prints the function dependencies
-s --function-sizes : Prints the function sizes (in number of characters)
-t --all-tokens : Prints all tokens in the source map
Options:
-d --dot-format : Prints in DOT format; only valid with \
--function-deps or --class-dep
-h --help : Prints this help page
Token Format:
prefix line col token name => Token
X functionName size => end function
Prefixes:
> - start a function
! - not in a function
- - end curly brace
+ - start curly brace
- other token
DOT Format:
This can print the dependency graph in DOT format. This can be used with
graph programs to display a visual graph of dependencies. For example
using graphviz:
""", sys.argv[0], """-c -d | fdp -Goverlap=prism | neato -n2 -Tsvg > out.svg"""
def main(args):
options = Options()
doneArgs = False
name = 'shaka-player.compiled.debug.map'
# Process the command-line arguments.
for arg in args:
if doneArgs or arg[0] != '-':
name = arg
elif arg == '-f' or arg == '--function-deps':
options.printDeps = True
elif arg == '-t' or arg == '--all-tokens':
options.printTokens = True
elif arg == '-s' or arg == '--function-sizes':
options.printSizes = True
elif arg == '-c' or arg == '--class-deps':
options.isClass = True
elif arg == '-d' or arg == '--dot-format':
options.inDot = True
elif arg == '--':
doneArgs = True
elif arg == '-h' or arg == '--help':
printHelp()
return 0
else:
print >> sys.stderr, 'Unrecognized argument:', arg
printHelp()
return 1
# Verify arguments are correct.
if (options.printSizes + options.printDeps + options.printTokens +
options.isClass) != 1:
print >> sys.stderr, 'Must include exactly one output type.'
printHelp()
return 1
elif options.inDot and not options.printDeps and not options.isClass:
print >> sys.stderr, '--dot-format only valid with --function-deps or \
--class-deps.'
return 1
elif not os.path.isfile(name):
print >> sys.stderr, name, 'not found; build Shaka first.'
return 1
else:
process(open(name).read(), options)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))