forked from sanyaade-mobiledev/chromium.src
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the core classification infrastructure to memory_inspector.
This CL introduces the core classes for handling classification of data in a hierarchical fashion. It introduces two concepts: - A rule tree: hierarchy of rules defined by the end-user. - A result tree: the corresponding tree which aggregates the memory counters. BUG=340294 NOTRY=true Review URL: https://codereview.chromium.org/183173003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@255352 0039d316-1c4b-4281-b951-d872f2087c98
- Loading branch information
primiano@chromium.org
committed
Mar 6, 2014
1 parent
b99c133
commit 7a9570e
Showing
5 changed files
with
394 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
115 changes: 115 additions & 0 deletions
115
tools/memory_inspector/memory_inspector/classification/results.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
# Copyright 2014 The Chromium Authors. All rights reserved. | ||
# Use of this source code is governed by a BSD-style license that can be | ||
# found in the LICENSE file. | ||
|
||
"""This module owns the logic for classifying and aggregating data in buckets. | ||
This complements the structure defined in the rules module. Symmetrically, the | ||
aggregated results are organized in a bucket tree, whose structure is identical | ||
to the one of the corresponding rule tree. | ||
The logic for aggregation is the following: | ||
- The client loads a "rule tree" defined by the end-user (e.g., in a file) which | ||
defines the final "shape" of the results. | ||
- The rules define how to "match" a trace_record (e.g., a mmap line or a native | ||
allocation) given some of its properties (e.g. the mapped file or the prot. | ||
flags). | ||
- The concrete classifier (which will use this module) knows how to count the | ||
values for each trace_record (e.g. [Dirty KB, Clean KB, RSS KB] for mmaps). | ||
Hence it decides the cardinality of the result nodes. | ||
- The responsibility of this module is simply doing the math. | ||
In essence, this module adds up the counters of each node whenever the | ||
trace_record being pushed into the tree (through the AddToMatchingNodes method) | ||
matches a rule. | ||
It just does this math in a hierarchical fashion following the shape of the tree. | ||
A typical result tree looks like this (each node has two values in the example): | ||
+----------------------+ | ||
| Total | | ||
|----------------------| | ||
+------------------+ (100, 1000) +--------------------+ | ||
| +----------+-----------+ | | ||
| | | | ||
+-----v-----+ +-----v-----+ +------v----+ | ||
| Foo | | Bar | |Total-other| | ||
|-----------| |-----------| |-----------| | ||
| (15, 100) | +---+ (80, 200) +-----+ | (5, 700) | | ||
+-----------+ | +-----------+ | +-----------+ | ||
| | | ||
+------v------+ +------v-----+ | ||
| Bar::Coffee | | Bar-other | | ||
|-------------| |------------| | ||
| (30, 120) | | (50, 80) | | ||
+-------------+ +------------+ | ||
""" | ||
|
||
from memory_inspector.classification import rules | ||
|
||
|
||
class AggreatedResults(object):
  """A tree of results, where each node is a bucket (root: 'Total' bucket).

  The bucket tree is built with the same shape as the rule tree passed to the
  constructor: one bucket per rule, each holding len(keys) counters.

  NOTE: the class name is misspelled ("Aggreated"); it is kept unchanged
  because it is the public name of this class.
  """

  def __init__(self, rule_tree, keys):
    """Initializes the bucket tree using the structure of the rules tree.

    Each node of the bucket tree is initialized with a list of len(keys) zeros.

    Args:
      rule_tree: the root |rules.Rule| of the rule tree to mirror.
      keys: a list with one entry per counter tracked by each bucket.
    """
    assert(isinstance(rule_tree, rules.Rule))
    assert(isinstance(keys, list))
    self.keys = keys
    self.total = AggreatedResults._MakeBucketNodeFromRule(rule_tree, len(keys))

  def AddToMatchingNodes(self, trace_record, values):
    """Adds the provided |values| to the nodes that match the |trace_record|.

    Tree traversal logic: at any level, one and only one node will match the
    |trace_record| (in the worst case it will be the catchall *-other rule).
    When a node is matched, the traversal continues in its children and no
    further siblings in the upper levels are visited anymore.
    This is to guarantee that at any level the values of one node are equal to
    the sum of the values of all its children.

    Args:
      trace_record: any kind of object which can be matched by the Match method
          of the Rule object.
      values: a list of int(s) which represent the value associated to the
          matched trace_record. The cardinality of the list must be equal to the
          cardinality of the initial keys.
    """
    assert(len(values) == len(self.keys))
    AggreatedResults._AddToMatchingNodes(
        trace_record, values, self.total, len(self.keys))

  @staticmethod
  def _AddToMatchingNodes(trace_record, values, bucket, num_keys):
    """Recursively adds |values| to |bucket| and to its first matching child.

    Args:
      trace_record: the object handed to the Match method of each rule.
      values: the counters to add; the first |num_keys| entries are used.
      bucket: the node of the results tree being visited.
      num_keys: the number of counters held by each bucket.

    Returns:
      True if the rule of |bucket| matched |trace_record| (and hence the values
      were added to it), False otherwise.
    """
    if not bucket.rule.Match(trace_record):
      return False
    # range() rather than the Python-2-only xrange(), so that this code runs
    # unchanged on both Python 2 and Python 3.
    for i in range(num_keys):
      bucket.values[i] += values[i]
    # Stop at the first child that matches: later siblings must not see the
    # record, otherwise the children would not sum up to their parent.
    for child_bucket in bucket.children:
      if AggreatedResults._AddToMatchingNodes(
          trace_record, values, child_bucket, num_keys):
        break
    return True

  @staticmethod
  def _MakeBucketNodeFromRule(rule, num_keys):
    """Recursively builds the bucket (sub)tree mirroring the |rule| (sub)tree.

    Args:
      rule: the |rules.Rule| for which a bucket has to be created.
      num_keys: the number of counters held by each bucket.

    Returns:
      The |Bucket| created for |rule|, with one child bucket per child rule.
    """
    assert(isinstance(rule, rules.Rule))
    bucket = Bucket(rule, num_keys)
    for child_rule in rule.children:
      bucket.children.append(
          AggreatedResults._MakeBucketNodeFromRule(child_rule, num_keys))
    return bucket
|
||
|
||
class Bucket(object):
  """A node of the results tree: the counters accumulated for one rule."""

  def __init__(self, rule, num_keys):
    # Child buckets are appended by whoever builds the tree.
    self.children = []
    # One zero-initialized counter per key.
    self.values = [0 for _ in range(num_keys)]
    self.rule = rule

  @property
  def name(self):
    """The name of the rule this bucket is bound to."""
    return self.rule.name
63 changes: 63 additions & 0 deletions
63
tools/memory_inspector/memory_inspector/classification/results_unittest.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Copyright 2014 The Chromium Authors. All rights reserved. | ||
# Use of this source code is governed by a BSD-style license that can be | ||
# found in the LICENSE file. | ||
|
||
import re | ||
import unittest | ||
|
||
from memory_inspector.classification import results | ||
from memory_inspector.classification import rules | ||
|
||
|
||
class ResultsTest(unittest.TestCase):
  """Checks both the shape of the result tree and the aggregated counters."""

  def runTest(self):
    # Rule tree under test: a* (with child az*) and b*.
    az_rule = {'name': 'az*', 'regex': '^az.*'}
    a_rule = {'name': 'a*', 'regex': '^a.*', 'children': [az_rule]}
    b_rule = {'name': 'b*', 'regex': '^b.*'}

    rule = rules.Load(str([a_rule, b_rule]), MockRegexMatchingRule)
    result = results.AggreatedResults(rule, keys=['X', 'Y'])

    # The result tree must mirror the rule tree, including the implicit
    # catch-all '-other' nodes of every non-leaf rule.
    total = result.total
    self.assertEqual(total.name, 'Total')
    self.assertEqual(len(total.children), 3)
    self.assertEqual(total.children[0].name, 'a*')
    self.assertEqual(total.children[1].name, 'b*')
    self.assertEqual(total.children[2].name, 'Total-other')
    self.assertEqual(total.children[0].children[0].name, 'az*')
    self.assertEqual(total.children[0].children[1].name, 'a*-other')

    records = [
        ('aa1', [1, 2]),    # -> a*
        ('aa2', [3, 4]),    # -> a*
        ('az', [5, 6]),     # -> a*/az*
        ('z1', [7, 8]),     # -> T-other
        ('b1', [9, 10]),    # -> b*
        ('b2', [11, 12]),   # -> b*
        ('z2', [13, 14]),   # -> T-other
    ]
    for record, values in records:
      result.AddToMatchingNodes(record, values)

    # At every level the children must add up to their parent.
    self.assertEqual(total.values, [49, 56])
    self.assertEqual(total.children[0].values, [9, 12])
    self.assertEqual(total.children[1].values, [20, 22])
    self.assertEqual(total.children[0].children[0].values, [5, 6])
    self.assertEqual(total.children[0].children[1].values, [4, 6])
    self.assertEqual(total.children[2].values, [20, 22])
|
||
|
||
class MockRegexMatchingRule(rules.Rule):
  """Test-only rule matching records against the 'regex' filter."""

  def __init__(self, name, filters):
    super(MockRegexMatchingRule, self).__init__(name)
    self._regex = filters['regex']

  def Match(self, s):
    # re.match returns None when the pattern does not match at the start of s.
    match = re.match(self._regex, s)
    return match is not None
119 changes: 119 additions & 0 deletions
119
tools/memory_inspector/memory_inspector/classification/rules.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Copyright 2014 The Chromium Authors. All rights reserved. | ||
# Use of this source code is governed by a BSD-style license that can be | ||
# found in the LICENSE file. | ||
|
||
"""This module defines the core structure of the classification rules. | ||
This module does NOT specify how the rules filter the data: this responsibility | ||
belongs to the concrete classifiers, which have to override the Rule class herein | ||
defined and know how to do the math. | ||
This module, instead, defines the format of the rules and the way they are | ||
encoded and loaded (in a python-style dictionary file). | ||
Rules are organized in a tree, where the root is always represented by a 'Total' | ||
node, and the leaves are arbitrarily defined by the user, according to the | ||
following principles: | ||
- The order of sibling rules matters: what is caught by a rule will not be caught | ||
by the next ones, but it is propagated to its children rules if any. | ||
- Every non-leaf node X gets an implicit extra child named X-other. This | ||
catch-all child catches everything (within the parent rule scope) that is | ||
not caught by the other siblings. This is to guarantee that, when doing the | ||
math (the aggregation), at any level, the sum of the values in the leaves | ||
matches the value of their parent. | ||
The format of a rule dictionary is the following: | ||
[ | ||
{ | ||
'name': 'Name of the rule', | ||
'filter-X': 'The embedder will know how to interpret this value and will use | ||
it to filter the data', | ||
'filter-Y': 'Idem', | ||
'children': [ | ||
{ | ||
'name': 'Name of the sub-rule 1', | ||
... and so on recursively, | ||
}, | ||
] | ||
}, | ||
] | ||
And a typical resulting rule tree looks like this: | ||
+----------------------+ | ||
| Total | | ||
|----------------------| | ||
+------------------+ Match all. +--------------------+ | ||
| +----------+-----------+ | | ||
| | | | ||
+-----v-----+ +-----v-----+ +------v----+ | ||
| Foo | | Bar | |Total-other| | ||
|-----------| |-----------| |-----------| | ||
|File: foo* | +---+File: bar* +-----+ | Match all | | ||
+-----------+ | +-----------+ | +-----------+ | ||
| | | ||
+------v------+ +------v----+ | ||
| Bar::Coffee | | Bar-other | | ||
|-------------| |-----------| | ||
|File: bar*cof| | Match all | | ||
+-------------+ +-----------+ | ||
""" | ||
|
||
import ast | ||
|
||
|
||
def Load(content, rule_builder):
  """Builds a rule tree out of its python-style dict representation.

  Args:
    content: a string containing the dict (i.e. content of the rule file).
    rule_builder: a method which takes two arguments (rule_name, filters_dict)
        and returns a subclass of |Rule|. |filters_dict| is a dict of the keys
        (filter-foo, filter-bar in the example above) for the rule node.

  Returns:
    The root |Rule| (named 'Total') of the resulting rule tree.
  """
  total_rule = Rule('Total')
  # literal_eval only accepts python literals, it does not run arbitrary code.
  _MakeRuleNodeFromDictNode(
      total_rule, ast.literal_eval(content), rule_builder)
  return total_rule
|
||
|
||
class Rule(object):
  """An abstract class representing a rule node in the rules tree.

  Embedders must override the Match method when deriving this class.
  """

  def __init__(self, name):
    """Creates a childless rule node with the given |name|."""
    self.name = name
    self.children = []

  def Match(self, _):  # pylint: disable=R0201
    """Returns True, i.e. matches anything.

    The rationale of this default implementation is modeling the root
    ('Total') and the catch-all (*-other) rules that every |RuleTree| must have,
    regardless of the embedder-specific children rules. This is to guarantee
    that the totals match at any level of the tree.
    """
    return True

  def AppendChild(self, child_rule):
    """Appends |child_rule| as the last child of this rule.

    Args:
      child_rule: the |Rule| to append. Its name must differ from the names of
          the children already present.

    Raises:
      AssertionError: if |child_rule| is not a |Rule| or if a child with the
          same name already exists.
    """
    assert(isinstance(child_rule, Rule))
    # any() instead of checking the truthiness of filter(): on Python 3
    # filter() returns an iterator, which is truthy even when empty, so the
    # duplicate check would reject every child.
    is_duplicate = any(
        child.name == child_rule.name for child in self.children)
    assert(not is_duplicate), 'Duplicate rule ' + child_rule.name
    self.children.append(child_rule)
|
||
|
||
def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder): | ||
"""Recursive rule tree builder for traversing the rule dict.""" | ||
for dict_node in dict_nodes: | ||
assert('name' in dict_node) | ||
# Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed | ||
# to the |rule_builder| | ||
filter_keys = set(dict_node.keys()) - set(('name', 'children')) | ||
filters = dict((k, dict_node[k]) for k in filter_keys) | ||
child_rule = rule_builder(dict_node['name'], filters) | ||
rule_node.AppendChild(child_rule) | ||
dict_children = dict_node.get('children', {}) | ||
_MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder) | ||
|
||
# If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to | ||
# catch all the entries that matched this node but none of its children. | ||
if len(rule_node.children): | ||
rule_node.AppendChild(Rule(rule_node.name + '-other')) |
Oops, something went wrong.