Add the core classification infrastructure to memory_inspector.

This CL introduces the core classes for handling classification of data in a hierarchical fashion. It introduces two concepts: - A rule tree: hierarchy of rules defined by the end-user. - A result tree: the corresponding tree which aggregates the memory counters. BUG=340294 NOTRY=true Review URL: https://codereview.chromium.org/183173003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@255352 0039d316-1c4b-4281-b951-d872f2087c98
280723148 · Mar 6, 2014 · 7a9570e · 7a9570e
1 parent b99c133
commit 7a9570e
Show file tree

Hide file tree

Showing 5 changed files with 394 additions and 0 deletions.
diff --git a/tools/memory_inspector/memory_inspector/classification/__init__.py b/tools/memory_inspector/memory_inspector/classification/__init__.py
diff --git a/tools/memory_inspector/memory_inspector/classification/results.py b/tools/memory_inspector/memory_inspector/classification/results.py
@@ -0,0 +1,115 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This module owns the logic for classifying and aggregating data in buckets.
+
+This complements the structure defined in the rules module. Symmetrically, the
+aggregated results are organized in a bucket tree, whose structure is identical
+to that of the corresponding rule tree.
+The logic for aggregation is the following:
+- The client loads a "rule tree" defined by the end-user (e.g., in a file) which
+  defines the final "shape" of the results.
+- The rules define how to "match" a trace_record (e.g., a mmap line or a native
+  allocation) given some of its properties (e.g. the mapped file or the prot.
+  flags).
+- The concrete classifier (which will use this module) knows how to count the
+  values for each trace_record (e.g. [Dirty KB, Clean KB, RSS KB] for mmaps).
+  Hence it decides the cardinality of the result nodes.
+- The responsibility of this module is simply doing the math.
+
+In essence, this module adds up the counters of each node whenever the
+trace_record being pushed in the tree (through the AddToMatchingNodes method)
+matches a rule.
+It does this math hierarchically, following the shape of the tree.
+
+A typical result tree looks like this (each node has two values in the example):
+                          +----------------------+
+                          |        Total         |
+                          |----------------------|
+       +------------------+     (100, 1000)      +--------------------+
+       |                  +----------+-----------+                    |
+       |                             |                                |
+ +-----v-----+                 +-----v-----+                   +------v----+
+ |    Foo    |                 |    Bar    |                   |Total-other|
+ |-----------|                 |-----------|                   |-----------|
+ | (15, 100) |             +---+ (80, 200) +-----+             | (5, 700)  |
+ +-----------+             |   +-----------+     |             +-----------+
+                           |                     |
+                    +------v------+       +------v-----+
+                    | Bar::Coffee |       | Bar-other  |
+                    |-------------|       |------------|
+                    |  (30, 120)  |       |  (50, 80)  |
+                    +-------------+       +------------+
+"""
+
+from memory_inspector.classification import rules
+
+
+class AggreatedResults(object):
+  """A tree of results, where each node is a bucket (root: 'Total' bucket)."""
+
+  def __init__(self, rule_tree, keys):
+    """Initializes the bucket tree using the structure of the rules tree.
+
+    Each node of the bucket tree is initialized with a list of len(keys) zeros.
+    """
+    assert(isinstance(rule_tree, rules.Rule))
+    assert(isinstance(keys, list))
+    self.keys = keys
+    self.total = AggreatedResults._MakeBucketNodeFromRule(rule_tree, len(keys))
+
+  def AddToMatchingNodes(self, trace_record, values):
+    """Adds the provided |values| to the nodes that match the |trace_record|.
+
+    Tree traversal logic: at any level, one and only one node will match the
+    |trace_record| (in the worst case it will be the catchall *-other rule).
+    When a node is matched, the traversal continues in its children and no
+    further siblings in the upper levels are visited anymore.
+    This is to guarantee that at any level the values of one node are equal to
+    the sum of the values of all its children.
+
+    Args:
+      trace_record: any kind of object which can be matched by the Match method
+          of the Rule object.
+      values: a list of int(s) which represent the value associated to the
+          matched trace_record. The cardinality of the list must be equal to the
+          cardinality of the initial keys.
+    """
+    assert(len(values) == len(self.keys))
+    AggreatedResults._AddToMatchingNodes(
+        trace_record, values, self.total, len(self.keys))
+
+  @staticmethod
+  def _AddToMatchingNodes(trace_record, values, bucket, num_keys):
+    if not bucket.rule.Match(trace_record):
+      return False
+    for i in xrange(num_keys):
+      bucket.values[i] += values[i]
+    for child_bucket in bucket.children:
+      if AggreatedResults._AddToMatchingNodes(
+          trace_record, values, child_bucket, num_keys):
+        break
+    return True
+
+  @staticmethod
+  def _MakeBucketNodeFromRule(rule, num_keys):
+    assert(isinstance(rule, rules.Rule))
+    bucket = Bucket(rule, num_keys)
+    for child_rule in rule.children:
+      bucket.children.append(
+          AggreatedResults._MakeBucketNodeFromRule(child_rule, num_keys))
+    return bucket
+
+
+class Bucket(object):
+  """A bucket is a node in the results tree. """
+  def __init__(self, rule, num_keys):
+    self.rule = rule
+    self.values = [0] * num_keys
+    self.children = []
+
+
+  @property
+  def name(self):
+    return self.rule.name
diff --git a/tools/memory_inspector/memory_inspector/classification/results_unittest.py b/tools/memory_inspector/memory_inspector/classification/results_unittest.py
@@ -0,0 +1,63 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import re
+import unittest
+
+from memory_inspector.classification import results
+from memory_inspector.classification import rules
+
+
+class ResultsTest(unittest.TestCase):
+  def runTest(self):
+    rules_dict = [
+      {
+        'name': 'a*',
+        'regex': '^a.*',
+        'children': [
+          {
+            'name': 'az*',
+            'regex': '^az.*'
+          }
+        ]
+      },
+      {
+        'name': 'b*',
+        'regex': '^b.*',
+      },
+    ]
+
+    rule = rules.Load(str(rules_dict), MockRegexMatchingRule)
+    result = results.AggreatedResults(rule, keys=['X', 'Y'])
+    self.assertEqual(result.total.name, 'Total')
+    self.assertEqual(len(result.total.children), 3)
+    self.assertEqual(result.total.children[0].name, 'a*')
+    self.assertEqual(result.total.children[1].name, 'b*')
+    self.assertEqual(result.total.children[2].name, 'Total-other')
+    self.assertEqual(result.total.children[0].children[0].name, 'az*')
+    self.assertEqual(result.total.children[0].children[1].name, 'a*-other')
+
+    result.AddToMatchingNodes('aa1', [1, 2])  # -> a*
+    result.AddToMatchingNodes('aa2', [3, 4])  # -> a*
+    result.AddToMatchingNodes('az', [5, 6])  # -> a*/az*
+    result.AddToMatchingNodes('z1', [7, 8])  # -> T-other
+    result.AddToMatchingNodes('b1', [9, 10])  # -> b*
+    result.AddToMatchingNodes('b2', [11, 12])  # -> b*
+    result.AddToMatchingNodes('z2', [13, 14])  # -> T-other
+
+    self.assertEqual(result.total.values, [49, 56])
+    self.assertEqual(result.total.children[0].values, [9, 12])
+    self.assertEqual(result.total.children[1].values, [20, 22])
+    self.assertEqual(result.total.children[0].children[0].values, [5, 6])
+    self.assertEqual(result.total.children[0].children[1].values, [4, 6])
+    self.assertEqual(result.total.children[2].values, [20, 22])
+
+
+class MockRegexMatchingRule(rules.Rule):
+  def __init__(self, name, filters):
+    super(MockRegexMatchingRule, self).__init__(name)
+    self._regex = filters['regex']
+
+  def Match(self, s):
+    return bool(re.match(self._regex, s))
diff --git a/tools/memory_inspector/memory_inspector/classification/rules.py b/tools/memory_inspector/memory_inspector/classification/rules.py
@@ -0,0 +1,119 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This module defines the core structure of the classification rules.
+
+This module does NOT specify how the rules filter the data: this responsibility
+belongs to the concrete classifiers, which have to override the Rule class
+herein defined and know how to do the math.
+
+This module, instead, defines the format of the rules and the way they are
+encoded and loaded (in a python-style dictionary file).
+Rules are organized in a tree, where the root is always represented by a 'Total'
+node, and the leaves are arbitrarily defined by the user, according to the
+following principles:
+- The order of sibling rules matters: what is caught by a rule will not be
+  caught by the next ones, but it is propagated to its children rules if any.
+- Every non-leaf node X gets an implicit extra child named X-other. This
+  catch-all child catches everything (within the parent rule scope) that is
+  not caught by the other siblings. This is to guarantee that, when doing the
+  math (the aggregation), at any level, the sum of the values in the children
+  matches the value of their parent.
+
+The format of a rule dictionary is the following:
+[
+{
+  'name':       'Name of the rule',
+  'filter-X':   'The embedder will know how to interpret this value and will use
+                 it to filter the data',
+  'filter-Y':   'Idem',
+  'children': [
+    {
+      'name':   'Name of the sub-rule 1',
+      ... and so on recursively ,
+    },
+  ]
+},
+]
+
+And a typical resulting rule tree looks like this:
+                          +----------------------+
+                          |        Total         |
+                          |----------------------|
+       +------------------+      Match all.      +--------------------+
+       |                  +----------+-----------+                    |
+       |                             |                                |
+ +-----v-----+                 +-----v-----+                   +------v----+
+ |    Foo    |                 |    Bar    |                   |Total-other|
+ |-----------|                 |-----------|                   |-----------|
+ |File: foo* |             +---+File: bar* +-----+             | Match all |
+ +-----------+             |   +-----------+     |             +-----------+
+                           |                     |
+                    +------v------+       +------v----+
+                    | Bar::Coffee |       | Bar-other |
+                    |-------------|       |-----------|
+                    |File: bar*cof|       | Match all |
+                    +-------------+       +-----------+
+"""
+
+import ast
+
+
+def Load(content, rule_builder):
+  """Construct a rule tree from a python-style dict representation.
+
+  Args:
+    content: a string containing the dict (i.e. content of the rule file).
+    rule_builder: a method which takes two arguments (rule_name, filters_dict)
+        and returns a subclass of |Rule|. |filters_dict| is a dict of the keys
+        (filter-foo, filter-bar in the example above) for the rule node.
+  """
+  rules_dict = ast.literal_eval(content)
+  root = Rule('Total')
+  _MakeRuleNodeFromDictNode(root, rules_dict, rule_builder)
+  return root
+
+
+class Rule(object):
+  """ An abstract class representing a rule node in the rules tree.
+
+  Embedders must override the Match method when deriving this class.
+  """
+
+  def __init__(self, name):
+    self.name = name
+    self.children = []
+
+  def Match(self, _):  # pylint: disable=R0201
+    """ The rationale of this default implementation is modeling the root
+    ('Total') and the catch-all (*-other) rules that every |RuleTree| must have,
+    regardless of the embedder-specific children rules. This is to guarantee
+    that the totals match at any level of the tree.
+    """
+    return True
+
+  def AppendChild(self, child_rule):
+    assert(isinstance(child_rule, Rule))
+    duplicates = filter(lambda x: x.name == child_rule.name, self.children)
+    assert(not duplicates), 'Duplicate rule ' + child_rule.name
+    self.children.append(child_rule)
+
+
+def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder):
+  """Recursive rule tree builder for traversing the rule dict."""
+  for dict_node in dict_nodes:
+    assert('name' in dict_node)
+    # Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed
+    # to the |rule_builder|
+    filter_keys = set(dict_node.keys()) - set(('name', 'children'))
+    filters = dict((k, dict_node[k]) for k in filter_keys)
+    child_rule = rule_builder(dict_node['name'], filters)
+    rule_node.AppendChild(child_rule)
+    dict_children = dict_node.get('children', {})
+    _MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder)
+
+    # If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to
+    # catch all the entries that matched this node but none of its children.
+  if len(rule_node.children):
+    rule_node.AppendChild(Rule(rule_node.name + '-other'))