Skip to content

Commit

Permalink
Add the core classification infrastructure to memory_inspector.
Browse files Browse the repository at this point in the history
This CL introduces the core classes for handling classification of
data in a hierarchical fashion. It introduces two concepts:
- A rule tree: hierarchy of rules defined by the end-user.
- A result tree: the corresponding tree which aggregates the
memory counters.

BUG=340294
NOTRY=true

Review URL: https://codereview.chromium.org/183173003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@255352 0039d316-1c4b-4281-b951-d872f2087c98
  • Loading branch information
primiano@chromium.org committed Mar 6, 2014
1 parent b99c133 commit 7a9570e
Show file tree
Hide file tree
Showing 5 changed files with 394 additions and 0 deletions.
Empty file.
115 changes: 115 additions & 0 deletions tools/memory_inspector/memory_inspector/classification/results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module owns the logic for classifying and aggregating data in buckets.
This complements the structure defined in the rules module. Symmetrically, the
aggregated results are organized in a bucket tree, whose structure is identical
to that of the corresponding rule tree.
The logic for aggregation is the following:
- The client loads a "rule tree" defined by the end-user (e.g., in a file) which
defines the final "shape" of the results.
- The rules define how to "match" a trace_record (e.g., a mmap line or a native
allocation) given some of its properties (e.g. the mapped file or the prot.
flags).
- The concrete classifier (which will use this module) knows how to count the
values for each trace_record (e.g. [Dirty KB, Clean KB, RSS KB] for mmaps).
Hence it decides the cardinality of the result nodes.
- The responsibility of this module is simply doing the math.
In essence, this module adds up the counters of each node whose rule matches
the trace_record being pushed into the tree (through the AddToMatchingNodes
method).
It just does this math in a hierarchical fashion, following the shape of the
tree.
A typical result tree looks like this (each node has two values in the example):
                         +----------------------+
                         |        Total         |
                         |----------------------|
      +------------------+     (100, 1000)      +--------------------+
      |                  +----------+-----------+                    |
      |                             |                                |
+-----v-----+                 +-----v-----+                   +------v----+
|    Foo    |                 |    Bar    |                   |Total-other|
|-----------|                 |-----------|                   |-----------|
| (15, 100) |             +---+ (80, 200) +-----+             | (5, 700)  |
+-----------+             |   +-----------+     |             +-----------+
                          |                     |
                   +------v------+       +------v-----+
                   | Bar::Coffee |       | Bar-other  |
                   |-------------|       |------------|
                   |  (30, 120)  |       |  (50, 80)  |
                   +-------------+       +------------+
"""

from memory_inspector.classification import rules


class AggreatedResults(object):
  """A tree of results, where each node is a bucket (root: 'Total' bucket).

  Attributes:
    keys: the list passed at construction time; every bucket holds exactly
        len(keys) counters.
    total: the root bucket, built from the root of the rule tree.
  """

  def __init__(self, rule_tree, keys):
    """Initializes the bucket tree using the structure of the rules tree.

    Each node of the bucket tree is initialized with a list of len(keys) zeros.

    Args:
      rule_tree: the root rules.Rule of the rule tree to mirror.
      keys: a list with one entry per counter tracked by each bucket.
    """
    assert(isinstance(rule_tree, rules.Rule))
    assert(isinstance(keys, list))
    self.keys = keys
    self.total = AggreatedResults._MakeBucketNodeFromRule(rule_tree, len(keys))

  def AddToMatchingNodes(self, trace_record, values):
    """Adds the provided |values| to the nodes that match the |trace_record|.

    Tree traversal logic: at any level, one and only one node will match the
    |trace_record| (in the worst case it will be the catchall *-other rule).
    When a node is matched, the traversal continues in its children and no
    further siblings in the upper levels are visited anymore.
    This is to guarantee that at any level the values of one node are equal to
    the sum of the values of all its children.

    Args:
      trace_record: any kind of object which can be matched by the Match method
          of the Rule object.
      values: a list of int(s) which represent the value associated to the
          matched trace_record. The cardinality of the list must be equal to
          the cardinality of the initial keys.
    """
    assert(len(values) == len(self.keys))
    AggreatedResults._AddToMatchingNodes(
        trace_record, values, self.total, len(self.keys))

  @staticmethod
  def _AddToMatchingNodes(trace_record, values, bucket, num_keys):
    """Recursively accumulates |values| into |bucket| and its matching child.

    Args:
      trace_record: the object handed to the Match method of each rule.
      values: the counters to add; its first |num_keys| items are used.
      bucket: the node currently being visited.
      num_keys: the number of counters held by each bucket.

    Returns:
      True if the rule of |bucket| matched |trace_record| (and hence |values|
      were added to it), False otherwise.
    """
    if not bucket.rule.Match(trace_record):
      return False
    # range() rather than the Python 2-only xrange(): the loop behaves the same
    # and keeps working when the module is run under Python 3.
    for i in range(num_keys):
      bucket.values[i] += values[i]
    # Children are visited in order and the visit stops at the first match, so
    # that a record is never accounted twice at the same level.
    for child_bucket in bucket.children:
      if AggreatedResults._AddToMatchingNodes(
          trace_record, values, child_bucket, num_keys):
        break
    return True

  @staticmethod
  def _MakeBucketNodeFromRule(rule, num_keys):
    """Recursively builds the bucket (sub)tree mirroring the |rule| (sub)tree.

    Args:
      rule: the rules.Rule node to mirror.
      num_keys: the number of zero-initialized counters of each bucket.

    Returns:
      The Bucket corresponding to |rule|, with one child bucket per child rule
      (in the same order).
    """
    assert(isinstance(rule, rules.Rule))
    bucket = Bucket(rule, num_keys)
    for child_rule in rule.children:
      bucket.children.append(
          AggreatedResults._MakeBucketNodeFromRule(child_rule, num_keys))
    return bucket


class Bucket(object):
  """One node of the results tree, tied to the rule it was created from."""

  def __init__(self, rule, num_keys):
    """Creates a childless bucket holding |num_keys| counters, all zero."""
    self.children = []
    self.values = [0 for _ in range(num_keys)]
    self.rule = rule

  @property
  def name(self):
    """The name of the bucket, i.e. the name of its rule."""
    return self.rule.name
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import unittest

from memory_inspector.classification import results
from memory_inspector.classification import rules


class ResultsTest(unittest.TestCase):
  """Checks the shape of the bucket tree and the values aggregated in it."""

  def runTest(self):
    az_node = {'name': 'az*', 'regex': '^az.*'}
    a_node = {'name': 'a*', 'regex': '^a.*', 'children': [az_node]}
    b_node = {'name': 'b*', 'regex': '^b.*'}
    rule_tree = rules.Load(str([a_node, b_node]), MockRegexMatchingRule)

    result = results.AggreatedResults(rule_tree, keys=['X', 'Y'])
    root = result.total

    # The bucket tree has the shape of the rule tree, catch-all nodes included.
    self.assertEqual(root.name, 'Total')
    self.assertEqual(len(root.children), 3)
    a_bucket, b_bucket, total_other_bucket = root.children
    self.assertEqual(a_bucket.name, 'a*')
    self.assertEqual(b_bucket.name, 'b*')
    self.assertEqual(total_other_bucket.name, 'Total-other')
    self.assertEqual(a_bucket.children[0].name, 'az*')
    self.assertEqual(a_bucket.children[1].name, 'a*-other')

    # Each record is annotated with the deepest bucket expected to catch it.
    records = [
        ('aa1', [1, 2]),    # -> a*
        ('aa2', [3, 4]),    # -> a*
        ('az', [5, 6]),     # -> a*/az*
        ('z1', [7, 8]),     # -> T-other
        ('b1', [9, 10]),    # -> b*
        ('b2', [11, 12]),   # -> b*
        ('z2', [13, 14]),   # -> T-other
    ]
    for record, counters in records:
      result.AddToMatchingNodes(record, counters)

    self.assertEqual(root.values, [49, 56])
    self.assertEqual(a_bucket.values, [9, 12])
    self.assertEqual(b_bucket.values, [20, 22])
    self.assertEqual(a_bucket.children[0].values, [5, 6])
    self.assertEqual(a_bucket.children[1].values, [4, 6])
    self.assertEqual(total_other_bucket.values, [20, 22])


class MockRegexMatchingRule(rules.Rule):
  """Test rule which matches strings against the 'regex' entry of its filters."""

  def __init__(self, name, filters):
    super(MockRegexMatchingRule, self).__init__(name)
    pattern = filters['regex']
    self._regex = pattern

  def Match(self, s):
    """Returns True if the regex matches at the beginning of |s|."""
    return re.match(self._regex, s) is not None
119 changes: 119 additions & 0 deletions tools/memory_inspector/memory_inspector/classification/rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module defines the core structure of the classification rules.
This module does NOT specify how the rules filter the data: this responsibility
belongs to the concrete classifiers, which have to subclass the Rule class
defined herein and know how to do the math.
This module, instead, defines the format of the rules and the way they are
encoded and loaded (in a python-style dictionary file).
Rules are organized in a tree, where the root is always represented by a 'Total'
node, and the leaves are arbitrarily defined by the user, according to the
following principles:
- The order of sibling rules matters: what is caught by a rule will not be
caught by the next ones, but it is propagated to its children rules, if any.
- Every non-leaf node X gets an implicit extra child named X-other. This
catch-all child catches everything (within the parent rule scope) that is
not caught by its siblings. This is to guarantee that, when doing the
math (the aggregation), at any level, the sum of the values of the children
matches the value of their parent.
The format of a rule dictionary is the following:
[
{
'name': 'Name of the rule',
'filter-X': 'The embedder will know how to interpret this value and will use
it to filter the data',
'filter-Y': 'Idem',
'children': [
{
'name': 'Name of the sub-rule 1',
... and so on recursively,
},
]
},
]
And a typical resulting rule tree looks like this:
                         +----------------------+
                         |        Total         |
                         |----------------------|
      +------------------+      Match all.      +--------------------+
      |                  +----------+-----------+                    |
      |                             |                                |
+-----v-----+                 +-----v-----+                   +------v----+
|    Foo    |                 |    Bar    |                   |Total-other|
|-----------|                 |-----------|                   |-----------|
|File: foo* |             +---+File: bar* +-----+             | Match all |
+-----------+             |   +-----------+     |             +-----------+
                          |                     |
                   +------v------+       +------v----+
                   | Bar::Coffee |       | Bar-other |
                   |-------------|       |-----------|
                   |File: bar*cof|       | Match all |
                   +-------------+       +-----------+
"""

import ast


def Load(content, rule_builder):
  """Builds a rule tree out of its python-style textual representation.

  Args:
    content: a string containing the python literal which describes the rules
        (i.e. the content of the rule file).
    rule_builder: a callable which takes two arguments (rule_name,
        filters_dict) and returns an instance of a |Rule| subclass.
        |filters_dict| holds every key of the rule node other than 'name' and
        'children'.

  Returns:
    The root |Rule| of the tree, which is named 'Total'.
  """
  parsed_nodes = ast.literal_eval(content)
  total_rule = Rule('Total')
  _MakeRuleNodeFromDictNode(total_rule, parsed_nodes, rule_builder)
  return total_rule


class Rule(object):
  """An abstract class representing a rule node in the rules tree.

  Embedders must override the Match method when deriving this class.

  Attributes:
    name: the name of the rule.
    children: the list of child rules, in insertion order.
  """

  def __init__(self, name):
    self.name = name
    self.children = []

  def Match(self, _):  # pylint: disable=R0201
    """Matches everything.

    The rationale of this default implementation is modeling the root
    ('Total') and the catch-all (*-other) rules that every rule tree must have,
    regardless of the embedder-specific children rules. This is to guarantee
    that the totals match at any level of the tree.

    Returns:
      Always True.
    """
    return True

  def AppendChild(self, child_rule):
    """Appends |child_rule| as the last child of this rule.

    Args:
      child_rule: the |Rule| to append. Its name must differ from the names of
          the children already present (enforced by an assertion).
    """
    assert(isinstance(child_rule, Rule))
    # any() yields a real boolean on every Python version. A filter() based
    # check is broken on Python 3, where filter() returns a lazy iterator that
    # is always truthy and would make the assertion below fail on every call.
    is_duplicate = any(
        child.name == child_rule.name for child in self.children)
    assert(not is_duplicate), 'Duplicate rule ' + child_rule.name
    self.children.append(child_rule)


def _MakeRuleNodeFromDictNode(rule_node, dict_nodes, rule_builder):
"""Recursive rule tree builder for traversing the rule dict."""
for dict_node in dict_nodes:
assert('name' in dict_node)
# Extract the filter keys (e.g., mmap-file, mmap-prot) that will be passed
# to the |rule_builder|
filter_keys = set(dict_node.keys()) - set(('name', 'children'))
filters = dict((k, dict_node[k]) for k in filter_keys)
child_rule = rule_builder(dict_node['name'], filters)
rule_node.AppendChild(child_rule)
dict_children = dict_node.get('children', {})
_MakeRuleNodeFromDictNode(child_rule, dict_children, rule_builder)

# If the rule_node isn't a leaf, add the 'name-other' catch-all sibling to
# catch all the entries that matched this node but none of its children.
if len(rule_node.children):
rule_node.AppendChild(Rule(rule_node.name + '-other'))
Loading

0 comments on commit 7a9570e

Please sign in to comment.