sripathikrishnan · rvoicilas · Nov 27, 2013 · Nov 27, 2013 · Nov 27, 2013 · Nov 27, 2013
diff --git a/rdbtools/__init__.py b/rdbtools/__init__.py
@@ -1,6 +1,7 @@
 from rdbtools.parser import RdbCallback, RdbParser, DebugCallback
 from rdbtools.callbacks import JSONCallback, DiffCallback, ProtocolCallback
-from rdbtools.memprofiler import MemoryCallback, PrintAllKeys, StatsAggregator
+from rdbtools.memprofiler import MemoryCallback, PrintAllKeys
+from rdbtools.stats_aggregator import StatsAggregator
 
 __version__ = '0.1.6'
 VERSION = tuple(map(int, __version__.split('.')))

diff --git a/rdbtools/memprofiler.py b/rdbtools/memprofiler.py
@@ -1,6 +1,5 @@
 from collections import namedtuple
 import random
-import json
 
 from rdbtools.parser import RdbCallback
 from rdbtools.callbacks import encode_key
@@ -11,62 +10,7 @@
 
 MemoryRecord = namedtuple('MemoryRecord', ['database', 'type', 'key', 'bytes', 'encoding','size', 'len_largest_element'])
 
-class StatsAggregator():
-    def __init__(self, key_groupings = None):
-        self.aggregates = {}
-        self.scatters = {}
-        self.histograms = {}
 
-    def next_record(self, record):
-        self.add_aggregate('database_memory', record.database, record.bytes)
-        self.add_aggregate('type_memory', record.type, record.bytes)
-        self.add_aggregate('encoding_memory', record.encoding, record.bytes)
-
-        self.add_aggregate('type_count', record.type, 1)
-        self.add_aggregate('encoding_count', record.encoding, 1)
-
-        self.add_histogram(record.type + "_length", record.size)
-        self.add_histogram(record.type + "_memory", (record.bytes/10) * 10)
-
-        if record.type == 'list':
-            self.add_scatter('list_memory_by_length', record.bytes, record.size)
-        elif record.type == 'hash':
-            self.add_scatter('hash_memory_by_length', record.bytes, record.size)
-        elif record.type == 'set':
-            self.add_scatter('set_memory_by_length', record.bytes, record.size)
-        elif record.type == 'sortedset':
-            self.add_scatter('sortedset_memory_by_length', record.bytes, record.size)
-        elif record.type == 'string':
-            self.add_scatter('string_memory_by_length', record.bytes, record.size)
-        else:
-            raise Exception('Invalid data type %s' % record.type)
-
-    def add_aggregate(self, heading, subheading, metric):
-        if not heading in self.aggregates :
-            self.aggregates[heading] = {}
-
-        if not subheading in self.aggregates[heading]:
-            self.aggregates[heading][subheading] = 0
-
-        self.aggregates[heading][subheading] += metric
-
-    def add_histogram(self, heading, metric):
-        if not heading in self.histograms:
-            self.histograms[heading] = {}
-
-        if not metric in self.histograms[heading]:
-            self.histograms[heading][metric] = 1
-        else :
-            self.histograms[heading][metric] += 1
-
-    def add_scatter(self, heading, x, y):
-        if not heading in self.scatters:
-            self.scatters[heading] = []
-        self.scatters[heading].append([x, y])
-
-    def get_json(self):
-        return json.dumps({"aggregates":self.aggregates, "scatters":self.scatters, "histograms":self.histograms})
-
 class PrintAllKeys():
     def __init__(self, out):
         self._out = out

diff --git a/rdbtools/stats_aggregator.py b/rdbtools/stats_aggregator.py
@@ -0,0 +1,78 @@
+import json
+
+from collections import defaultdict
+
+
+class StatsAggregator(object):
+    """Collect summary statistics from a stream of memory records.
+
+    Results accumulate in three public attributes:
+
+    * ``aggregates`` -- ``{heading: {subheading: running total}}``
+    * ``histograms`` -- ``{heading: {metric: number of occurrences}}``
+    * ``scatters`` -- ``{heading: [[x, y], ...]}``
+    """
+
+    # Scatter heading for each record type that next_record() accepts.
+    _RECORD_TYPE_TO_HEADING = {
+        'list': 'list_memory_by_length',
+        'hash': 'hash_memory_by_length',
+        'set': 'set_memory_by_length',
+        'sortedset': 'sortedset_memory_by_length',
+        'string': 'string_memory_by_length'
+    }
+
+    def __init__(self, key_groupings=None):
+        """Create an aggregator with no data.
+
+        ``key_groupings`` is accepted but unused; the parameter is kept so
+        that callers written against the earlier signature still work.
+        """
+        self.aggregates = defaultdict(lambda: defaultdict(int))
+        self.histograms = defaultdict(lambda: defaultdict(int))
+        self.scatters = defaultdict(list)
+
+    def next_record(self, record):
+        """Fold one record into the aggregates, histograms and scatters.
+
+        ``record`` must provide ``database``, ``type``, ``encoding``,
+        ``bytes`` and ``size`` attributes.
+
+        Raises ``Exception`` if ``record.type`` has no scatter heading.  The
+        aggregates and histograms are updated before that check runs.
+        """
+        self.add_aggregate('database_memory', record.database, record.bytes)
+        self.add_aggregate('type_memory', record.type, record.bytes)
+        self.add_aggregate('encoding_memory', record.encoding, record.bytes)
+
+        self.add_aggregate('type_count', record.type, 1)
+        self.add_aggregate('encoding_count', record.encoding, 1)
+
+        self.add_histogram(record.type + '_length', record.size)
+        # Bucket memory by rounding down to a multiple of 10.  Floor division
+        # keeps the bucket an int even where "/" is true division.
+        memory_bucket = (record.bytes // 10) * 10
+        self.add_histogram(record.type + '_memory', memory_bucket)
+
+        scatter_heading = self._RECORD_TYPE_TO_HEADING.get(record.type)
+        if scatter_heading is None:
+            raise Exception('Invalid data type %s' % record.type)
+        self.add_scatter(scatter_heading, record.bytes, record.size)
+
+    def add_aggregate(self, heading, subheading, metric):
+        """Add ``metric`` to the ``heading``/``subheading`` total."""
+        self.aggregates[heading][subheading] += metric
+
+    def add_histogram(self, heading, metric):
+        """Count one more occurrence of ``metric`` under ``heading``."""
+        self.histograms[heading][metric] += 1
+
+    def add_scatter(self, heading, x, y):
+        """Append the point ``[x, y]`` to the series named ``heading``."""
+        self.scatters[heading].append([x, y])
+
+    def get_json(self):
+        """Return all collected statistics as one JSON object string."""
+        return json.dumps({'aggregates': self.aggregates,
+                           'scatters': self.scatters,
+                           'histograms': self.histograms})
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,9 +1,11 @@
 import unittest
 from tests.parser_tests import RedisParserTestCase
 from tests.memprofiler_tests import MemoryCallbackTestCase
+from tests.stats_aggregator_tests import TestStatsAggregator
 
 def all_tests():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(RedisParserTestCase))
     suite.addTest(unittest.makeSuite(MemoryCallbackTestCase))
+    suite.addTest(unittest.makeSuite(TestStatsAggregator))
     return suite
diff --git a/tests/stats_aggregator_tests.py b/tests/stats_aggregator_tests.py
@@ -0,0 +1,62 @@
+import json
+from unittest import TestCase
+
+from rdbtools import StatsAggregator
+from rdbtools.memprofiler import MemoryRecord
+
+
+class TestStatsAggregator(TestCase):
+    """Unit tests for rdbtools.StatsAggregator."""
+
+    def setUp(self):
+        self._stats = StatsAggregator()
+
+    def _record(self, type_name, size_in_bytes=40, length=3):
+        """Build a MemoryRecord of the given type for next_record()."""
+        return MemoryRecord(database=0, type=type_name, key='key',
+                            bytes=size_in_bytes, encoding='ziplist',
+                            size=length, len_largest_element=1)
+
+    def test_add_aggregate(self):
+        self._stats.add_aggregate('type_count', 'hash', 1)
+        self._stats.add_aggregate('type_count', 'hash', 2)
+
+        self.assertEqual(3, self._stats.aggregates['type_count']['hash'])
+
+    def test_add_histogram(self):
+        self._stats.add_histogram('hash_length', 12)
+        self.assertEqual(1, self._stats.histograms['hash_length'][12])
+
+        self._stats.add_histogram('hash_length', 12)
+        self.assertEqual(2, self._stats.histograms['hash_length'][12])
+
+    def test_add_scatter(self):
+        self._stats.add_scatter('set_memory_by_length', 8, 32)
+        self.assertEqual([[8, 32]],
+                         self._stats.scatters['set_memory_by_length'])
+
+    def test_next_record(self):
+        # 40 is already a multiple of 10, so the memory bucket is 40.
+        self._stats.next_record(self._record('hash'))
+
+        self.assertEqual(40, self._stats.aggregates['database_memory'][0])
+        self.assertEqual(40, self._stats.aggregates['type_memory']['hash'])
+        self.assertEqual(1, self._stats.aggregates['type_count']['hash'])
+        self.assertEqual(1, self._stats.histograms['hash_length'][3])
+        self.assertEqual(1, self._stats.histograms['hash_memory'][40])
+        self.assertEqual([[40, 3]],
+                         self._stats.scatters['hash_memory_by_length'])
+
+    def test_next_record_rejects_unknown_type(self):
+        self.assertRaises(Exception,
+                          self._stats.next_record, self._record('stream'))
+
+    def test_get_json(self):
+        self._stats.add_aggregate('type_count', 'hash', 1)
+        self._stats.add_scatter('set_memory_by_length', 8, 32)
+
+        self.assertEqual(
+            {'aggregates': {'type_count': {'hash': 1}},
+             'scatters': {'set_memory_by_length': [[8, 32]]},
+             'histograms': {}},
+            json.loads(self._stats.get_json()))