man-group · TomTaylorLondon · Apr 19, 2019 · bmoscon · Apr 23, 2019 · bmoscon
diff --git a/CHANGES.md b/CHANGES.md
@@ -3,6 +3,8 @@
 ### 1.75
   * Bugfix: pypandoc not rendering README correctly for PYPI
   * Bugfix: #744 get_info on an empty dataframe raises an exception
+  * Feature: Chunkstore: Removed duplication error when filtering by columns
+  * Feature: Chunkstore: Reduced memory footprint when reading data
 
 ### 1.74 (2019-02-28)
   * Bugfix: #712 Pandas deprecation warning in chunkstore serializer

diff --git a/arctic/chunkstore/chunkstore.py b/arctic/chunkstore/chunkstore.py
@@ -280,6 +280,7 @@ def read(self, symbol, chunk_range=None, filter_data=True, **kwargs):
             chunks[segments[0][SYMBOL]].append({DATA: chunk_data, METADATA: mdata})
 
         skip_filter = not filter_data or chunk_range is None
+        kwargs['inplace'] = kwargs.get('inplace', True)
 
         if len(symbol) > 1:
             return {sym: deser(chunks[sym], **kwargs) if skip_filter else chunker.filter(deser(chunks[sym], **kwargs), chunk_range) for sym in symbol}

diff --git a/arctic/serialization/numpy_arrays.py b/arctic/serialization/numpy_arrays.py
@@ -193,7 +193,7 @@ def serialize(self, df):
         ret[METADATA][TYPE] = dtype
         return ret
 
-    def deserialize(self, data, columns=None):
+    def deserialize(self, data, columns=None, inplace=False):
         """
         Deserializes SON to a DataFrame
 
@@ -203,13 +203,17 @@ def deserialize(self, data, columns=None):
         columns: None, or list of strings
             optionally you can deserialize a subset of the data in the SON. Index
             columns are ALWAYS deserialized, and should not be specified
+        inplace: bool
+            if True, items are removed from the caller's data list as they are converted (the list is left empty); if False, a shallow copy is consumed instead
 
         Returns
         -------
         pandas dataframe or series
         """
         if not data:
             return pd.DataFrame()
+        if not inplace:
+            data = data[:]
 
         meta = data[0][METADATA] if isinstance(data, list) else data[METADATA]
         index = INDEX in meta
@@ -218,16 +222,19 @@ def deserialize(self, data, columns=None):
             if index:
                 columns = columns[:]
                 columns.extend(meta[INDEX])
-            if len(columns) > len(set(columns)):
-                raise Exception("Duplicate columns specified, cannot de-serialize")
+                columns = list(set(columns))
 
         if not isinstance(data, list):
             df = self.converter.objify(data, columns)
         else:
-            df = pd.concat([self.converter.objify(d, columns) for d in data], ignore_index=not index)
+            dfs = []
+            while len(data):
+                dfs.append(self.converter.objify(data.pop(0), columns))
+            df = pd.concat(dfs, ignore_index=not index)
+            del dfs
 
         if index:
-            df = df.set_index(meta[INDEX])
+            df.set_index(meta[INDEX], inplace=True)
         if meta[TYPE] == 'series':
             return df[df.columns[0]]
         return df