Skip to content

ENH: allow saving of meta-data via CArrays to support wide tables #11788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge jreback's suggested changes. Also handle rewrite and MultiIndex.
And, make sure that non_index_axes is preserved if it was written by
legacy code.
  • Loading branch information
MJuddBooth committed Dec 7, 2015
commit d548e0f3f2de2244d6196bde05f05b9912101830
67 changes: 52 additions & 15 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
import itertools
import warnings
import os
from six import string_types
from tables.exceptions import NoSuchNodeError

import numpy as np
import pandas as pd
from pandas import (Series, DataFrame, Panel, Panel4D, Index,
Expand Down Expand Up @@ -42,6 +41,8 @@
import pandas.algos as algos
import pandas.tslib as tslib

from tables.exceptions import NoSuchNodeError, NodeError

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update the version info

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0.18?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep

from contextlib import contextmanager
from distutils.version import LooseVersion

Expand Down Expand Up @@ -1513,6 +1514,10 @@ def cvalues(self):
""" return my cython values """
return self.values

# Property exposing the instance's private ``_handle`` attribute; the
# get_attr/set_attr changes in this diff go through it to call
# get_node / create_carray / remove_node.
@property
def handle(self):
return self._handle

# Make the object iterable by delegating to an iterator over ``self.values``.
def __iter__(self):
return iter(self.values)

Expand Down Expand Up @@ -2048,8 +2053,12 @@ def get_attr(self):
self.values = getattr(self.attrs, self.kind_attr, None)
if self.values is None:
try:
self.values = self._handle.get_node(self.attrs._v_node._v_parent,
self.kind_attr)[:].tolist()
data = self.handle.get_node(self.attrs._v_node._v_parent, self.kind_attr)[:]
if len(data.shape) > 1 and data.shape[1] > 1: # multiIndex
self.values = map(tuple, data.tolist())
else:
self.values = data.tolist()

except NoSuchNodeError:
pass
self.dtype = getattr(self.attrs, self.dtype_attr, None)
Expand All @@ -2059,9 +2068,18 @@ def get_attr(self):
def set_attr(self):
""" set the data for this colummn """
#setattr(self.attrs, self.kind_attr, self.values)
self._handle.create_carray(self.attrs._v_node._v_parent,
try:
self.handle.create_carray(self.attrs._v_node._v_parent,
self.kind_attr,
obj=np.array(self.values))
except NodeError as e:
self.handle.remove_node(self.attrs._v_node._v_parent,
self.kind_attr)
self.handle.create_carray(self.attrs._v_node._v_parent,
self.kind_attr,
obj=np.array(self.values))
except Exception as e: # for debugging
raise
setattr(self.attrs, self.meta_attr, self.meta)
if self.dtype is not None:
setattr(self.attrs, self.dtype_attr, self.dtype)
Expand Down Expand Up @@ -3033,20 +3051,39 @@ def set_info(self):
self.attrs.info = self.info

def set_non_index_axes(self):
replacement = []
for dim, flds in self.non_index_axes:
name = "non_index_axes_%d" % dim
self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds))
replacement.append((dim, name))
""" Write the axes to carrays """
def f(dim, flds):
name = "non_index_axes_%d" % dim
try:
self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `_convert_index` on the data (and `_unconvert_index` when deserializing); this will do all the proper type conversions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't aware of that code. My simple implementation of your suggestion sort of works, but it raises different errors. The root of the problem really seems to be `non_index_axes`, which converts proper index objects to arrays. In addition, `lib.infer_dtype` currently does the wrong thing for the test case: it reports "datetime" rather than "datetime64" for an array of Timestamps, so the round trip `_unconvert_index(_convert_index())` is incorrect. I'll have to investigate a bit more.

except ValueError as e:
# Should probably make this check:
#if e.message == "unknown type: 'object'":
# raise ValueError("axis {} has dtype 'object' which cannot be saved to carray".format(dim))
raise
except NodeError as e:
self._handle.remove_node(self.attrs._v_node, name)
self._handle.create_carray(self.attrs._v_node, name, obj=np.array(flds))
return dim, flds

replacement = [f(dim, flds) for dim, flds in self.non_index_axes]
self.attrs.non_index_axes = replacement

def get_non_index_axes(self):
non_index_axes = getattr(self.attrs, 'non_index_axes', [])
new = []
for dim, flds in non_index_axes:
"""Load the non-index axes from their carrays. This is a pass-through
for tables stored prior to v0.17"""
def f(dim, flds):
if isinstance(flds, string_types):
flds = self._handle.get_node(self.attrs._v_node, flds)[:].tolist()
new.append((dim, flds))
flds = self._handle.get_node(self.attrs._v_node, flds)[:]
if len(flds.shape) > 1 and flds.shape[1] > 1:
flds = map(tuple, flds.tolist())
else:
flds = flds.tolist()
return dim, flds
else:
return dim, flds #if not a string presumably pre v17 list
non_index_axes = getattr(self.attrs, 'non_index_axes', [])
new = [f(dim, flds) for dim, flds in non_index_axes]
return new

def set_attrs(self):
Expand Down