Skip to content

ERR: raise on invalid columns using a fixed HDFStore #13492

Closed
@amanhanda

Description

@amanhanda

Code Sample

idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), name='cols')
idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]), name='rows')
s = pd.DataFrame(np.arange(4).reshape(2,2), columns=idx, index=idx1)
print type(s.index.name)
# The type is str
<type 'str'>
s.reset_index()
cols       rows  2000-01-01 00:00:00  2000-01-02 00:00:00
0    2010-01-01                    0                    1
1    2010-01-02                    2                    3
with pd.HDFStore("/logs/tmp/test.h5", "w") as store:
    store.put("test", s, "fixed")
# When reading the data from HDF5, the index name comes back as a numpy.string_

with pd.HDFStore("/logs/tmp/test.h5", "r") as store:
    s1 = store["test"]
type(s1.index.name)
numpy.string_
# The np.concatenate line raises a ValueError, which the code does not catch,
# so the fallback that converts the columns from a DatetimeIndex to an object index never runs and reset_index fails

s1.reset_index()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-93-f61766d7f5c1> in <module>()
----> 1 s1.reset_index()

/auto/energymdl2/anaconda/envs/commod_20160516/lib/python2.7/site-packages/pandas/core/frame.pyc in reset_index(self, level, drop, inplace, col_level, col_fill)
   2731                     name = tuple(name_lst)
   2732             values = _maybe_casted_values(self.index)
-> 2733             new_obj.insert(0, name, values)
   2734
   2735         new_obj.index = new_index

/auto/energymdl2/anaconda/envs/commod_20160516/lib/python2.7/site-packages/pandas/core/frame.pyc in insert(self, loc, column, value, allow_duplicates)
   2228         value = self._sanitize_column(column, value)
   2229         self._data.insert(
-> 2230             loc, column, value, allow_duplicates=allow_duplicates)
   2231
   2232     def assign(self, **kwargs):

/auto/energymdl2/anaconda/envs/commod_20160516/lib/python2.7/site-packages/pandas/core/internals.pyc in insert(self, loc, item, value, allow_duplicates)
   3100             self._blknos = np.insert(self._blknos, loc, len(self.blocks))
   3101
-> 3102         self.axes[0] = self.items.insert(loc, item)
   3103
   3104         self.blocks += (block,)

/auto/energymdl2/anaconda/envs/commod_20160516/lib/python2.7/site-packages/pandas/tseries/index.pyc in insert(self, loc, item)
   1505             item = _to_m8(item, tz=self.tz)
   1506         try:
-> 1507             new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
   1508                                         self[loc:].asi8))
   1509             if self.tz is not None:

ValueError: new type not compatible with array.

# The except clause does not catch ValueError

.../pandas/tseries/index.py
   1720         freq = None
   1721
   1722         if isinstance(item, (datetime, np.datetime64)):
   1723             self._assert_can_do_op(item)
   1724             if not self._has_same_tz(item):
   1725                 raise ValueError(
   1726                     'Passed item and index have different timezone')
   1727             # check freq can be preserved on edge cases
   1728             if self.size and self.freq is not None:
   1729                 if ((loc == 0 or loc == -len(self)) and
   1730                         item + self.freq == self[0]):
   1731                     freq = self.freq
   1732                 elif (loc == len(self)) and item - self.freq == self[-1]:
   1733                     freq = self.freq
   1734             item = _to_m8(item, tz=self.tz)
   1735         try:
1> 1736             new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
   1737                                         self[loc:].asi8))
   1738             if self.tz is not None:
   1739                 new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
   1740             return DatetimeIndex(new_dates, name=self.name, freq=freq,
   1741                                  tz=self.tz)
   1742
   1743         except (AttributeError, TypeError):
   1744
   1745             # fall back to object index
   1746             if isinstance(item, compat.string_types):
   1747                 return self.asobject.insert(loc, item)
   1748             raise TypeError(
   1749                 "cannot insert DatetimeIndex with incompatible label")

Expected Output

cols       rows  2000-01-01 00:00:00  2000-01-02 00:00:00
0    2010-01-01                    0                    1
1    2010-01-02                    2                    3

output of pd.show_versions()

# Problem occurs in 0.16.2 and 0.18.1

INSTALLED VERSIONS
------------------
commit: None
python: 2.7.11.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-573.7.1.el6.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: C
LANG: en_US.UTF-8

pandas: 0.18.1
nose: 1.3.7
pip: 8.1.1
setuptools: 20.7.0
Cython: 0.24
numpy: 1.10.4
scipy: 0.17.0
statsmodels: 0.6.1
xarray: 0.7.2
IPython: 4.1.2
sphinx: 1.3.5
patsy: 0.4.1
dateutil: 2.5.2
pytz: 2016.4
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.5.2
matplotlib: 1.4.3
openpyxl: 2.3.2
xlrd: 0.9.4
xlwt: 1.0.0
xlsxwriter: 0.8.4
lxml: 3.6.0
bs4: 4.3.2
html5lib: 0.999
httplib2: 0.9.2
apiclient: 1.5.0
sqlalchemy: 1.0.12
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.39.0
pandas_datareader: None


Metadata

Metadata

Assignees

No one assigned

    Labels

    Error Reporting: Incorrect or improved errors from pandas; IO HDF5: read_hdf, HDFStore

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions