Skip to content

Commit 2d5c299

Browse files
committed
wip
1 parent 1f885f2 commit 2d5c299

File tree

2 files changed

+57
-5
lines changed

2 files changed

+57
-5
lines changed

pandas/io/pytables.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ class DuplicateWarning(Warning):
183183
"series": "SeriesFixed",
184184
"sparse_series": "SeriesFixed",
185185
"frame": "FrameFixed",
186-
"sparse_frame": "FrameFixed",
186+
"sparse_frame": "SparseFrameFixed",
187187
}
188188

189189
# table class map
@@ -2722,6 +2722,18 @@ def write(self, obj, **kwargs):
27222722

27232723
def read_array(self, key, start=None, stop=None):
27242724
""" read an array for the specified node (off of group """
2725+
if (
2726+
self.pandas_type == "sparse_series" or "sp_index_length" in self.attrs
2727+
) and key not in self.group:
2728+
# Compatibility for files written with pandas 0.25.1 and earlier.
2729+
if "sp_values" in self.group:
2730+
key = "sp_values"
2731+
dtype = "Sparse"
2732+
sp_index = self.read_index("sp_index".format(key))
2733+
else:
2734+
dtype = None
2735+
sp_index = None
2736+
27252737
import tables
27262738

27272739
node = getattr(self.group, key)
@@ -2732,7 +2744,7 @@ def read_array(self, key, start=None, stop=None):
27322744
if isinstance(node, tables.VLArray):
27332745
ret = node[0][start:stop]
27342746
else:
2735-
dtype = getattr(attrs, "value_type", None)
2747+
dtype = getattr(attrs, "value_type", dtype)
27362748
shape = getattr(attrs, "shape", None)
27372749

27382750
if shape is not None:
@@ -2754,7 +2766,8 @@ def read_array(self, key, start=None, stop=None):
27542766
raise NotImplementedError(
27552767
"start and/or stop are not supported in fixed Sparse reading"
27562768
)
2757-
sp_index = self.read_index("{}_sp_index".format(key))
2769+
if sp_index is None:
2770+
sp_index = self.read_index("{}_sp_index".format(key))
27582771
ret = SparseArray(
27592772
ret, sparse_index=sp_index, fill_value=self.attrs.fill_value
27602773
)
@@ -3079,10 +3092,10 @@ def shape(self):
30793092
except (TypeError, AttributeError):
30803093
return None
30813094

3082-
def read(self, **kwargs):
3095+
def read(self, key="values", **kwargs):
30833096
kwargs = self.validate_read(kwargs)
30843097
index = self.read_index("index", **kwargs)
3085-
values = self.read_array("values", **kwargs)
3098+
values = self.read_array(key, **kwargs)
30863099
return Series(values, index=index, name=self.name)
30873100

30883101
def write(self, obj, **kwargs):
@@ -3184,6 +3197,22 @@ class FrameFixed(BlockManagerFixed):
31843197
obj_type = DataFrame
31853198

31863199

3200+
class SparseFrameFixed(GenericFixed):
    """
    Fixed-format reader for nodes stored with the ``sparse_frame`` kind.

    The frame is rebuilt column by column: every label found in the
    stored ``columns`` index is looked up as a child node named
    ``sparse_series_<label>`` and read through ``SeriesFixed``.
    """

    pandas_kind = "sparse_frame"
    attributes = ["default_kind", "default_fill_value"]

    def read(self, **kwargs):
        """
        Read every stored column and assemble the result as a DataFrame.

        Parameters
        ----------
        **kwargs
            Passed to ``validate_read``.

        Returns
        -------
        DataFrame
            One column per label in the stored ``columns`` index.
        """
        # NOTE(review): the value returned by validate_read is not forwarded
        # to the per-column readers below -- confirm that this is intended.
        kwargs = self.validate_read(kwargs)

        column_data = {}
        for label in self.read_index("columns"):
            node_name = "sparse_series_{columns}".format(columns=label)
            reader = SeriesFixed(self.parent, getattr(self.group, node_name))
            reader.infer_axes()
            # The child node name doubles as the array key handed to the
            # series reader.
            column_data[label] = reader.read(key=node_name)
        return DataFrame(column_data)
3214+
3215+
31873216
class Table(Fixed):
31883217
""" represent a table:
31893218
facilitate read/write of various types of tables

pandas/tests/io/pytables/test_pytables.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4883,6 +4883,29 @@ def test_read_py2_hdf_file_in_py3(self, datapath):
48834883
result = store["p"]
48844884
assert_frame_equal(result, expected)
48854885

4886+
def test_read_legacy_sparse(self, datapath):
    """
    Read the "series" and "frame" keys of the legacy sparse HDF5 fixture.

    The fixture file was generated with pandas 0.25.1 and the following
    commands:

    >>> s = pd.Series([1, None, 2, 3]).to_sparse()
    >>> df = pd.DataFrame({"A": [1, None, 2, 3], "B": [1, 0, 0, 0]}).to_sparse()
    >>> s.to_hdf("pandas/tests/io/data/legacy_hdf/legacy_sparse.h5", "series")
    >>> df.to_hdf("pandas/tests/io/data/legacy_hdf/legacy_sparse.h5", "frame")
    """
    legacy_file = datapath("io", "data", "legacy_hdf", "legacy_sparse.h5")

    # Key "series": compared against a Series backed by a SparseArray.
    series_result = pd.read_hdf(legacy_file, "series")
    series_expected = pd.Series(pd.SparseArray([1, None, 2, 3]))
    tm.assert_series_equal(series_result, series_expected)

    # Key "frame": compared against a DataFrame of SparseArray columns.
    frame_result = pd.read_hdf(legacy_file, "frame")
    frame_expected = pd.DataFrame(
        {
            "A": pd.SparseArray([1, None, 2, 3]),
            "B": pd.SparseArray([1, 0, 0, 0]),
        }
    )
    tm.assert_frame_equal(frame_result, frame_expected)
4908+
48864909
@pytest.mark.parametrize("where", ["", (), (None,), [], [None]])
48874910
def test_select_empty_where(self, where):
48884911
# GH26610

0 commit comments

Comments
 (0)