Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
927 changes: 927 additions & 0 deletions pandas/tests/io/pytables/test_append.py

Large diffs are not rendered by default.

186 changes: 186 additions & 0 deletions pandas/tests/io/pytables/test_categorical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import numpy as np
import pytest

from pandas import Categorical, DataFrame, Series, _testing as tm, concat, read_hdf
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_path,
ensure_clean_store,
)

pytestmark = pytest.mark.single


def test_categorical(setup_path):

with ensure_clean_store(setup_path) as store:

# Basic
_maybe_remove(store, "s")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=False,
)
)
store.append("s", s, format="table")
result = store.select("s")
tm.assert_series_equal(s, result)

_maybe_remove(store, "s_ordered")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=True,
)
)
store.append("s_ordered", s, format="table")
result = store.select("s_ordered")
tm.assert_series_equal(s, result)

_maybe_remove(store, "df")
df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
store.append("df", df, format="table")
result = store.select("df")
tm.assert_frame_equal(result, df)

# Dtypes
_maybe_remove(store, "si")
s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
store.append("si", s)
result = store.select("si")
tm.assert_series_equal(result, s)

_maybe_remove(store, "si2")
s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
store.append("si2", s)
result = store.select("si2")
tm.assert_series_equal(result, s)

# Multiple
_maybe_remove(store, "df2")
df2 = df.copy()
df2["s2"] = Series(list("abcdefg")).astype("category")
store.append("df2", df2)
result = store.select("df2")
tm.assert_frame_equal(result, df2)

# Make sure the metadata is OK
info = store.info()
assert "/df2 " in info
# assert '/df2/meta/values_block_0/meta' in info
assert "/df2/meta/values_block_1/meta" in info

# unordered
_maybe_remove(store, "s2")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=False,
)
)
store.append("s2", s, format="table")
result = store.select("s2")
tm.assert_series_equal(result, s)

# Query
_maybe_remove(store, "df3")
store.append("df3", df, data_columns=["s"])
expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s in ["b","c"]'])
tm.assert_frame_equal(result, expected)

expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s = ["b","c"]'])
tm.assert_frame_equal(result, expected)

expected = df[df.s.isin(["d"])]
result = store.select("df3", where=['s in ["d"]'])
tm.assert_frame_equal(result, expected)

expected = df[df.s.isin(["f"])]
result = store.select("df3", where=['s in ["f"]'])
tm.assert_frame_equal(result, expected)

# Appending with same categories is ok
store.append("df3", df)

df = concat([df, df])
expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s in ["b","c"]'])
tm.assert_frame_equal(result, expected)

# Appending must have the same categories
df3 = df.copy()
df3["s"] = df3["s"].cat.remove_unused_categories()

msg = "cannot append a categorical with different categories to the existing"
with pytest.raises(ValueError, match=msg):
store.append("df3", df3)

# Remove, and make sure meta data is removed (its a recursive
# removal so should be).
result = store.select("df3/meta/s/meta")
assert result is not None
store.remove("df3")

with pytest.raises(
KeyError, match="'No object named df3/meta/s/meta in the file'"
):
store.select("df3/meta/s/meta")


def test_categorical_conversion(setup_path):

# GH13322
# Check that read_hdf with categorical columns doesn't return rows if
# where criteria isn't met.
obsids = ["ESP_012345_6789", "ESP_987654_3210"]
imgids = ["APF00006np", "APF0001imm"]
data = [4.3, 9.8]

# Test without categories
df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})

# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)

# Test with categories
df.obsids = df.obsids.astype("category")
df.imgids = df.imgids.astype("category")

# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)


def test_categorical_nan_only_columns(setup_path):
# GH18413
# Check that read_hdf with categorical columns with NaN-only values can
# be read back.
df = DataFrame(
{
"a": ["a", "b", "c", np.nan],
"b": [np.nan, np.nan, np.nan, np.nan],
"c": [1, 2, 3, 4],
"d": Series([None] * 4, dtype=object),
}
)
df["a"] = df.a.astype("category")
df["b"] = df.b.astype("category")
df["d"] = df.b.astype("category")
expected = df
with ensure_clean_path(setup_path) as path:
df.to_hdf(path, "df", format="table", data_columns=True)
result = read_hdf(path, "df")
tm.assert_frame_equal(result, expected)
Loading