Skip to content

Commit

Permalink
Fix PandasBlocks implementation for mismatching categories (#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Jul 15, 2024
1 parent 829b539 commit e832b65
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
13 changes: 12 additions & 1 deletion partd/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,17 @@ def join(dfs):
def join(dfs):
    """Concatenate a sequence of DataFrames into a single DataFrame.

    ``pd.concat`` falls back to a non-categorical dtype when the inputs'
    categorical columns do not share identical categories.  Columns that are
    categorical in the first frame but lost that dtype during concatenation
    are cast back to ``"category"`` so the result keeps the dtype of the
    stored blocks.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Frames to concatenate, in order.

    Returns
    -------
    pandas.DataFrame
        An empty DataFrame when ``dfs`` is empty, otherwise the concatenated
        frame with categorical columns restored.
    """
    if not dfs:
        return pd.DataFrame()

    result = pd.concat(dfs)
    first = dfs[0]
    dtypes = {
        col: "category"
        for col in result.columns
        # A column may exist only in later frames; the first frame then says
        # nothing about its intended dtype, so leave it untouched.
        if col in first.columns
        and isinstance(first[col].dtype, pd.CategoricalDtype)
        and not isinstance(result[col].dtype, pd.CategoricalDtype)
    }
    if dtypes:
        # Only pay for the astype copy when a column actually needs repair.
        result = result.astype(dtypes)
    return result

# PandasBlocks is Encode with the pandas-specific serialize, deserialize and
# join callables pre-bound through functools.partial.
PandasBlocks = partial(Encode, serialize, deserialize, join)
14 changes: 14 additions & 0 deletions partd/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,17 @@ def test_index_non_numeric_extension_types(dtype):
df.index = df.index.astype(dtype)
df2 = deserialize(serialize(df))
tm.assert_frame_equal(df, df2)


def test_categorical_concat():
    """Appended blocks whose categorical column has differing categories
    must be returned as a single categorical column."""
    pytest.importorskip("pandas", minversion="2")

    first = pd.DataFrame({"a": ["x", "y"]}, dtype="category")
    second = pd.DataFrame({"a": ["y", "z"]}, dtype="category")
    # Reference frame: plain concatenation cast back to categorical.
    expected = pd.concat([first, second]).astype("category")

    with PandasBlocks() as store:
        for frame in (first, second):
            store.append({"x": frame})

        fetched = store.get(["x"])

    pd.testing.assert_frame_equal(fetched[0], expected)

0 comments on commit e832b65

Please sign in to comment.