Skip to content

Commit

Permalink
FIX-modin-project#6259: Fix astype("category") causing read-only buffer error (modin-project#6267)
Browse files Browse the repository at this point in the history

Signed-off-by: mvashishtha <mahesh@ponder.io>
  • Loading branch information
mvashishtha authored Jun 14, 2023
1 parent e987314 commit a882ece
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
13 changes: 11 additions & 2 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas._libs.lib import no_default
from typing import List, Hashable, Optional, Callable, Union, Dict, TYPE_CHECKING

from modin.config import Engine
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
from modin.core.storage_formats.pandas.utils import get_length_list
from modin.error_message import ErrorMessage
Expand Down Expand Up @@ -1374,6 +1375,7 @@ def astype(self, col_dtypes, errors: str = "raise"):
# will store the encoded table. That can lead to higher memory footprint.
# TODO: Revisit if this hurts users.
use_full_axis_cast = False
has_categorical_cast = False
for i, column in enumerate(columns):
dtype = col_dtypes[column]
if (
Expand All @@ -1400,13 +1402,20 @@ def astype(self, col_dtypes, errors: str = "raise"):
columns=[column]
)[column],
)
use_full_axis_cast = True
use_full_axis_cast = has_categorical_cast = True
else:
new_dtypes[column] = new_dtype

def astype_builder(df):
    """
    Compute new partition frame with dtypes updated.

    Parameters
    ----------
    df : object
        Partition frame to cast (presumably a ``pandas.DataFrame`` -- confirm
        against the caller). Must support ``copy``, ``astype`` and ``in``.

    Returns
    -------
    object
        Result of calling ``astype`` on the frame (or on a deep copy of it).
    """
    # TODO(https://github.com/modin-project/modin/issues/6266): Remove this
    # copy, which is a workaround for https://github.com/pandas-dev/pandas/issues/53658
    # The deep copy is only taken on the Ray engine and only when at least one
    # requested cast is categorical; every other case casts ``df`` directly.
    df_for_astype = (
        df.copy(deep=True)
        if Engine.get() == "Ray" and has_categorical_cast
        else df
    )
    # Only forward the dtypes for columns present in this partition.
    return df_for_astype.astype(
        {k: v for k, v in col_dtypes.items() if k in df}, errors=errors
    )

Expand Down
17 changes: 17 additions & 0 deletions modin/pandas/test/dataframe/test_map_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,23 @@ def test_astype_category_large():
assert modin_result.dtypes.equals(pandas_result.dtypes)


@pytest.mark.xfail(
    StorageFormat.get() == "Hdk",
    reason="https://github.com/modin-project/modin/issues/6268",
    strict=True,
)
def test_astype_int64_to_astype_category_github_issue_6259():
    """Compare ``astype("Int64").astype("category")`` on column ``c0`` across the test frames."""
    frame_data = {
        "c0": [0, 1, 2, 3, 4],
        "par": ["foo", "boo", "bar", "foo", "boo"],
    }
    row_labels = ["a", "b", "c", "d", "e"]
    test_dfs = create_test_dfs(frame_data, index=row_labels)

    def cast_to_int64_then_category(df):
        """Cast column ``c0`` to ``Int64`` and then to ``category``."""
        as_nullable_int = df["c0"].astype("Int64")
        return as_nullable_int.astype("category")

    eval_general(
        *test_dfs,
        cast_to_int64_then_category,
        # work around https://github.com/modin-project/modin/issues/6016
        raising_exceptions=(Exception,),
    )


@pytest.mark.skipif(
get_current_execution() == "BaseOnPython",
reason="BaseOnPython doesn't have proxy categories",
Expand Down

0 comments on commit a882ece

Please sign in to comment.