Skip to content

ENH: ArrayManager.quantile #40189

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: ArrayManager.quantile
  • Loading branch information
jbrockmendel committed Mar 3, 2021
commit 1daa18ea2174d69c75e6596f79fefb67681e751f
8 changes: 2 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9650,15 +9650,11 @@ def quantile(
return self._constructor([], index=q, columns=cols)
return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)

result = data._mgr.quantile(
res = data._mgr.quantile(
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
)

result = self._constructor(result)

if is_transposed:
result = result.T

result = self._constructor(res)
return result

@doc(NDFrame.asfreq, **_shared_doc_kwargs)
Expand Down
25 changes: 24 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
)

import pandas.core.algorithms as algos
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
Expand All @@ -75,6 +76,7 @@
from pandas.core.internals.blocks import make_block

if TYPE_CHECKING:
from pandas import Float64Index
from pandas.core.internals.managers import SingleBlockManager


Expand Down Expand Up @@ -433,7 +435,28 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:

return type(self)(result_arrays, self._axes)

# TODO quantile
def quantile(
self,
*,
qs: Float64Index,
axis: int = 0,
transposed: bool = False,
interpolation="linear",
) -> ArrayManager:

arrs = [
x if not isinstance(x, np.ndarray) else np.atleast_2d(x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the reshaping be moved to quantile_compat, since it already does this for EAs?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one (potentially)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ArrayManager-specific logic should stay in ArrayManager wherever possible.

for x in self.arrays
]
assert axis == 1
new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs]
for i, arr in enumerate(new_arrs):
if arr.ndim == 2:
assert arr.shape[0] == 1, arr.shape
new_arrs[i] = arr[0]

axes = [qs, self._axes[1]]
return type(self)(new_arrs, axes)

def isna(self, func) -> ArrayManager:
return self.apply("apply", func=func)
Expand Down
7 changes: 0 additions & 7 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,13 +554,6 @@ def quantile(
for blk in self.blocks
]

if transposed:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the `transposed` keyword still used after removing this? If not, the keyword can be removed as well.

new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return type(self)(blocks, new_axes)

def isna(self, func) -> BlockManager:
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ def test_quantile_empty_no_columns(self):
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_invalid_test
def test_quantile_item_cache(self):
# previous behavior incorrectly retained an invalid _item_cache entry
df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't comment on the exact line (3 lines below), but we could also do:

if not using_array_manager:
    assert len(df._mgr.blocks) == 2

because I think the rest of the test is still valid

Expand Down Expand Up @@ -608,12 +609,18 @@ def test_quantile_ea_with_na(self, index, frame_or_series):
expected = frame_or_series(expected)
tm.assert_equal(result, expected)

# TODO: filtering can be removed after GH#39763 is fixed
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
def test_quantile_ea_all_na(self, index, frame_or_series):

obj = frame_or_series(index).copy()

obj.iloc[:] = index._na_value

# TODO: this casting should be unnecessary after GH#39763 is fixed
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# TODO: this casting should be unnecessary after GH#39763 is fixed
# TODO(ArrayManager) this casting should be unnecessary after GH#39763 is fixed

obj[:] = obj.astype(index.dtype)
assert np.all(obj.dtypes == index.dtype)

# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/series/methods/test_quantile.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer

import pandas as pd
Expand Down