Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Make repeat method consistent #24395

Merged
merged 3 commits into from
Dec 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.compat import PY3, set_function_name
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
Expand All @@ -20,6 +21,8 @@

_not_implemented_message = "{} does not implement {}."

_extension_array_shared_docs = dict()


class ExtensionArray(object):
"""
Expand Down Expand Up @@ -580,32 +583,55 @@ def factorize(self, na_sentinel=-1):
uniques = self._from_factorized(uniques, self)
return labels, uniques

def repeat(self, repeats, axis=None):
"""
Repeat elements of an array.
_extension_array_shared_docs['repeat'] = """
Repeat elements of a %(klass)s.

.. versionadded:: 0.24.0
Returns a new %(klass)s where each element of the current %(klass)s
is repeated consecutively a given number of times.

Parameters
----------
repeats : int
Copy link
Member Author

@jschendel jschendel Dec 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that `repeats` can be an array of ints in the numpy implementation, and this appears to work for the pandas implementation as well. Added tests for this behavior in places where they didn't previously exist.

In [2]: np.array([10, 20, 30]).repeat([1, 2, 3])
Out[2]: array([10, 20, 20, 30, 30, 30])

In [3]: pd.Index(list('abc')).repeat([1, 2, 3])
Out[3]: Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')

This should be a non-negative integer. Repeating 0 times
will return an empty array.
repeats : int or array of ints
The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
%(klass)s.
*args
Additional arguments have no effect but might be accepted for
compatibility with numpy.
**kwargs
Additional keywords have no effect but might be accepted for
compatibility with numpy.

Returns
-------
repeated_array : ExtensionArray
Same type as the input, with elements repeated `repeats` times.
repeated_array : %(klass)s
Newly created %(klass)s with repeated elements.

See Also
--------
Series.repeat : Equivalent function for Series.
Index.repeat : Equivalent function for Index.
numpy.repeat : Similar method for :class:`numpy.ndarray`.
ExtensionArray.take : Take arbitrary positions.

Examples
--------
>>> cat = pd.Categorical(['a', 'b', 'c'])
>>> cat
[a, b, c]
Categories (3, object): [a, b, c]
>>> cat.repeat(2)
[a, a, b, b, c, c]
Categories (3, object): [a, b, c]
>>> cat.repeat([1, 2, 3])
[a, b, b, c, c, c]
Categories (3, object): [a, b, c]
"""
if axis is not None:
raise ValueError("'axis' must be None.")
if repeats < 0:
raise ValueError("negative repeats are not allowed.")

@Substitution(klass='ExtensionArray')
@Appender(_extension_array_shared_docs['repeat'])
def repeat(self, repeats, *args, **kwargs):
nv.validate_repeat(args, kwargs)
ind = np.arange(len(self)).repeat(repeats)
return self.take(ind)

Expand Down
12 changes: 3 additions & 9 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pandas.io.formats import console
from pandas.io.formats.terminal import get_terminal_size

from .base import ExtensionArray
from .base import ExtensionArray, _extension_array_shared_docs

_take_msg = textwrap.dedent("""\
Interpreting negative values in 'indexer' as missing values.
Expand Down Expand Up @@ -2394,15 +2394,9 @@ def describe(self):

return result

@Substitution(klass='Categorical')
@Appender(_extension_array_shared_docs['repeat'])
def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of a Categorical.

See Also
--------
numpy.ndarray.repeat

"""
nv.validate_repeat(args, kwargs)
codes = self._codes.repeat(repeats)
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
Expand Down
39 changes: 8 additions & 31 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries)
from pandas.core.dtypes.missing import isna, notna

from pandas.core.arrays.base import (
ExtensionArray, _extension_array_shared_docs)
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.config import get_option
from pandas.core.indexes.base import Index, ensure_index

from . import Categorical, ExtensionArray

_VALID_CLOSED = {'left', 'right', 'both', 'neither'}
_interval_shared_docs = {}

Expand Down Expand Up @@ -1000,35 +1001,11 @@ def to_tuples(self, na_tuple=True):
tuples = np.where(~self.isna(), tuples, np.nan)
return tuples

def repeat(self, repeats, **kwargs):
"""
Repeat elements of an IntervalArray.

Returns a new IntervalArray where each element of the current
IntervalArray is repeated consecutively a given number of times.

Parameters
----------
repeats : int
The number of repetitions for each element.

**kwargs
Additional keywords have no effect but might be accepted for
compatibility with numpy.

Returns
-------
IntervalArray
Newly created IntervalArray with repeated elements.

See Also
--------
Index.repeat : Equivalent function for Index.
Series.repeat : Equivalent function for Series.
numpy.repeat : Underlying implementation.
"""
left_repeat = self.left.repeat(repeats, **kwargs)
right_repeat = self.right.repeat(repeats, **kwargs)
@Appender(_extension_array_shared_docs['repeat'] % _shared_docs_kwargs)
def repeat(self, repeats, *args, **kwargs):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The base class implementation works for IntervalArray as well, but a couple of off-the-cuff timings showed this implementation to be 1.5x - 2x faster.

nv.validate_repeat(args, kwargs)
left_repeat = self.left.repeat(repeats)
right_repeat = self.right.repeat(repeats)
return self._shallow_copy(left=left_repeat, right=right_repeat)

_interval_shared_docs['overlaps'] = """
Expand Down
14 changes: 0 additions & 14 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
period_asfreq_arr)
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, cache_readonly
from pandas.util._validators import validate_fillna_kwargs

Expand Down Expand Up @@ -593,19 +592,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
def strftime(self, date_format):
return self._format_native_types(date_format=date_format)

def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of a PeriodArray.

See Also
--------
numpy.ndarray.repeat
"""
# TODO(DatetimeArray): remove
nv.validate_repeat(args, kwargs)
values = self._data.repeat(repeats)
return type(self)(values, self.freq)

def astype(self, dtype, copy=True):
# TODO: Figure out something better here...
# We have DatetimeLikeArrayMixin ->
Expand Down
35 changes: 21 additions & 14 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,41 +832,48 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
taken = values.take(indices)
return taken

def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of an Index.
_index_shared_docs['repeat'] = """
Repeat elements of a %(klass)s.

Returns a new index where each element of the current index
Returns a new %(klass)s where each element of the current %(klass)s
is repeated consecutively a given number of times.

Parameters
----------
repeats : int
The number of repetitions for each element.
repeats : int or array of ints
The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
%(klass)s.
*args
Additional arguments have no effect but might be accepted for
compatibility with numpy.
**kwargs
Additional keywords have no effect but might be accepted for
compatibility with numpy.

Returns
-------
pandas.Index
Newly created Index with repeated elements.
repeated_index : %(klass)s
Newly created %(klass)s with repeated elements.

See Also
--------
Series.repeat : Equivalent function for Series.
numpy.repeat : Underlying implementation.
numpy.repeat : Similar method for :class:`numpy.ndarray`.

Examples
--------
>>> idx = pd.Index([1, 2, 3])
>>> idx = pd.Index(['a', 'b', 'c'])
>>> idx
Int64Index([1, 2, 3], dtype='int64')
Index(['a', 'b', 'c'], dtype='object')
>>> idx.repeat(2)
Int64Index([1, 1, 2, 2, 3, 3], dtype='int64')
>>> idx.repeat(3)
Int64Index([1, 1, 1, 2, 2, 2, 3, 3, 3], dtype='int64')
Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
>>> idx.repeat([1, 2, 3])
Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
"""

@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
def repeat(self, repeats, *args, **kwargs):
nv.validate_repeat(args, kwargs)
return self._shallow_copy(self._values.repeat(repeats))

Expand Down
12 changes: 3 additions & 9 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,17 +445,11 @@ def isin(self, values):

return algorithms.isin(self.asi8, values.asi8)

@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
def repeat(self, repeats, *args, **kwargs):
"""
Analogous to ndarray.repeat.
"""
nv.validate_repeat(args, kwargs)
if is_period_dtype(self):
freq = self.freq
else:
freq = None
return self._shallow_copy(self.asi8.repeat(repeats),
freq=freq)
freq = self.freq if is_period_dtype(self) else None
return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
jreback marked this conversation as resolved.
Show resolved Hide resolved

@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
def where(self, cond, other=None):
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1857,6 +1857,7 @@ def append(self, other):
def argsort(self, *args, **kwargs):
return self.values.argsort(*args, **kwargs)

@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
def repeat(self, repeats, *args, **kwargs):
nv.validate_repeat(args, kwargs)
return MultiIndex(levels=self.levels,
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,10 +924,6 @@ def wrapper(self, other):
wrapper.__name__ = '__{}__'.format(op.__name__)
return wrapper

def repeat(self, repeats, *args, **kwargs):
# TODO(DatetimeArray): Just use Index.repeat
return Index.repeat(self, repeats, *args, **kwargs)

def view(self, dtype=None, type=None):
# TODO(DatetimeArray): remove
if dtype is None or dtype is __builtins__['type'](self):
Expand Down
52 changes: 49 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,12 +1038,58 @@ def _set_values(self, key, value):

def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of an Series. Refer to `numpy.ndarray.repeat`
for more information about the `repeats` argument.
Repeat elements of a Series.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any way to re-use the docstring you defined for EA (meaning here and for Index)? Maybe make it even more generic and parametrize on the types? (It's OK that we are duplicating code, but the more we can share the better.)
cc @datapythonista

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had the same thought but was unsure how to do it in a clean way. I think you could split the docstring so as to reuse everything but the See Also and Examples sections, then separately define only those sections for EA/Index/Series. Seems a bit convoluted though; certainly interested to hear if there's a better way.


Returns a new Series where each element of the current Series
is repeated consecutively a given number of times.

Parameters
----------
repeats : int or array of ints
The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
Series.
*args
Additional arguments have no effect but might be accepted for
compatibility with numpy.
**kwargs
Additional keywords have no effect but might be accepted for
compatibility with numpy.

Returns
-------
repeated_series : Series
Newly created Series with repeated elements.

See Also
--------
numpy.ndarray.repeat
Index.repeat : Equivalent function for Index.
numpy.repeat : Similar method for :class:`numpy.ndarray`.

Examples
--------
>>> s = pd.Series(['a', 'b', 'c'])
>>> s
0 a
1 b
2 c
dtype: object
>>> s.repeat(2)
0 a
0 a
1 b
1 b
2 c
2 c
dtype: object
>>> s.repeat([1, 2, 3])
0 a
1 b
1 b
2 c
2 c
2 c
dtype: object
"""
nv.validate_repeat(args, kwargs)
new_index = self.index.repeat(repeats)
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,22 +292,6 @@ def test_validate_inplace(self):
with pytest.raises(ValueError):
cat.sort_values(inplace=value)

def test_repeat(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deleted as these appear to be testing the same thing as the base class tests in tests/extension/base/methods.py

# GH10183
cat = Categorical(["a", "b"], categories=["a", "b"])
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
res = cat.repeat(2)
tm.assert_categorical_equal(res, exp)

def test_numpy_repeat(self):
cat = Categorical(["a", "b"], categories=["a", "b"])
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
tm.assert_categorical_equal(np.repeat(cat, 2), exp)

msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(cat, 2, axis=1)

def test_isna(self):
exp = np.array([False, False, True])
c = Categorical(["a", "b", np.nan])
Expand Down
Loading