Skip to content

use range in RangeIndex instead of _start etc. #26581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 5, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Changes.
  • Loading branch information
topper-123 committed Jun 3, 2019
commit a86cd9a915b79ea06a44f49df9e24523cb3e7974
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ Other Deprecations
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
- The internal ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated.
Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).


.. _whatsnew_0250.prior_deprecations:

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2282,7 +2282,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
text_col 5 non-null object
float_col 5 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 312.0+ bytes
memory usage: 248.0+ bytes

Prints a summary of the column count and dtypes, but not per-column
information:
Expand All @@ -2292,7 +2292,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
RangeIndex: 5 entries, 0 to 4
Columns: 3 entries, int_col to float_col
dtypes: float64(1), int64(1), object(1)
memory usage: 312.0+ bytes
memory usage: 248.0+ bytes

Pipe output of DataFrame.info to a buffer instead of sys.stdout, get
the buffer content and write it to a text file:
Expand Down Expand Up @@ -2494,7 +2494,7 @@ def memory_usage(self, index=True, deep=False):
4 1 1.0 1.0+0.0j 1 True

>>> df.memory_usage()
Index 192
Index 128
int64 40000
float64 40000
complex128 80000
Expand All @@ -2513,7 +2513,7 @@ def memory_usage(self, index=True, deep=False):
The memory footprint of `object` dtype columns is ignored by default:

>>> df.memory_usage(deep=True)
Index 192
Index 128
int64 40000
float64 40000
complex128 80000
Expand All @@ -2525,7 +2525,7 @@ def memory_usage(self, index=True, deep=False):
many repeated values.

>>> df['object'].astype('category').memory_usage(deep=True)
5280
5216
"""
result = Series([c.memory_usage(index=False, deep=deep)
for col, c in self.iteritems()], index=self.columns)
Expand Down
62 changes: 29 additions & 33 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,6 @@ def _simple_new(cls, start, stop=None, step=None, name=None,
for k, v in kwargs.items():
setattr(result, k, v)

result._range = range(result._start, result._stop, result._step)

result._reset_identity()
return result

Expand Down Expand Up @@ -238,20 +236,20 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs):
@cache_readonly
def start(self):
"""
The value of the `start` parameter (or ``0`` if this was not supplied)
The value of the `start` parameter (``0`` if this was not supplied)
"""
# GH 25710
return self._range.start

@property
def _start(self):
"""
The value of the `start` parameter (or ``0`` if this was not supplied)
The value of the `start` parameter (``0`` if this was not supplied)

.. deprecated:: 0.25.0
Use ._range.start or .start instead.
Use ``start`` instead.
"""
return self._range.start
return self.start

@cache_readonly
def stop(self):
Expand All @@ -266,38 +264,38 @@ def _stop(self):
The value of the `stop` parameter

.. deprecated:: 0.25.0
Use ._range.stop or .stop instead.
Use ``stop`` instead.
"""
# GH 25710
return self._range.stop
return self.stop

@cache_readonly
def step(self):
"""
The value of the `step` parameter (or ``1`` if this was not supplied)
The value of the `step` parameter (``1`` if this was not supplied)
"""
# GH 25710
return self._range.step

@property
def _step(self):
"""
The value of the `step` parameter (or ``1`` if this was not supplied)
The value of the `step` parameter (``1`` if this was not supplied)

.. deprecated:: 0.25.0
Use ._range.step or .step instead.
Use ``step`` instead.
"""
# GH 25710
return self._range.step
return self.step

@cache_readonly
def nbytes(self):
"""
Return the number of bytes in the underlying data.
"""
rng = self._range
return getsizeof(rng) + sum(getsizeof(rng, v)
for v in ['start', 'stop', 'step'])
return getsizeof(rng) + sum(getsizeof(getattr(rng, attr_name))
for attr_name in ['start', 'stop', 'step'])

def memory_usage(self, deep=False):
"""
Expand Down Expand Up @@ -361,7 +359,7 @@ def tolist(self):
def _shallow_copy(self, values=None, **kwargs):
if values is None:
name = kwargs.get("name", self.name)
return RangeIndex._simple_new(
return self._simple_new(
name=name, **dict(self._get_data_as_items()))
else:
kwargs.setdefault('name', self.name)
Expand All @@ -372,7 +370,7 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs):
self._validate_dtype(dtype)
if name is None:
name = self.name
return RangeIndex.from_range(self._range, name=name)
return self.from_range(self._range, name=name)

def _minmax(self, meth):
no_steps = len(self) - 1
Expand Down Expand Up @@ -454,7 +452,7 @@ def intersection(self, other, sort=False):
return super().intersection(other, sort=sort)

if not len(self) or not len(other):
return RangeIndex._simple_new(None)
return self._simple_new(None)

first = self._range[::-1] if self.step < 0 else self._range
second = other._range[::-1] if other.step < 0 else other._range
Expand All @@ -464,7 +462,7 @@ def intersection(self, other, sort=False):
int_low = max(first.start, second.start)
int_high = min(first.stop, second.stop)
if int_high <= int_low:
return RangeIndex._simple_new(None)
return self._simple_new(None)

# Method hint: linear Diophantine equation
# solve intersection problem
Expand All @@ -474,20 +472,18 @@ def intersection(self, other, sort=False):

# check whether element sets intersect
if (first.start - second.start) % gcd:
return RangeIndex._simple_new(None)
return self._simple_new(None)

# calculate parameters for the RangeIndex describing the
# intersection disregarding the lower bounds
tmp_start = first.start + (second.start - first.start) * \
first.step // gcd * s
new_step = first.step * second.step // gcd
new_index = RangeIndex._simple_new(tmp_start, int_high, new_step)
new_index = self._simple_new(tmp_start, int_high, new_step)

# adjust index to limiting interval
new_start = new_index._min_fitting_element(int_low)
new_index = RangeIndex._simple_new(new_start,
new_index.stop,
new_index.step)
new_index = self._simple_new(new_start, new_index.stop, new_index.step)

if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
new_index = new_index[::-1]
Expand Down Expand Up @@ -566,21 +562,23 @@ def _union(self, other, sort):
if ((start_s - start_o) % step_s == 0 and
(start_s - end_o) <= step_s and
(start_o - end_s) <= step_s):
return RangeIndex(start_r, end_r + step_s, step_s)
return self.__class__(start_r, end_r + step_s, step_s)
if ((step_s % 2 == 0) and
(abs(start_s - start_o) <= step_s / 2) and
(abs(end_s - end_o) <= step_s / 2)):
return RangeIndex(start_r, end_r + step_s / 2, step_s / 2)
return self.__class__(start_r,
end_r + step_s / 2,
step_s / 2)
elif step_o % step_s == 0:
if ((start_o - start_s) % step_s == 0 and
(start_o + step_s >= start_s) and
(end_o - step_s <= end_s)):
return RangeIndex(start_r, end_r + step_s, step_s)
return self.__class__(start_r, end_r + step_s, step_s)
elif step_s % step_o == 0:
if ((start_s - start_o) % step_o == 0 and
(start_s + step_o >= start_o) and
(end_s - step_o <= end_o)):
return RangeIndex(start_r, end_r + step_o, step_o)
return self.__class__(start_r, end_r + step_o, step_o)
return self._int64index._union(other, sort=sort)

@Appender(_index_shared_docs['join'])
Expand Down Expand Up @@ -629,7 +627,7 @@ def __getitem__(self, key):
size=len(self)))
if isinstance(key, slice):
new_range = self._range[key]
return RangeIndex.from_range(new_range, name=self.name)
return self.from_range(new_range, name=self.name)

# fall back to Int64Index
return super_getitem(key)
Expand All @@ -645,12 +643,10 @@ def __floordiv__(self, other):
start = self.start // other
step = self.step // other
stop = start + len(self) * step
return RangeIndex._simple_new(
start, stop, step, name=self.name)
return self._simple_new(start, stop, step, name=self.name)
if len(self) == 1:
start = self.start // other
return RangeIndex._simple_new(
start, start + 1, 1, name=self.name)
return self._simple_new(start, start + 1, 1, name=self.name)
return self._int64index // other

@classmethod
Expand Down Expand Up @@ -706,7 +702,7 @@ def _evaluate_numeric_binop(self, other):
rstart = op(left.start, right)
rstop = op(left.stop, right)

result = RangeIndex(rstart, rstop, rstep, **attrs)
result = self.__class__(rstart, rstop, rstep, **attrs)

# for compat with numpy / Int64Index
# even if we can represent as a RangeIndex, return
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4010,7 +4010,7 @@ def memory_usage(self, index=True, deep=False):
--------
>>> s = pd.Series(range(3))
>>> s.memory_usage()
216
152

Not including the index gives the size of the rest of the data, which
is necessarily smaller:
Expand All @@ -4024,9 +4024,9 @@ def memory_usage(self, index=True, deep=False):
>>> s.values
array(['a', 'b'], dtype=object)
>>> s.memory_usage()
208
144
>>> s.memory_usage(deep=True)
324
260
"""
v = super().memory_usage(deep=deep)
if index:
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,9 @@ def test_dtype(self):

def test_cached_data(self):
# GH 26565
# Calling RangeIndex._data caches an int64 array of the same length at
# self._cached_data. This tests whether _cached_data has been set.
# Calling RangeIndex._data caches an int64 array of the same length as
# self at self._cached_data.
# This tests whether _cached_data is being set by various operations.
idx = RangeIndex(0, 100, 10)

assert idx._cached_data is None
Expand Down Expand Up @@ -269,7 +270,7 @@ def test_cached_data(self):
df.iloc[5:10]
assert idx._cached_data is None

# actually calling data._data
# actually calling idx._data
assert isinstance(idx._data, np.ndarray)
assert isinstance(idx._cached_data, np.ndarray)

Expand Down