Skip to content

Commit

Permalink
REF: de-privatize dtypes.concat functions (#27499)
Browse files Browse the repository at this point in the history
* de-privatize _concat_compat

* de-privatize _concat_categorical

* de-privatize _concat_datetime

* move private functions to the one place they are used
  • Loading branch information
jbrockmendel authored and jreback committed Jul 22, 2019
1 parent 2d2d670 commit ea666da
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 71 deletions.
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2480,9 +2480,9 @@ def _can_hold_na(self):

@classmethod
def _concat_same_type(self, to_concat):
from pandas.core.dtypes.concat import _concat_categorical
from pandas.core.dtypes.concat import concat_categorical

return _concat_categorical(to_concat)
return concat_categorical(to_concat)

def isin(self, values):
"""
Expand Down
49 changes: 7 additions & 42 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
ABCIndexClass,
ABCPeriodIndex,
ABCRangeIndex,
ABCSparseDataFrame,
ABCTimedeltaIndex,
)

Expand Down Expand Up @@ -71,41 +70,7 @@ def get_dtype_kinds(l):
return typs


def _get_series_result_type(result, objs=None):
"""
return appropriate class of Series concat
input is either dict or array-like
"""
from pandas import SparseSeries, SparseDataFrame, DataFrame

# concat Series with axis 1
if isinstance(result, dict):
# concat Series with axis 1
if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()):
return SparseDataFrame
else:
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
return objs[0]._constructor


def _get_frame_result_type(result, objs):
"""
return appropriate class of DataFrame-like concat
if all blocks are sparse, return SparseDataFrame
otherwise, return 1st obj
"""

if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)):
from pandas.core.sparse.api import SparseDataFrame

return SparseDataFrame
else:
return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame))


def _concat_compat(to_concat, axis=0):
def concat_compat(to_concat, axis=0):
"""
provide concatenation of an array of arrays each of which is a single
'normalized' dtypes (in that for example, if it's object, then it is a
Expand Down Expand Up @@ -142,12 +107,12 @@ def is_nonempty(x):
_contains_period = any(typ.startswith("period") for typ in typs)

if "category" in typs:
# this must be prior to _concat_datetime,
# this must be prior to concat_datetime,
# to support Categorical + datetime-like
return _concat_categorical(to_concat, axis=axis)
return concat_categorical(to_concat, axis=axis)

elif _contains_datetime or "timedelta" in typs or _contains_period:
return _concat_datetime(to_concat, axis=axis, typs=typs)
return concat_datetime(to_concat, axis=axis, typs=typs)

# these are mandated to handle empties as well
elif "sparse" in typs:
Expand All @@ -174,7 +139,7 @@ def is_nonempty(x):
return np.concatenate(to_concat, axis=axis)


def _concat_categorical(to_concat, axis=0):
def concat_categorical(to_concat, axis=0):
"""Concatenate an object/categorical array of arrays, each of which is a
single dtype
Expand Down Expand Up @@ -214,7 +179,7 @@ def _concat_categorical(to_concat, axis=0):
else np.asarray(x.astype(object))
for x in to_concat
]
result = _concat_compat(to_concat)
result = concat_compat(to_concat)
if axis == 1:
result = result.reshape(1, len(result))
return result
Expand Down Expand Up @@ -400,7 +365,7 @@ def _concatenate_2d(to_concat, axis):
return np.concatenate(to_concat, axis=axis)


def _concat_datetime(to_concat, axis=0, typs=None):
def concat_datetime(to_concat, axis=0, typs=None):
"""
provide concatenation of a datetimelike array of arrays each of which is a
single M8[ns], datetime64[ns, tz] or m8[ns] dtype
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes import concat as _concat
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
from pandas.core.dtypes.common import (
ensure_categorical,
Expand Down Expand Up @@ -45,7 +46,7 @@
is_unsigned_integer_dtype,
pandas_dtype,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDateOffset,
Expand Down Expand Up @@ -2540,7 +2541,7 @@ def _union(self, other, sort):

if len(indexer) > 0:
other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
result = _concat._concat_compat((lvals, other_diff))
result = concat_compat((lvals, other_diff))

else:
result = lvals
Expand Down Expand Up @@ -2786,7 +2787,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
right_indexer = (indexer == -1).nonzero()[0]
right_diff = other.values.take(right_indexer)

the_diff = _concat._concat_compat([left_diff, right_diff])
the_diff = concat_compat([left_diff, right_diff])
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
is_scalar,
is_string_like,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -608,7 +608,7 @@ def _fast_union(self, other, sort=None):
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right.values[:loc]
dates = _concat._concat_compat((left.values, right_chunk))
dates = concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
# DTIs are not in the "correct" order and we want
# to sort
Expand All @@ -624,7 +624,7 @@ def _fast_union(self, other, sort=None):
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right.values[loc:]
dates = _concat._concat_compat((left.values, right_chunk))
dates = concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
else:
return left
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
is_timedelta64_ns_dtype,
pandas_dtype,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.missing import isna

from pandas.core.accessor import delegate_names
Expand Down Expand Up @@ -462,7 +462,7 @@ def _fast_union(self, other):
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right.values[loc:]
dates = _concat._concat_compat((left.values, right_chunk))
dates = concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
else:
return left
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
is_sequence,
is_sparse,
)
from pandas.core.dtypes.concat import _concat_compat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import _infer_fill_value, isna

Expand Down Expand Up @@ -607,7 +607,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
if len(self.obj._values):
# GH#22717 handle casting compatibility that np.concatenate
# does incorrectly
new_values = _concat_compat([self.obj._values, new_values])
new_values = concat_compat([self.obj._values, new_values])
self.obj._data = self.obj._constructor(
new_values, index=new_index, name=self.obj.name
)._data
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
is_timedelta64_dtype,
pandas_dtype,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -2563,7 +2563,7 @@ def concat_same_type(self, to_concat, placement=None):
# Instead of placing the condition here, it could also go into the
# is_uniform_join_units check, but I'm not sure what is better.
if len({x.dtype for x in to_concat}) > 1:
values = _concat._concat_datetime([x.values for x in to_concat])
values = concat_datetime([x.values for x in to_concat])
placement = placement or slice(0, len(values), 1)

if self.ndim > 1:
Expand Down Expand Up @@ -3082,7 +3082,7 @@ class CategoricalBlock(ExtensionBlock):
is_categorical = True
_verify_integrity = True
_can_hold_na = True
_concatenator = staticmethod(_concat._concat_categorical)
_concatenator = staticmethod(concat_categorical)

def __init__(self, values, placement, ndim=None):
from pandas.core.arrays.categorical import _maybe_to_categorical
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
is_sparse,
is_timedelta64_dtype,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.missing import isna

import pandas.core.algorithms as algos
Expand Down Expand Up @@ -211,7 +211,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):

if not self.indexers:
if not self.block._can_consolidate:
# preserve these for validation in _concat_compat
# preserve these for validation in concat_compat
return self.block.values

if self.block.is_bool and not self.block.is_categorical:
Expand Down Expand Up @@ -265,7 +265,7 @@ def concatenate_join_units(join_units, concat_axis, copy):
else:
concat_values = concat_values.copy()
else:
concat_values = _concat._concat_compat(to_concat, axis=concat_axis)
concat_values = concat_compat(to_concat, axis=concat_axis)

return concat_values

Expand Down Expand Up @@ -380,7 +380,7 @@ def is_uniform_join_units(join_units):
"""
Check if the join units consist of blocks of uniform type that can
be concatenated using Block.concat_same_type instead of the generic
concatenate_join_units (which uses `_concat._concat_compat`).
concatenate_join_units (which uses `concat_compat`).
"""
return (
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
is_scalar,
is_sparse,
)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -532,7 +532,7 @@ def get_axe(block, qs, axes):
return self.__class__(blocks, new_axes)

# single block, i.e. ndim == {1}
values = _concat._concat_compat([b.values for b in blocks])
values = concat_compat([b.values for b in blocks])

# compute the orderings of our original data
if len(self.blocks) > 1:
Expand Down Expand Up @@ -1647,11 +1647,11 @@ def concat(self, to_concat, new_axis):
new_block = blocks[0].concat_same_type(blocks)
else:
values = [x.values for x in blocks]
values = _concat._concat_compat(values)
values = concat_compat(values)
new_block = make_block(values, placement=slice(0, len(values), 1))
else:
values = [x._block.values for x in to_concat]
values = _concat._concat_compat(values)
values = concat_compat(values)
new_block = make_block(values, placement=slice(0, len(values), 1))

mgr = SingleBlockManager(new_block, new_axis)
Expand Down
42 changes: 38 additions & 4 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSparseDataFrame

from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core import common as com
Expand Down Expand Up @@ -439,13 +439,13 @@ def get_result(self):
mgr = self.objs[0]._data.concat(
[x._data for x in self.objs], self.new_axes
)
cons = _concat._get_series_result_type(mgr, self.objs)
cons = _get_series_result_type(mgr, self.objs)
return cons(mgr, name=name).__finalize__(self, method="concat")

# combine as columns in a frame
else:
data = dict(zip(range(len(self.objs)), self.objs))
cons = _concat._get_series_result_type(data)
cons = _get_series_result_type(data)

index, columns = self.new_axes
df = cons(data, index=index)
Expand Down Expand Up @@ -475,7 +475,7 @@ def get_result(self):
if not self.copy:
new_data._consolidate_inplace()

cons = _concat._get_frame_result_type(new_data, self.objs)
cons = _get_frame_result_type(new_data, self.objs)
return cons._from_axes(new_data, self.new_axes).__finalize__(
self, method="concat"
)
Expand Down Expand Up @@ -708,3 +708,37 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
return MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)


def _get_series_result_type(result, objs=None):
"""
return appropriate class of Series concat
input is either dict or array-like
"""
from pandas import SparseSeries, SparseDataFrame, DataFrame

# concat Series with axis 1
if isinstance(result, dict):
# concat Series with axis 1
if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()):
return SparseDataFrame
else:
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
return objs[0]._constructor


def _get_frame_result_type(result, objs):
"""
return appropriate class of DataFrame-like concat
if all blocks are sparse, return SparseDataFrame
otherwise, return 1st obj
"""

if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)):
from pandas.core.sparse.api import SparseDataFrame

return SparseDataFrame
else:
return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame))
4 changes: 2 additions & 2 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ def lreshape(data, groups, dropna=True, label=None):
for target, names in zip(keys, values):
to_concat = [data[col].values for col in names]

import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.concat import concat_compat

mdata[target] = _concat._concat_compat(to_concat)
mdata[target] = concat_compat(to_concat)
pivot_cols.append(target)

for col in id_cols:
Expand Down

0 comments on commit ea666da

Please sign in to comment.