Skip to content
135 changes: 62 additions & 73 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,17 +813,50 @@ def replace(
)
return blocks

def _replace_single(
def _replace_regex(
    self,
    to_replace,
    value,
    inplace: bool = False,
    convert: bool = True,
    mask=None,
) -> List["Block"]:
    """
    Replace elements matching a regular expression by the given value.

    Parameters
    ----------
    to_replace : object or pattern
        Scalar to replace or regular expression to match. It is handed
        to ``re.compile`` once the block is known to be able to hold it.
    value : object
        Replacement object.
    inplace : bool, default False
        If True, operate on ``self.values`` directly; otherwise operate
        on a copy of the values.
    convert : bool, default True
        If True, try to coerce any object types to better types by
        calling ``convert(numeric=False)`` on the resulting block.
    mask : array-like of bool, optional
        Forwarded unchanged to ``replace_regex``. Previously documented
        as "True indicates the corresponding element is ignored".
        NOTE(review): confirm the polarity against ``replace_regex``.

    Returns
    -------
    List[Block]
    """
    if not self._can_hold_element(to_replace):
        # Nothing to do for blocks that cannot hold the pattern, i.e.
        # everything except ObjectBlock (and, in principle, a String
        # ExtensionBlock).
        if inplace:
            return [self]
        return [self.copy()]

    pattern = re.compile(to_replace)

    if inplace:
        target = self.values
    else:
        target = self.values.copy()
    replace_regex(target, pattern, value, mask)

    replaced = self.make_block(target)
    if not convert:
        return [replaced]
    return replaced.convert(numeric=False)

def _replace_list(
self,
Expand Down Expand Up @@ -1598,14 +1631,16 @@ def _replace_coerce(
self = self.coerce_to_target_dtype(value)
return self.putmask(mask, value, inplace=inplace)
else:
return self._replace_single(
to_replace,
value,
inplace=inplace,
regex=regex,
convert=False,
mask=mask,
)
regex = _should_use_regex(regex, to_replace)
if regex:
return self._replace_regex(
to_replace,
value,
inplace=inplace,
convert=False,
mask=mask,
)
return self.replace(to_replace, value, inplace=inplace, regex=False)
return [self]


Expand Down Expand Up @@ -2506,72 +2541,26 @@ def replace(
# here with listlike to_replace or value, as those cases
# go through _replace_list

if is_re(to_replace) or regex:
return self._replace_single(to_replace, value, inplace=inplace, regex=True)
else:
return super().replace(to_replace, value, inplace=inplace, regex=regex)

def _replace_single(
self,
to_replace,
value,
inplace: bool = False,
regex: bool = False,
convert: bool = True,
mask=None,
) -> List["Block"]:
"""
Replace elements by the given value.

Parameters
----------
to_replace : object or pattern
Scalar to replace or regular expression to match.
value : object
Replacement object.
inplace : bool, default False
Perform inplace modification.
regex : bool, default False
If true, perform regular expression substitution.
convert : bool, default True
If true, try to coerce any object types to better types.
mask : array-like of bool, optional
True indicate corresponding element is ignored.

Returns
-------
List[Block]
"""
inplace = validate_bool_kwarg(inplace, "inplace")

# to_replace is regex compilable
regex = regex and is_re_compilable(to_replace)
regex = _should_use_regex(regex, to_replace)

# try to get the pattern attribute (compiled re) or it's a string
if is_re(to_replace):
pattern = to_replace.pattern
if regex:
return self._replace_regex(to_replace, value, inplace=inplace)
else:
pattern = to_replace
return super().replace(to_replace, value, inplace=inplace, regex=False)

# if the pattern is not empty and to_replace is either a string or a
# regex
if regex and pattern:
rx = re.compile(to_replace)
else:
# if the thing to replace is not a string or compiled regex call
# the superclass method -> to_replace is some kind of object
return super().replace(to_replace, value, inplace=inplace, regex=regex)

new_values = self.values if inplace else self.values.copy()
replace_regex(new_values, rx, value, mask)
def _should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Decide whether to treat `to_replace` as a regular expression.

    Parameters
    ----------
    regex : bool
        Whether the caller asked for regular-expression matching.
    to_replace : Any
        Candidate pattern. When ``is_re`` reports it as a compiled
        pattern, regex handling is requested regardless of `regex`.

    Returns
    -------
    bool
        True only when regex handling is requested, `to_replace` is
        accepted by ``is_re_compilable`` and the compiled pattern is
        not the empty string.
    """
    use_regex = True if is_re(to_replace) else regex
    if not use_regex:
        return use_regex

    compilable = is_re_compilable(to_replace)
    if not compilable:
        return compilable

    # Don't use regex if the pattern is empty.
    return re.compile(to_replace).pattern != ""


class CategoricalBlock(ExtensionBlock):
Expand Down