Description
Hi, @agroce and I ran mutation testing on pandas. Below are some significant changes/mutants in pandas source code lines that are apparently covered by tests, yet the tests still passed when we ran them with the mutation applied, so pandas may want to add tests that would fail for these changes/mutants.
This was run on v2.2.1, so the line numbers/links refer to that version of the code. In each pair, the first line is the original and the second line is the mutated version.
I will be going through these mutants, trying to identify significant ones, and then hopefully filing PRs to add relevant tests.
If anyone else has time to spare, please help by looking at the mutations listed below and creating corresponding test cases, to help improve the pandas test suite.
Already filed PRs
def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
def min(self, *, axis: AxisInt | None = None, skipna: bool = False) -> IntervalOrNA:
core/arrays/sparse/array.py:1768
if len(self) != len(other):
if len(self) > len(other):
if not other_is_scalar and len(self) != len(other):
if not other_is_scalar and len(self) > len(other):
To investigate
null_locs = null_pos.nonzero()[0]
null_locs = null_pos.nonzero()[-1]
if self.axis == 0:
if self.axis <= 0:
col_mask = col_idx == -1
col_mask = col_idx < -1
core/arrays/arrow/array.py:1501
if dropna and data.null_count > 0:
if dropna and data.null_count > -1:
core/arrays/arrow/array.py:2120
return pc.if_else(cond, left, right)
pass
core/arrays/arrow/array.py:2167
if isinstance(values, pa.ChunkedArray) and pa.types.is_boolean(values.type):
if isinstance(values, pa.ChunkedArray) or pa.types.is_boolean(values.type):
if (inferred_dtype in integer_like) and not (
if True and not (
core/arrays/categorical.py:2079
elif is_any_real_numeric_dtype(self.categories.dtype):
elif not (is_any_real_numeric_dtype(self.categories.dtype)):
core/arrays/categorical.py:2743
assert self.ordered # checked earlier
pass
if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
if lib.infer_dtype(scalars, skipna=True) not in [""]:
data_dtype = data.dtype
pass
data = data.reshape(shape)
pass
start_i = i * chunksize
start_i = i % chunksize
level_values = lib.maybe_convert_objects(level_values)
pass
if isinstance(dtype, ExtensionDtype) and all(
if True and all(
self = cast("Series", self)
pass
axis = self._get_axis_number(axis)
pass
if nonexistent not in nonexistent_options and not isinstance(
if nonexistent not in nonexistent_options and isinstance(
axis = self._get_axis_number(axis)
pass
if len(in_axis_grps) > 0:
if len(in_axis_grps) > 1:
return self.groupings[0]._result_index.rename(self.names[0])
return self.groupings[0]._result_index.rename(self.names[-1])
return False
pass
return arr
pass
na_idx = np.where(uniques == -1)[0]
na_idx = np.where(uniques == -1)[-1]
if len(self) != len(other):
if len(self) > len(other):
elif len(other) == 1:
elif len(other) == -1:
return type(self)(start_r, end_r + step_s / 2, step_s / 2)
pass
elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
elif len(ilocs) <= 1 and com.is_null_slice(pi) and len(self.obj) == 0:
if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1:
if isinstance(value, np.ndarray) and value.ndim > 1 and len(value) == 1:
elif lib.is_integer(indexer[1]) and indexer[1] == 0:
elif lib.is_integer(indexer[-1]) and indexer[1] == 0:
core/internals/managers.py:217
bp = BlockPlacement(slice(0, 0))
bp = BlockPlacement(slice(0, 1))
core/internals/managers.py:1542
assert self.ndim >= 2
assert self.ndim >= -1
core/internals/managers.py:2074
self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values)))
self.blocks[-1]._mgr_locs = BlockPlacement(slice(len(values)))
if obj.ndim == 2 and obj.columns.size == 0:
if obj.ndim != 2 and obj.columns.size == 0:
return super().pipe(func, *args, **kwargs)
return super().pipe(func, **kwargs)
elif validate in ["one_to_many", "1:m"]:
elif validate in ["one_to_many"]:
if isinstance(left.dtype, CategoricalDtype) and isinstance(
if True and isinstance(
if self.tolerance < Timedelta(0):
if self.tolerance < Timedelta(-1):
return _get_join_keys(llab, rlab, shape, sort)
return _get_join_keys( rlab,llab, shape, sort)
def unstack(obj: Series | DataFrame, level, fill_value=None, sort: bool = True):
def unstack(obj: Series | DataFrame, level, fill_value=None, sort: bool = False):
level = [v if v <= lev else v - 1 for v in level]
level = [v if v < lev else v - 1 for v in level]
mx += 0.001 * abs(mx) if mx != 0 else 0.001
mx += 0.001 / abs(mx) if mx != 0 else 0.001
if common.count_not_none(self.com, self.span, self.alpha) > 0:
if common.count_not_none(self.com, self.span, self.alpha) < 0:
To ignore (probably not significant)
These mutants involve a non-significant permutation of arguments.
core/window/ewm.py:858 (ignore, since this function is symmetric)
def cov_func(x, y):
def cov_func( y,x):
result = obj.apply(func, args=self.args, **self.kwargs)
result = obj.apply(func, **self.kwargs, args=self.args)
core/arrays/arrow/array.py:136
has_remainder = pc.not_equal(pc.multiply(divided, right), left)
has_remainder = pc.not_equal(pc.multiply( right,divided), left)
core/arrays/arrow/extension_types.py:108
return hash((str(self), str(self.subtype), self.closed))
return hash((str(self), self.closed, str(self.subtype)))
Code below was used to produce the output above:
> rmq=function(s)gsub('""""','"',s,fixed=TRUE);mutant.dt[,suffix:=sub(".*[.]", "", file)][order(suffix,file,line)][critical==1 & software=="pandas", cat(sprintf("[%s:%d](https://github.com/pandas-dev/pandas/blob/v2.2.1/pandas/%s#L%d)\n```\n%s\n%s\n```\n", file,line,file,line,rmq(original),rmq(mutated)),sep="\n")]
Already investigated, but not likely to result in PRs (please write why not)
TODO