Skip to content

Commit

Permalink
fix: do not clamp negative offsets to start of array prematurely (#15242
Browse files Browse the repository at this point in the history
)
  • Loading branch information
orlp authored Mar 22, 2024
1 parent dbbd3e8 commit bc742d7
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 18 deletions.
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ pub(crate) mod test {
assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

assert_eq!(first.slice(-7, 2).len(), 2);
assert_eq!(first.slice(-7, 2).len(), 1);
assert_eq!(first.slice(-3, 4).len(), 3);
assert_eq!(first.slice(3, 4).len(), 3);
assert_eq!(first.slice(10, 4).len(), 0);
Expand Down
32 changes: 15 additions & 17 deletions crates/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,24 +190,22 @@ pub fn slice_slice<T>(vals: &[T], offset: i64, len: usize) -> &[T] {
#[inline]
#[doc(hidden)]
pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, usize) {
let abs_offset = offset.unsigned_abs() as usize;

// The offset counted from the start of the array
// negative index
if offset < 0 {
if abs_offset <= array_len {
(array_len - abs_offset, std::cmp::min(length, abs_offset))
// negative index larger that array: slice from start
} else {
(0, std::cmp::min(length, array_len))
}
// positive index
} else if abs_offset <= array_len {
(abs_offset, std::cmp::min(length, array_len - abs_offset))
// empty slice
let signed_start_offset = if offset < 0 {
offset.saturating_add_unsigned(array_len as u64)
} else {
(array_len, 0)
}
offset
};
let signed_stop_offset = signed_start_offset.saturating_add_unsigned(length as u64);

let signed_array_len: i64 = array_len
.try_into()
.expect("array length larger than i64::MAX");
let clamped_start_offset = signed_start_offset.clamp(0, signed_array_len);
let clamped_stop_offset = signed_stop_offset.clamp(0, signed_array_len);

let slice_start_idx = clamped_start_offset as usize;
let slice_len = (clamped_stop_offset - clamped_start_offset) as usize;
(slice_start_idx, slice_len)
}

/// Apply a macro on the Series
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/namespaces/list/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ def test_slice() -> None:
assert s.list.tail(200).to_list() == vals
assert s.list.head(200).to_list() == vals
assert s.list.slice(1, 2).to_list() == [[2, 3], [2, 1]]
assert s.list.slice(-5, 2).to_list() == [[1], []]


def test_list_eval_dtype_inference() -> None:
Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/unit/operations/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def test_python_slicing_data_frame() -> None:
):
assert_frame_equal(df.slice(*slice_params), expected)

# Negative starting index before start of dataframe.
expected = pl.DataFrame({"a": [1, 2], "b": ["a", "b"]})
assert_frame_equal(df.slice(-5, 4), expected)

for py_slice in (
slice(1, 2),
slice(0, 2, 2),
Expand All @@ -50,6 +54,8 @@ def test_python_slicing_series() -> None:
[s.slice(4, None), [4, 5]],
[s.slice(3), [3, 4, 5]],
[s.slice(-2), [4, 5]],
[s.slice(-7, 4), [0, 1, 2]],
[s.slice(-700, 4), []],
):
assert srs_slice.to_list() == expected # type: ignore[attr-defined]

Expand Down

0 comments on commit bc742d7

Please sign in to comment.