Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python)!: Update reshape to return Array types instead of List types #16825

Merged
merged 12 commits into from
Jun 10, 2024
86 changes: 60 additions & 26 deletions crates/polars-core/src/series/ops/reshape.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::borrow::Cow;
#[cfg(feature = "dtype-array")]
use std::cmp::Ordering;
#[cfg(feature = "dtype-array")]
use std::collections::VecDeque;

use arrow::array::*;
Expand Down Expand Up @@ -89,31 +91,62 @@ impl Series {

#[cfg(feature = "dtype-array")]
pub fn reshape_array(&self, dimensions: &[i64]) -> PolarsResult<Series> {
polars_ensure!(
!dimensions.is_empty(),
InvalidOperation: "at least one dimension must be specified"
);

let mut dims = dimensions.iter().copied().collect::<VecDeque<_>>();

let leaf_array = self.get_leaf_array();
let size = leaf_array.len() as i64;

// Infer dimension
if dims.contains(&-1) {
let infer_dims = dims.iter().filter(|d| **d == -1).count();
polars_ensure!(infer_dims == 1, InvalidOperation: "can only infer single dimension, found {}", infer_dims);

let mut prod = 1;
for &dim in &dims {
if dim != -1 {
prod *= dim;
}
}
polars_ensure!(size % prod == 0, InvalidOperation: "cannot reshape array of size {} into shape: {}", size, format_tuple!(dims));
let inferred_value = size / prod;
for dim in &mut dims {
if *dim == -1 {
*dim = inferred_value;
let size = leaf_array.len();

let mut total_dim_size = 1;
let mut infer_dim_index: Option<usize> = None;
for (index, &dim) in dims.iter().enumerate() {
match dim.cmp(&0) {
Ordering::Greater => total_dim_size *= dim as usize,
Ordering::Equal => {
polars_ensure!(
index == 0,
InvalidOperation: "cannot reshape array into shape containing a zero dimension after the first: {}",
format_tuple!(dims)
);
total_dim_size = 0;
// We can early exit here, as empty arrays will error with multiple dimensions,
// and non-empty arrays will error when the first dimension is zero.
break;
}
},
Ordering::Less => {
polars_ensure!(
infer_dim_index.is_none(),
InvalidOperation: "can only specify one unknown dimension"
);
infer_dim_index = Some(index);
},
}
}

if size == 0 {
if dims.len() > 1 || (infer_dim_index.is_none() && total_dim_size != 0) {
polars_bail!(InvalidOperation: "cannot reshape empty array into shape {}", format_tuple!(dims))
}
} else if total_dim_size == 0 {
polars_bail!(InvalidOperation: "cannot reshape non-empty array into shape containing a zero dimension: {}", format_tuple!(dims))
} else {
polars_ensure!(
size % total_dim_size == 0,
InvalidOperation: "cannot reshape array of size {} into shape {}", size, format_tuple!(dims)
);
}

// Infer dimension
if let Some(index) = infer_dim_index {
let inferred_dim = size / total_dim_size;
let item = dims.get_mut(index).unwrap();
*item = i64::try_from(inferred_dim).unwrap();
}

let leaf_array = leaf_array.rechunk();
let mut prev_dtype = leaf_array.dtype().clone();
let mut prev_array = leaf_array.chunks()[0].clone();
Expand All @@ -136,11 +169,12 @@ impl Series {
}

pub fn reshape_list(&self, dimensions: &[i64]) -> PolarsResult<Series> {
let s = self;
polars_ensure!(
!dimensions.is_empty(),
InvalidOperation: "at least one dimension must be specified"
);

if dimensions.is_empty() {
polars_bail!(ComputeError: "reshape `dimensions` cannot be empty")
}
let s = self;
let s = if let DataType::List(_) = s.dtype() {
Cow::Owned(s.explode()?)
} else {
Expand All @@ -155,7 +189,7 @@ impl Series {
1 => {
polars_ensure!(
dimensions[0] as usize == s_ref.len() || dimensions[0] == -1_i64,
ComputeError: "cannot reshape len {} into shape {:?}", s_ref.len(), dimensions,
InvalidOperation: "cannot reshape len {} into shape {:?}", s_ref.len(), dimensions,
);
Ok(s_ref.clone())
},
Expand All @@ -168,7 +202,7 @@ impl Series {
let s = reshape_fast_path(s.name(), s_ref);
return Ok(s);
} else {
polars_bail!(ComputeError: "cannot reshape len 0 into shape {:?}", dimensions,)
polars_bail!(InvalidOperation: "cannot reshape len 0 into shape {:?}", dimensions,)
}
}

Expand All @@ -190,7 +224,7 @@ impl Series {

polars_ensure!(
(rows*cols) as usize == s_ref.len() && rows >= 1 && cols >= 1,
ComputeError: "cannot reshape len {} into shape {:?}", s_ref.len(), dimensions,
InvalidOperation: "cannot reshape len {} into shape {:?}", s_ref.len(), dimensions,
);

let mut builder =
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/_utils/construction/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ def numpy_to_pyseries(
strict=strict,
nan_to_null=nan_to_null,
)
return wrap_s(py_s).reshape(original_shape, Array)._s
return wrap_s(py_s).reshape(original_shape)._s


def series_to_pyseries(
Expand Down
55 changes: 32 additions & 23 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
)
from polars.datatypes import (
Int64,
List,
is_polars_dtype,
py_type_to_dtype,
)
Expand Down Expand Up @@ -80,7 +79,6 @@
from polars._utils.various import (
NoDefault,
)
from polars.datatypes import Array
from polars.type_aliases import (
ClosedInterval,
FillNullStrategy,
Expand Down Expand Up @@ -9078,51 +9076,62 @@ def radians(self) -> Self:
"""
return self._from_pyexpr(self._pyexpr.radians())

def reshape(
self, dimensions: tuple[int, ...], nested_type: type[Array] | type[List] = List
) -> Self:
def reshape(self, dimensions: tuple[int, ...]) -> Self:
"""
Reshape this Expr to a flat Series or a Series of Lists.
Reshape this Expr to a flat column or an Array column.

Parameters
----------
dimensions
Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
dimension is inferred.
nested_type
The nested data type to create. List only supports 2 dimension,
whereas Array supports an arbitrary number of dimensions.

Returns
-------
Expr
If a single dimension is given, results in an expression of the original
data type.
If multiple dimensions are given, results in an expression of data type
:class:`List` with shape (rows, cols)
or :class:`Array` with shape `dimensions`.
:class:`Array` with shape `dimensions`.

Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
>>> df.select(pl.col("foo").reshape((3, 3)))
>>> square = df.select(pl.col("foo").reshape((3, 3)))
>>> square
shape: (3, 1)
┌───────────┐
│ foo │
│ --- │
│ list[i64] │
╞═══════════╡
│ [1, 2, 3] │
│ [4, 5, 6] │
│ [7, 8, 9] │
└───────────┘
┌───────────────┐
│ foo │
│ --- │
│ array[i64, 3] │
╞═══════════════╡
│ [1, 2, 3] │
│ [4, 5, 6] │
│ [7, 8, 9] │
└───────────────┘
>>> square.select(pl.col("foo").reshape((9,)))
shape: (9, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└─────┘

See Also
--------
Expr.list.explode : Explode a list column.
"""
is_list = nested_type == List
return self._from_pyexpr(self._pyexpr.reshape(dimensions, is_list))
return self._from_pyexpr(self._pyexpr.reshape(dimensions))

def shuffle(self, seed: int | None = None) -> Self:
"""
Expand Down
34 changes: 21 additions & 13 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6591,29 +6591,23 @@ def replace(
]
"""

def reshape(
self, dimensions: tuple[int, ...], nested_type: type[Array] | type[List] = List
) -> Series:
def reshape(self, dimensions: tuple[int, ...]) -> Series:
"""
Reshape this Series to a flat Series or a Series of Lists.
Reshape this Series to a flat Series or an Array Series.

Parameters
----------
dimensions
Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
dimension is inferred.
nested_type
The nested data type to create. List only supports 2 dimension,
whereas Array supports an arbitrary number of dimensions.

Returns
-------
Series
If a single dimension is given, results in a Series of the original
data type.
If multiple dimensions are given, results in a Series of data type
:class:`List` with shape (rows, cols)
or :class:`Array` with shape `dimensions`.
:class:`Array` with shape `dimensions`.

See Also
--------
Expand All @@ -6622,17 +6616,31 @@ def reshape(
Examples
--------
>>> s = pl.Series("foo", [1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> s.reshape((3, 3))
>>> square = s.reshape((3, 3))
>>> square
shape: (3,)
Series: 'foo' [list[i64]]
Series: 'foo' [array[i64, 3]]
[
[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
]
>>> square.reshape((9,))
shape: (9,)
Series: 'foo' [i64]
[
1
2
3
4
5
6
7
8
9
]
"""
is_list = nested_type == List
return self._from_pyseries(self._s.reshape(dimensions, is_list))
return self._from_pyseries(self._s.reshape(dimensions))

def shuffle(self, seed: int | None = None) -> Series:
"""
Expand Down
9 changes: 2 additions & 7 deletions py-polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,13 +793,8 @@ impl PyExpr {
self.inner.clone().kurtosis(fisher, bias).into()
}

fn reshape(&self, dims: Vec<i64>, is_list: bool) -> Self {
let nested = if is_list {
NestedType::List
} else {
NestedType::Array
};
self.inner.clone().reshape(&dims, nested).into()
fn reshape(&self, dims: Vec<i64>) -> Self {
self.inner.clone().reshape(&dims, NestedType::Array).into()
}

fn to_physical(&self) -> Self {
Expand Down
12 changes: 5 additions & 7 deletions py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,11 @@ impl PySeries {
})
}

fn reshape(&self, dims: Vec<i64>, is_list: bool) -> PyResult<Self> {
let out = if is_list {
self.series.reshape_list(&dims)
} else {
self.series.reshape_array(&dims)
}
.map_err(PyPolarsErr::from)?;
fn reshape(&self, dims: Vec<i64>) -> PyResult<Self> {
let out = self
.series
.reshape_array(&dims)
.map_err(PyPolarsErr::from)?;
Ok(out.into())
}

Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/constructors/test_series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
from datetime import date, datetime, timedelta
from typing import Any

Expand Down Expand Up @@ -134,3 +135,12 @@ def test_series_init_np_temporal_with_nat_15518() -> None:

expected = pl.Series([date(2020, 1, 1), None, date(2020, 1, 3)])
assert_series_equal(result, expected)


def test_series_init_np_2d_zero_zero_shape() -> None:
    """Building a Series from an empty NumPy array of shape (0, 0) must fail.

    The constructor is expected to raise `InvalidOperationError` with a
    message naming the offending shape.
    """
    empty_2d = np.array([]).reshape(0, 0)
    # Escape the message so the parentheses in the shape are matched literally.
    expected_message = re.escape("cannot reshape empty array into shape (0, 0)")
    with pytest.raises(pl.InvalidOperationError, match=expected_message):
        pl.Series(empty_2d)
11 changes: 0 additions & 11 deletions py-polars/tests/unit/datatypes/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,6 @@ def test_create_nested_array() -> None:
assert s2.to_list() == data


def test_array_ndarray_reshape() -> None:
shape = (8, 4, 2, 1)
s = pl.Series(range(64)).reshape(shape, nested_type=pl.Array)
n = s.to_numpy()
assert n.shape == shape
assert (n[0] == s[0].to_numpy()).all()
n = n[0]
s = s[0]
assert (n[0] == s[0].to_numpy()).all()


def test_recursive_array_dtype() -> None:
assert str(pl.Array(pl.Int64, (2, 3))) == "Array(Int64, shape=(2, 3))"
assert str(pl.Array(pl.Int64, 3)) == "Array(Int64, shape=(3,))"
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/expr/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def test_list_eval_expression() -> None:
"rank": [[1.0, 2.0], [2.0, 1.0], [2.0, 1.0]],
}

assert df["a"].reshape((1, -1)).list.eval(
assert df["a"].reshape((1, -1)).arr.to_list().list.eval(
pl.first(), parallel=parallel
).to_list() == [[1, 8, 3]]

Expand Down
Loading
Loading