Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow for zero-width fixed size lists #18940

Merged
merged 6 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/array/fixed_size_list/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ impl Arrow2Arrow for FixedSizeListArray {

fn from_data(data: &ArrayData) -> Self {
let dtype: ArrowDataType = data.data_type().clone().into();
let length = data.len() - data.offset();
let size = match dtype {
ArrowDataType::FixedSizeList(_, size) => size,
_ => unreachable!("must be FixedSizeList type"),
Expand All @@ -28,6 +29,7 @@ impl Arrow2Arrow for FixedSizeListArray {

Self {
size,
length,
dtype,
values,
validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())),
Expand Down
14 changes: 11 additions & 3 deletions crates/polars-arrow/src/array/fixed_size_list/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use polars_error::PolarsResult;
use polars_error::{polars_ensure, PolarsResult};

use super::FixedSizeListArray;
use crate::array::ffi::{FromFfi, ToFfi};
Expand Down Expand Up @@ -31,11 +31,19 @@ unsafe impl ToFfi for FixedSizeListArray {
impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeListArray {
unsafe fn try_from_ffi(array: A) -> PolarsResult<Self> {
let dtype = array.dtype().clone();
let (_, width) = FixedSizeListArray::try_child_and_size(&dtype)?;
let validity = unsafe { array.validity() }?;
let child = unsafe { array.child(0)? };
let child = unsafe { array.child(0) }?;
let values = ffi::try_from(child)?;

let mut fsl = Self::try_new(dtype, values, validity)?;
let length = if values.len() == 0 {
0
} else {
polars_ensure!(width > 0, InvalidOperation: "Zero-width array with values");
values.len() / width
};

let mut fsl = Self::try_new(dtype, length, values, validity)?;
fsl.slice(array.offset(), array.length());
Ok(fsl)
}
Expand Down
70 changes: 49 additions & 21 deletions crates/polars-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ mod iterator;

mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_error::{polars_bail, polars_ensure, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// The Arrow's equivalent to an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Clone)]
pub struct FixedSizeListArray {
size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
length: usize, // invariant: this is values.len() / size if size > 0
dtype: ArrowDataType,
values: Box<dyn Array>,
validity: Option<Bitmap>,
Expand All @@ -34,6 +35,7 @@ impl FixedSizeListArray {
/// * the validity's length is not equal to `values.len() / size`.
pub fn try_new(
dtype: ArrowDataType,
length: usize,
values: Box<dyn Array>,
validity: Option<Bitmap>,
) -> PolarsResult<Self> {
Expand All @@ -45,34 +47,61 @@ impl FixedSizeListArray {
polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.")
}

if values.len() % size != 0 {
polars_bail!(ComputeError:
"values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
values.len(),
size
)
}
let len = values.len() / size;
polars_ensure!(size == 0 || values.len() % size == 0, ComputeError:
"values (of len {}) must be a multiple of size ({}) in FixedSizeListArray.",
values.len(),
size
);

polars_ensure!(size == 0 || values.len() / size == length, ComputeError:
"length of values ({}) is not equal to given length ({}) in FixedSizeListArray({size}).",
values.len() / size,
length,
);
polars_ensure!(size != 0 || values.len() == 0, ComputeError:
"zero width FixedSizeListArray has values (length = {}).",
values.len(),
);

if validity
.as_ref()
.map_or(false, |validity| validity.len() != len)
.map_or(false, |validity| validity.len() != length)
{
polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
}

Ok(Self {
size,
length,
dtype,
values,
validity,
})
}

#[inline]
fn has_invariants(&self) -> bool {
let has_valid_length = (self.size == 0 && self.values().len() == 0)
|| (self.size > 0
&& self.values().len() % self.size() == 0
&& self.values().len() / self.size() == self.length);
let has_valid_validity = self
.validity
.as_ref()
.map_or(true, |v| v.len() == self.length);

has_valid_length && has_valid_validity
}

/// Alias to `Self::try_new(...).unwrap()`
#[track_caller]
pub fn new(dtype: ArrowDataType, values: Box<dyn Array>, validity: Option<Bitmap>) -> Self {
Self::try_new(dtype, values, validity).unwrap()
pub fn new(
dtype: ArrowDataType,
length: usize,
values: Box<dyn Array>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new(dtype, length, values, validity).unwrap()
}

/// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
Expand All @@ -83,15 +112,15 @@ impl FixedSizeListArray {
/// Returns a new empty [`FixedSizeListArray`].
pub fn new_empty(dtype: ArrowDataType) -> Self {
let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone());
Self::new(dtype, values, None)
Self::new(dtype, 0, values, None)
}

/// Returns a new null [`FixedSizeListArray`].
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
let (field, size) = Self::get_child_and_size(&dtype);

let values = new_null_array(field.dtype().clone(), length * size);
Self::new(dtype, values, Some(Bitmap::new_zeroed(length)))
Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))
}
}

Expand Down Expand Up @@ -124,6 +153,7 @@ impl FixedSizeListArray {
.filter(|bitmap| bitmap.unset_bits() > 0);
self.values
.slice_unchecked(offset * self.size, length * self.size);
self.length = length;
}

impl_sliced!();
Expand All @@ -136,7 +166,8 @@ impl FixedSizeListArray {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.values.len() / self.size
debug_assert!(self.has_invariants());
self.length
}

/// The optional validity.
Expand Down Expand Up @@ -184,12 +215,7 @@ impl FixedSizeListArray {
impl FixedSizeListArray {
pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> {
match dtype.to_logical_type() {
ArrowDataType::FixedSizeList(child, size) => {
if *size == 0 {
polars_bail!(ComputeError: "FixedSizeBinaryArray expects a positive size")
}
Ok((child.as_ref(), *size))
},
ArrowDataType::FixedSizeList(child, size) => Ok((child.as_ref(), *size)),
_ => polars_bail!(ComputeError: "FixedSizeListArray expects DataType::FixedSizeList"),
}
}
Expand Down Expand Up @@ -233,12 +259,14 @@ impl Splitable for FixedSizeListArray {
(
Self {
dtype: self.dtype.clone(),
length: offset,
values: lhs_values,
validity: lhs_validity,
size,
},
Self {
dtype: self.dtype.clone(),
length: self.length - offset,
values: rhs_values,
validity: rhs_validity,
size,
Expand Down
50 changes: 42 additions & 8 deletions crates/polars-arrow/src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::datatypes::{ArrowDataType, Field};
pub struct MutableFixedSizeListArray<M: MutableArray> {
dtype: ArrowDataType,
size: usize,
length: usize,
values: M,
validity: Option<MutableBitmap>,
}
Expand All @@ -22,6 +23,7 @@ impl<M: MutableArray> From<MutableFixedSizeListArray<M>> for FixedSizeListArray
fn from(mut other: MutableFixedSizeListArray<M>) -> Self {
FixedSizeListArray::new(
other.dtype,
other.length,
other.values.as_box(),
other.validity.map(|x| x.into()),
)
Expand Down Expand Up @@ -53,32 +55,35 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
};
Self {
size,
length: 0,
dtype,
values,
validity: None,
}
}

#[inline]
fn has_valid_invariants(&self) -> bool {
(self.size == 0 && self.values().len() == 0)
|| (self.size > 0 && self.values.len() / self.size == self.length)
}

/// Returns the size (number of elements per slot) of this [`FixedSizeListArray`].
pub const fn size(&self) -> usize {
self.size
}

/// The length of this array
pub fn len(&self) -> usize {
self.values.len() / self.size
debug_assert!(self.has_valid_invariants());
self.length
}

/// The inner values
pub fn values(&self) -> &M {
&self.values
}

/// The values as a mutable reference
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
}

fn init_validity(&mut self) {
let len = self.values.len() / self.size;

Expand All @@ -98,6 +103,10 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
if let Some(validity) = &mut self.validity {
validity.push(true)
}
self.length += 1;

debug_assert!(self.has_valid_invariants());

Ok(())
}

Expand All @@ -108,6 +117,9 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
if let Some(validity) = &mut self.validity {
validity.push(true)
}
self.length += 1;

debug_assert!(self.has_valid_invariants());
}

#[inline]
Expand All @@ -117,6 +129,9 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
Some(validity) => validity.push(false),
None => self.init_validity(),
}
self.length += 1;

debug_assert!(self.has_valid_invariants());
}

/// Reserves `additional` slots.
Expand All @@ -138,7 +153,8 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {

impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
fn len(&self) -> usize {
self.values.len() / self.size
debug_assert!(self.has_valid_invariants());
self.length
}

fn validity(&self) -> Option<&MutableBitmap> {
Expand All @@ -148,6 +164,7 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
fn as_box(&mut self) -> Box<dyn Array> {
FixedSizeListArray::new(
self.dtype.clone(),
self.length,
self.values.as_box(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
Expand All @@ -157,6 +174,7 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
fn as_arc(&mut self) -> Arc<dyn Array> {
FixedSizeListArray::new(
self.dtype.clone(),
self.length,
self.values.as_box(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
Expand Down Expand Up @@ -185,6 +203,9 @@ impl<M: MutableArray + 'static> MutableArray for MutableFixedSizeListArray<M> {
} else {
self.init_validity()
}
self.length += 1;

debug_assert!(self.has_valid_invariants());
}

fn reserve(&mut self, additional: usize) {
Expand All @@ -206,6 +227,9 @@ where
for items in iter {
self.try_push(items)?;
}

debug_assert!(self.has_valid_invariants());

Ok(())
}
}
Expand All @@ -223,6 +247,9 @@ where
} else {
self.push_null();
}

debug_assert!(self.has_valid_invariants());

Ok(())
}
}
Expand All @@ -243,6 +270,8 @@ where
} else {
self.push_null();
}

debug_assert!(self.has_valid_invariants());
}
}

Expand All @@ -253,6 +282,11 @@ where
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
extend_validity(self.len(), &mut self.validity, &other.validity);

self.values.try_extend_from_self(&other.values)
self.values.try_extend_from_self(&other.values)?;
self.length += other.len();

debug_assert!(self.has_valid_invariants());

Ok(())
}
}
Loading
Loading