Skip to content

Commit 71e0c6c

Browse files
committed
Add cast support for (Large)ListView <-> (Large)List
1 parent 2eabb59 commit 71e0c6c

File tree

3 files changed

+452
-2
lines changed

3 files changed

+452
-2
lines changed

arrow-array/src/array/list_view_array.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ use std::sync::Arc;
2424

2525
use crate::array::{make_array, print_long_array};
2626
use crate::iterator::GenericListViewArrayIter;
27-
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait, new_empty_array};
27+
use crate::{
28+
Array, ArrayAccessor, ArrayRef, FixedSizeListArray, GenericListArray, OffsetSizeTrait,
29+
new_empty_array,
30+
};
2831

2932
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
3033
pub type ListViewArray = GenericListViewArray<i32>;
@@ -454,6 +457,36 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
454457
}
455458
}
456459

460+
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
461+
for GenericListViewArray<OffsetSize>
462+
{
463+
fn from(value: GenericListArray<OffsetSize>) -> Self {
464+
let field = match value.data_type() {
465+
DataType::List(f) | DataType::LargeList(f) => f.clone(),
466+
_ => panic!(
467+
"Expected infallible creation of GenericListViewArray from GenericList failed"
468+
),
469+
};
470+
471+
let offsets = value.value_offsets();
472+
let len = offsets.len() - 1;
473+
let mut sizes = Vec::with_capacity(len);
474+
let mut view_offsets = Vec::with_capacity(len);
475+
for (i, offset) in offsets.iter().enumerate().take(len) {
476+
view_offsets.push(*offset);
477+
sizes.push(value.value_length(i));
478+
}
479+
480+
Self::new(
481+
field,
482+
ScalarBuffer::from(view_offsets),
483+
ScalarBuffer::from(sizes),
484+
value.values().clone(),
485+
value.nulls().cloned(),
486+
)
487+
}
488+
}
489+
457490
impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
458491
fn from(array: GenericListViewArray<OffsetSize>) -> Self {
459492
let len = array.len();

arrow-cast/src/cast/list.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ pub(crate) fn cast_single_element_fixed_size_list_to_values(
5050
cast_with_options(values, to, cast_options)
5151
}
5252

53+
pub(crate) fn cast_list_to_list_view<OffsetSize>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
54+
where
55+
OffsetSize: OffsetSizeTrait,
56+
{
57+
let list = array.as_list::<OffsetSize>();
58+
let list_view: GenericListViewArray<OffsetSize> = list.clone().into();
59+
Ok(Arc::new(list_view))
60+
}
61+
5362
pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
5463
array: &dyn Array,
5564
) -> Result<ArrayRef, ArrowError>
@@ -160,6 +169,70 @@ pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
160169
)?))
161170
}
162171

172+
/// Helper function to cast a list view to a list
173+
pub(crate) fn cast_list_view_to_list<O: OffsetSizeTrait>(
174+
array: &dyn Array,
175+
to: &FieldRef,
176+
cast_options: &CastOptions,
177+
) -> Result<ArrayRef, ArrowError> {
178+
let list_view = array.as_list_view::<O>();
179+
let list_view_offsets = list_view.offsets();
180+
let sizes = list_view.sizes();
181+
let source_values = list_view.values();
182+
183+
// Construct the indices and offsets for the new list array by iterating over the list view subarrays
184+
let mut indices = Vec::with_capacity(list_view.values().len());
185+
let mut offsets = Vec::with_capacity(list_view.len() + 1);
186+
// Add the offset for the first subarray
187+
offsets.push(O::usize_as(0));
188+
for i in 0..list_view.len() {
189+
// For each subarray, add the indices of the values to take
190+
let offset = list_view_offsets[i].as_usize();
191+
let size = sizes[i].as_usize();
192+
let end = offset + size;
193+
for j in offset..end {
194+
indices.push(j as i32);
195+
}
196+
// Add the offset for the next subarray
197+
offsets.push(O::usize_as(indices.len()));
198+
}
199+
200+
// Take the values from the source values using the indices, creating a new array
201+
let values = arrow_select::take::take(source_values, &Int32Array::from(indices), None)?;
202+
203+
// Cast the values to the target data type
204+
let values = cast_with_options(&values, to.data_type(), cast_options)?;
205+
206+
Ok(Arc::new(GenericListArray::<O>::try_new(
207+
to.clone(),
208+
OffsetBuffer::new(offsets.into()),
209+
values,
210+
list_view.nulls().cloned(),
211+
)?))
212+
}
213+
214+
pub(crate) fn cast_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
215+
array: &dyn Array,
216+
to_field: &FieldRef,
217+
cast_options: &CastOptions,
218+
) -> Result<ArrayRef, ArrowError> {
219+
let list_view = array.as_list_view::<I>();
220+
let (_field, offsets, sizes, values, nulls) = list_view.clone().into_parts();
221+
222+
// Recursively cast values
223+
let values = cast_with_options(&values, to_field.data_type(), cast_options)?;
224+
225+
let new_offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect();
226+
let new_sizes: Vec<_> = sizes.iter().map(|x| O::usize_as(x.as_usize())).collect();
227+
Ok(Arc::new(GenericListViewArray::<O>::try_new(
228+
to_field.clone(),
229+
new_offsets.into(),
230+
new_sizes.into(),
231+
values,
232+
nulls,
233+
)?))
234+
}
235+
163236
/// Cast the container type of List/LargeList array along with the inner datatype
164237
pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
165238
array: &dyn Array,

0 commit comments

Comments
 (0)