diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index 2e21f3e7e640..7b4b1d6eca4b 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -779,6 +779,34 @@ pub trait AsArray: private::Sealed {
         self.as_bytes_opt().expect("binary array")
     }
 
+    /// Downcast this to a [`StringViewArray`] panicking if not possible
+    fn as_string_view(&self) -> &StringViewArray {
+        self.as_byte_view_opt().expect("string view array")
+    }
+
+    /// Downcast this to a [`StringViewArray`] returning `None` if not possible
+    fn as_string_view_opt(&self) -> Option<&StringViewArray> {
+        self.as_byte_view_opt()
+    }
+
+    /// Downcast this to a [`BinaryViewArray`] panicking if not possible
+    fn as_binary_view(&self) -> &BinaryViewArray {
+        self.as_byte_view_opt().expect("binary view array")
+    }
+
+    /// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
+    fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
+        self.as_byte_view_opt()
+    }
+
+    /// Downcast this to a [`GenericByteViewArray`] panicking if not possible
+    fn as_byte_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
+        self.as_byte_view_opt().expect("byte view array")
+    }
+
+    /// Downcast this to a [`GenericByteViewArray`] returning `None` if not possible
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>>;
+
     /// Downcast this to a [`StructArray`] returning `None` if not possible
     fn as_struct_opt(&self) -> Option<&StructArray>;
 
@@ -852,6 +880,10 @@ impl AsArray for dyn Array + '_ {
         self.as_any().downcast_ref()
     }
 
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
+        self.as_any().downcast_ref()
+    }
+
     fn as_struct_opt(&self) -> Option<&StructArray> {
         self.as_any().downcast_ref()
     }
@@ -899,6 +931,10 @@ impl AsArray for ArrayRef {
         self.as_ref().as_bytes_opt()
     }
 
+    fn as_byte_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
+        self.as_ref().as_byte_view_opt()
+    }
+
     fn as_struct_opt(&self) -> Option<&StructArray> {
         self.as_ref().as_struct_opt()
     }
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index d9a639da8066..dc9e13040c8e 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -143,6 +143,9 @@ fn take_impl(
         DataType::LargeUtf8 => {
             Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
         }
+        DataType::Utf8View => {
+            Ok(Arc::new(take_byte_view(values.as_string_view(), indices)?))
+        }
         DataType::List(_) => {
             Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
         }
@@ -204,6 +207,9 @@ fn take_impl(
         DataType::LargeBinary => {
             Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
         }
+        DataType::BinaryView => {
+            Ok(Arc::new(take_byte_view(values.as_binary_view(), indices)?))
+        }
         DataType::FixedSizeBinary(size) => {
             let values = values
                 .as_any()
@@ -437,6 +443,23 @@ fn take_bytes(
     Ok(GenericByteArray::from(array_data))
 }
 
+/// `take` implementation for byte view arrays
+///
+/// Only the views and the null buffer are gathered through `indices`; the
+/// data buffers of `array` are passed to the resulting array unchanged.
+fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
+    array: &GenericByteViewArray<T>,
+    indices: &PrimitiveArray<IndexType>,
+) -> Result<GenericByteViewArray<T>, ArrowError> {
+    let new_views = take_native(array.views(), indices);
+    let new_nulls = take_nulls(array.nulls(), indices);
+    Ok(GenericByteViewArray::new(
+        new_views,
+        array.data_buffers().to_vec(),
+        new_nulls,
+    ))
+}
+
 /// `take` implementation for list arrays
 ///
 /// Calculates the index and indexed offset for the inner array,
@@ -1424,6 +1447,53 @@ mod tests {
         assert_eq!(result.as_ref(), &expected);
     }
 
+    fn _test_byte_view<T>()
+    where
+        T: ByteViewType,
+        str: AsRef<T::Native>,
+        T::Native: PartialEq,
+    {
+        let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4), Some(2)]);
+        let array = {
+            // ["hello", "world", null, "large payload over 12 bytes", "lulu"]
+            let mut builder = GenericByteViewBuilder::<T>::new();
+            builder.append_value("hello");
+            builder.append_value("world");
+            builder.append_null();
+            builder.append_value("large payload over 12 bytes");
+            builder.append_value("lulu");
+            builder.finish()
+        };
+
+        let actual = take(&array, &index, None).unwrap();
+
+        assert_eq!(actual.len(), index.len());
+
+        let expected = {
+            // ["large payload over 12 bytes", null, "world", "large payload over 12 bytes", "lulu", null]
+            let mut builder = GenericByteViewBuilder::<T>::new();
+            builder.append_value("large payload over 12 bytes");
+            builder.append_null();
+            builder.append_value("world");
+            builder.append_value("large payload over 12 bytes");
+            builder.append_value("lulu");
+            builder.append_null();
+            builder.finish()
+        };
+
+        assert_eq!(actual.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_take_string_view() {
+        _test_byte_view::<StringViewType>()
+    }
+
+    #[test]
+    fn test_take_binary_view() {
+        _test_byte_view::<BinaryViewType>()
+    }
+
     macro_rules! test_take_list {
         ($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
             // Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]