Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement take kernel for byte view array. #5602

Merged
merged 4 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions arrow-array/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,34 @@ pub trait AsArray: private::Sealed {
self.as_bytes_opt().expect("binary array")
}

/// Downcast this to a [`StringViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message "string view array" when
/// [`Self::as_bytes_view_opt`] returns `None`.
fn as_string_view(&self) -> &StringViewArray {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a breaking change as the trait is sealed

self.as_bytes_view_opt().expect("string view array")
}

/// Downcast this to a [`StringViewArray`] returning `None` if not possible
///
/// Delegates to [`Self::as_bytes_view_opt`]; the byte view type is inferred
/// from the declared return type.
fn as_string_view_opt(&self) -> Option<&StringViewArray> {
self.as_bytes_view_opt()
}

/// Downcast this to a [`BinaryViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message "binary view array" when
/// [`Self::as_bytes_view_opt`] returns `None`.
fn as_binary_view(&self) -> &BinaryViewArray {
self.as_bytes_view_opt().expect("binary view array")
}

/// Downcast this to a [`BinaryViewArray`] returning `None` if not possible
///
/// Delegates to [`Self::as_bytes_view_opt`]; the byte view type is inferred
/// from the declared return type.
fn as_binary_view_opt(&self) -> Option<&BinaryViewArray> {
self.as_bytes_view_opt()
}

/// Downcast this to a [`GenericByteViewArray`] panicking if not possible
///
/// # Panics
///
/// Panics with the message "byte view array" when
/// [`Self::as_bytes_view_opt`] returns `None` for the requested `T`.
fn as_bytes_view<T: ByteViewType>(&self) -> &GenericByteViewArray<T> {
self.as_bytes_view_opt().expect("byte view array")
}

/// Downcast this to a [`GenericByteViewArray`] returning `None` if not possible
///
/// Required method (no default body). The string view, binary view and
/// generic byte view accessors of this trait are all implemented on top of it.
fn as_bytes_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>>;

/// Downcast this to a [`StructArray`] returning `None` if not possible
///
/// Required method (no default body); each implementor supplies the downcast.
fn as_struct_opt(&self) -> Option<&StructArray>;

Expand Down Expand Up @@ -852,6 +880,10 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}

// Attempts the downcast through `as_any()`; the target type
// `GenericByteViewArray<T>` is inferred from the return type, and `None` is
// returned when the downcast does not succeed.
fn as_bytes_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
self.as_any().downcast_ref()
}

// Attempts the downcast to `StructArray` through `as_any()`; `None` is
// returned when the downcast does not succeed.
fn as_struct_opt(&self) -> Option<&StructArray> {
self.as_any().downcast_ref()
}
Expand Down Expand Up @@ -899,6 +931,10 @@ impl AsArray for ArrayRef {
self.as_ref().as_bytes_opt()
}

// Forwards to the `as_bytes_view_opt` implementation of the value obtained
// via `as_ref()`, so both implementations share one downcast path.
fn as_bytes_view_opt<T: ByteViewType>(&self) -> Option<&GenericByteViewArray<T>> {
self.as_ref().as_bytes_view_opt()
}

// Forwards to the `as_struct_opt` implementation of the value obtained via
// `as_ref()`.
fn as_struct_opt(&self) -> Option<&StructArray> {
self.as_ref().as_struct_opt()
}
Expand Down
85 changes: 85 additions & 0 deletions arrow-select/src/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeUtf8 => {
Ok(Arc::new(take_bytes(values.as_string::<i64>(), indices)?))
}
DataType::Utf8View => {
Ok(Arc::new(take_byte_view(values.as_string_view(), indices)?))
}
DataType::List(_) => {
Ok(Arc::new(take_list::<_, Int32Type>(values.as_list(), indices)?))
}
Expand Down Expand Up @@ -204,6 +207,9 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
DataType::LargeBinary => {
Ok(Arc::new(take_bytes(values.as_binary::<i64>(), indices)?))
}
DataType::BinaryView => {
Ok(Arc::new(take_byte_view(values.as_binary_view(), indices)?))
}
DataType::FixedSizeBinary(size) => {
let values = values
.as_any()
Expand Down Expand Up @@ -437,6 +443,20 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
Ok(GenericByteArray::from(array_data))
}

/// `take` implementation for byte view arrays
///
/// Selects the views of `array` at `indices` with `take_native` and the
/// matching null mask with `take_nulls`, then assembles a new
/// [`GenericByteViewArray`] from them. The data buffer list of `array` is
/// handed to the result as a `to_vec()` copy of `array.data_buffers()`; it is
/// not rebuilt per selected element.
///
/// This body only ever produces `Ok`; the `Result` return type lets callers
/// apply `?` as they do for the other `take_*` helpers.
// NOTE(review): `GenericByteViewArray::new` presumably validates its inputs
// and panics on invalid views — confirm against its documentation.
fn take_byte_view<T: ByteViewType, IndexType: ArrowPrimitiveType>(
array: &GenericByteViewArray<T>,
indices: &PrimitiveArray<IndexType>,
) -> Result<GenericByteViewArray<T>, ArrowError> {
let new_views = take_native(array.views(), indices);
let new_nulls = take_nulls(array.nulls(), indices);
Comment on lines +451 to +452
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's beautiful how these traits just magically work

Ok(GenericByteViewArray::new(
new_views,
array.data_buffers().to_vec(),
RinChanNOWWW marked this conversation as resolved.
Show resolved Hide resolved
new_nulls,
))
}

/// `take` implementation for list arrays
///
/// Calculates the index and indexed offset for the inner array,
Expand Down Expand Up @@ -1424,6 +1444,71 @@ mod tests {
assert_eq!(result.as_ref(), &expected);
}

/// Shared body of the byte view `take` tests, generic over the view type `T`.
///
/// Takes the indices `[3, null, 1, 3, 4, 2]` from
/// `["hello", "world", null, "large payload over 12 bytes", "lulu"]` and
/// compares the outcome against a hand-built expected array.
fn _test_byte_view<T>()
where
    T: ByteViewType,
    str: AsRef<T::Native>,
    T::Native: PartialEq,
{
    // Builds a byte view array from optional literals; `None` becomes a null slot.
    let build = |items: &[Option<&str>]| {
        let mut builder = GenericByteViewBuilder::<T>::new();
        for item in items {
            match *item {
                Some(value) => {
                    builder.append_value(value);
                }
                None => {
                    builder.append_null();
                }
            }
        }
        builder.finish()
    };

    let values = build(&[
        Some("hello"),
        Some("world"),
        None,
        Some("large payload over 12 bytes"),
        Some("lulu"),
    ]);
    let positions = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4), Some(2)]);

    let taken = take(&values, &positions, None).unwrap();
    assert_eq!(taken.len(), positions.len());

    let taken = taken
        .as_any()
        .downcast_ref::<GenericByteViewArray<T>>()
        .unwrap();

    // The null index and the index pointing at the null slot (2) are both
    // expected to come out as null.
    let expected = build(&[
        Some("large payload over 12 bytes"),
        None,
        Some("world"),
        Some("large payload over 12 bytes"),
        Some("lulu"),
        None,
    ]);

    _assert_byte_view_equal(taken, &expected);
}

/// Asserts that two byte view arrays have the same length and that the items
/// yielded by their `iter()` are pairwise equal.
fn _assert_byte_view_equal<T>(
    array1: &GenericByteViewArray<T>,
    array2: &GenericByteViewArray<T>,
) where
    T: ByteViewType,
    T::Native: PartialEq,
{
    assert_eq!(array1.len(), array2.len());
    array1
        .iter()
        .zip(array2.iter())
        .for_each(|(left, right)| assert_eq!(left, right));
}

/// Runs the shared byte view `take` scenario for `StringViewType`.
#[test]
fn test_take_string_view() {
    _test_byte_view::<StringViewType>();
}

/// Runs the shared byte view `take` scenario for `BinaryViewType`.
#[test]
fn test_take_binary_view() {
    _test_byte_view::<BinaryViewType>();
}

macro_rules! test_take_list {
($offset_type:ty, $list_data_type:ident, $list_array_type:ident) => {{
// Construct a value array, [[0,0,0], [-1,-2,-1], [], [2,3]]
Expand Down
Loading