|
18 | 18 | use std::mem::size_of; |
19 | 19 |
|
20 | 20 | use arrow::array::{ |
21 | | - make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView, |
22 | | - GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray, |
23 | | - StringViewBuilder, |
| 21 | + make_view, Array, ArrayAccessor, ArrayDataBuilder, ByteView, LargeStringArray, |
| 22 | + StringArray, StringViewArray, StringViewBuilder, |
24 | 23 | }; |
25 | 24 | use arrow::datatypes::DataType; |
26 | 25 | use arrow_buffer::{MutableBuffer, NullBuffer, NullBufferBuilder}; |
27 | 26 |
|
28 | | -/// Abstracts iteration over different types of string arrays. |
29 | | -/// |
30 | | -/// The [`StringArrayType`] trait helps write generic code for string functions that can work with |
31 | | -/// different types of string arrays. |
32 | | -/// |
33 | | -/// Currently three types are supported: |
34 | | -/// - [`StringArray`] |
35 | | -/// - [`LargeStringArray`] |
36 | | -/// - [`StringViewArray`] |
37 | | -/// |
38 | | -/// It is inspired / copied from [arrow-rs]. |
39 | | -/// |
40 | | -/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157 |
41 | | -/// |
42 | | -/// # Examples |
43 | | -/// Generic function that works for [`StringArray`], [`LargeStringArray`] |
44 | | -/// and [`StringViewArray`]: |
45 | | -/// ``` |
46 | | -/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray}; |
47 | | -/// # use datafusion_functions::strings::StringArrayType; |
48 | | -/// |
49 | | -/// /// Combines string values for any StringArrayType type. It can be invoked on |
50 | | -/// /// any combination of `StringArray`, `LargeStringArray` or `StringViewArray` |
51 | | -/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec<String> |
52 | | -/// where S1: StringArrayType<'a>, S2: StringArrayType<'a> |
53 | | -/// { |
54 | | -/// // iterate over the elements of the 2 arrays in parallel |
55 | | -/// array1 |
56 | | -/// .iter() |
57 | | -/// .zip(array2.iter()) |
58 | | -/// .map(|(s1, s2)| { |
59 | | -/// // if both values are non null, combine them |
60 | | -/// if let (Some(s1), Some(s2)) = (s1, s2) { |
61 | | -/// format!("{s1}{s2}") |
62 | | -/// } else { |
63 | | -/// "None".to_string() |
64 | | -/// } |
65 | | -/// }) |
66 | | -/// .collect() |
67 | | -/// } |
68 | | -/// |
69 | | -/// let string_array = StringArray::from(vec!["foo", "bar"]); |
70 | | -/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]); |
71 | | -/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]); |
72 | | -/// |
73 | | -/// // can invoke this function with a string array and large string array |
74 | | -/// assert_eq!( |
75 | | -/// combine_values(&string_array, &large_string_array), |
76 | | -/// vec![String::from("foofoo2"), String::from("barbar2")] |
77 | | -/// ); |
78 | | -/// |
79 | | -/// // Can call the same function with string array and string view array |
80 | | -/// assert_eq!( |
81 | | -/// combine_values(&string_array, &string_view_array), |
82 | | -/// vec![String::from("foofoo3"), String::from("barbar3")] |
83 | | -/// ); |
84 | | -/// ``` |
85 | | -/// |
86 | | -/// [`LargeStringArray`]: arrow::array::LargeStringArray |
87 | | -pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized { |
88 | | - /// Return an [`ArrayIter`] over the values of the array. |
89 | | - /// |
90 | | - /// This iterator returns `Option<&str>` for each item in the array. |
91 | | - fn iter(&self) -> ArrayIter<Self>; |
92 | | - |
93 | | - /// Check if the array is ASCII only. |
94 | | - fn is_ascii(&self) -> bool; |
95 | | -} |
96 | | - |
97 | | -impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<T> { |
98 | | - fn iter(&self) -> ArrayIter<Self> { |
99 | | - GenericStringArray::<T>::iter(self) |
100 | | - } |
101 | | - |
102 | | - fn is_ascii(&self) -> bool { |
103 | | - GenericStringArray::<T>::is_ascii(self) |
104 | | - } |
105 | | -} |
106 | | - |
107 | | -impl<'a> StringArrayType<'a> for &'a StringViewArray { |
108 | | - fn iter(&self) -> ArrayIter<Self> { |
109 | | - StringViewArray::iter(self) |
110 | | - } |
111 | | - |
112 | | - fn is_ascii(&self) -> bool { |
113 | | - StringViewArray::is_ascii(self) |
114 | | - } |
115 | | -} |
116 | | - |
117 | 27 | /// Optimized version of the StringBuilder in Arrow that: |
118 | 28 | /// 1. Precalculates the expected length of the result, avoiding reallocations. |
119 | 29 | /// 2. Avoids creating / incrementally creating a `NullBufferBuilder` |
|
0 commit comments