|
17 | 17 |
|
18 | 18 | use crate::utils::{make_scalar_function, utf8_to_int_type};
|
19 | 19 | use arrow::array::{
|
20 |
| - Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder, |
| 20 | + Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveArray, |
21 | 21 | StringArrayType,
|
22 | 22 | };
|
23 | 23 | use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
|
@@ -131,46 +131,64 @@ where
|
131 | 131 | T::Native: OffsetSizeTrait,
|
132 | 132 | V: StringArrayType<'a>,
|
133 | 133 | {
|
134 |
| - let mut builder = PrimitiveBuilder::<T>::with_capacity(array.len()); |
135 |
| - |
136 | 134 | // String characters are variable length encoded in UTF-8, counting the
|
137 | 135 | // number of chars requires expensive decoding, however checking if the
|
138 | 136 | // string is ASCII only is relatively cheap.
|
139 | 137 | // If strings are ASCII only, count bytes instead.
|
140 | 138 | let is_array_ascii_only = array.is_ascii();
|
141 |
| - if array.null_count() == 0 { |
| 139 | + let array = if array.null_count() == 0 { |
142 | 140 | if is_array_ascii_only {
|
143 |
| - for i in 0..array.len() { |
144 |
| - let value = array.value(i); |
145 |
| - builder.append_value(T::Native::usize_as(value.len())); |
146 |
| - } |
| 141 | + let values: Vec<_> = (0..array.len()) |
| 142 | + .map(|i| { |
| 143 | + let value = array.value(i); |
| 144 | + T::Native::usize_as(value.len()) |
| 145 | + }) |
| 146 | + .collect(); |
| 147 | + PrimitiveArray::<T>::new(values.into(), None) |
147 | 148 | } else {
|
148 |
| - for i in 0..array.len() { |
149 |
| - let value = array.value(i); |
150 |
| - builder.append_value(T::Native::usize_as(value.chars().count())); |
151 |
| - } |
| 149 | + let values: Vec<_> = (0..array.len()) |
| 150 | + .map(|i| { |
| 151 | + let value = array.value(i); |
| 152 | + if value.is_ascii() { |
| 153 | + T::Native::usize_as(value.len()) |
| 154 | + } else { |
| 155 | + T::Native::usize_as(value.chars().count()) |
| 156 | + } |
| 157 | + }) |
| 158 | + .collect(); |
| 159 | + PrimitiveArray::<T>::new(values.into(), None) |
152 | 160 | }
|
153 | 161 | } else if is_array_ascii_only {
|
154 |
| - for i in 0..array.len() { |
155 |
| - if array.is_null(i) { |
156 |
| - builder.append_null(); |
157 |
| - } else { |
158 |
| - let value = array.value(i); |
159 |
| - builder.append_value(T::Native::usize_as(value.len())); |
160 |
| - } |
161 |
| - } |
| 162 | + let values: Vec<_> = (0..array.len()) |
| 163 | + .map(|i| { |
| 164 | + if array.is_null(i) { |
| 165 | + T::default_value() |
| 166 | + } else { |
| 167 | + let value = array.value(i); |
| 168 | + T::Native::usize_as(value.len()) |
| 169 | + } |
| 170 | + }) |
| 171 | + .collect(); |
| 172 | + PrimitiveArray::<T>::new(values.into(), array.nulls().cloned()) |
162 | 173 | } else {
|
163 |
| - for i in 0..array.len() { |
164 |
| - if array.is_null(i) { |
165 |
| - builder.append_null(); |
166 |
| - } else { |
167 |
| - let value = array.value(i); |
168 |
| - builder.append_value(T::Native::usize_as(value.chars().count())); |
169 |
| - } |
170 |
| - } |
171 |
| - } |
| 174 | + let values: Vec<_> = (0..array.len()) |
| 175 | + .map(|i| { |
| 176 | + if array.is_null(i) { |
| 177 | + T::default_value() |
| 178 | + } else { |
| 179 | + let value = array.value(i); |
| 180 | + if value.is_ascii() { |
| 181 | + T::Native::usize_as(value.len()) |
| 182 | + } else { |
| 183 | + T::Native::usize_as(value.chars().count()) |
| 184 | + } |
| 185 | + } |
| 186 | + }) |
| 187 | + .collect(); |
| 188 | + PrimitiveArray::<T>::new(values.into(), array.nulls().cloned()) |
| 189 | + }; |
172 | 190 |
|
173 |
| - Ok(Arc::new(builder.finish()) as ArrayRef) |
| 191 | + Ok(Arc::new(array)) |
174 | 192 | }
|
175 | 193 |
|
176 | 194 | #[cfg(test)]
|
|
0 commit comments