Skip to content

Commit 3ee71fe

Browse files
committed
Character length
1 parent d2a2a8b commit 3ee71fe

File tree

1 file changed

+48
-30
lines changed

1 file changed

+48
-30
lines changed

datafusion/functions/src/unicode/character_length.rs

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use crate::utils::{make_scalar_function, utf8_to_int_type};
1919
use arrow::array::{
20-
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder,
20+
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveArray,
2121
StringArrayType,
2222
};
2323
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
@@ -131,46 +131,64 @@ where
131131
T::Native: OffsetSizeTrait,
132132
V: StringArrayType<'a>,
133133
{
134-
let mut builder = PrimitiveBuilder::<T>::with_capacity(array.len());
135-
136134
// String characters are variable-length encoded in UTF-8, so counting the
137135
// number of chars requires expensive decoding; however, checking if the
138136
// string is ASCII only is relatively cheap.
139137
// If strings are ASCII only, count bytes instead.
140138
let is_array_ascii_only = array.is_ascii();
141-
if array.null_count() == 0 {
139+
let array = if array.null_count() == 0 {
142140
if is_array_ascii_only {
143-
for i in 0..array.len() {
144-
let value = array.value(i);
145-
builder.append_value(T::Native::usize_as(value.len()));
146-
}
141+
let values: Vec<_> = (0..array.len())
142+
.map(|i| {
143+
let value = array.value(i);
144+
T::Native::usize_as(value.len())
145+
})
146+
.collect();
147+
PrimitiveArray::<T>::new(values.into(), None)
147148
} else {
148-
for i in 0..array.len() {
149-
let value = array.value(i);
150-
builder.append_value(T::Native::usize_as(value.chars().count()));
151-
}
149+
let values: Vec<_> = (0..array.len())
150+
.map(|i| {
151+
let value = array.value(i);
152+
if value.is_ascii() {
153+
T::Native::usize_as(value.len())
154+
} else {
155+
T::Native::usize_as(value.chars().count())
156+
}
157+
})
158+
.collect();
159+
PrimitiveArray::<T>::new(values.into(), None)
152160
}
153161
} else if is_array_ascii_only {
154-
for i in 0..array.len() {
155-
if array.is_null(i) {
156-
builder.append_null();
157-
} else {
158-
let value = array.value(i);
159-
builder.append_value(T::Native::usize_as(value.len()));
160-
}
161-
}
162+
let values: Vec<_> = (0..array.len())
163+
.map(|i| {
164+
if array.is_null(i) {
165+
T::default_value()
166+
} else {
167+
let value = array.value(i);
168+
T::Native::usize_as(value.len())
169+
}
170+
})
171+
.collect();
172+
PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
162173
} else {
163-
for i in 0..array.len() {
164-
if array.is_null(i) {
165-
builder.append_null();
166-
} else {
167-
let value = array.value(i);
168-
builder.append_value(T::Native::usize_as(value.chars().count()));
169-
}
170-
}
171-
}
174+
let values: Vec<_> = (0..array.len())
175+
.map(|i| {
176+
if array.is_null(i) {
177+
T::default_value()
178+
} else {
179+
let value = array.value(i);
180+
if value.is_ascii() {
181+
T::Native::usize_as(value.len())
182+
} else {
183+
T::Native::usize_as(value.chars().count())
184+
}
185+
}
186+
})
187+
.collect();
188+
PrimitiveArray::<T>::new(values.into(), array.nulls().cloned())
189+
};
172190

173-
Ok(Arc::new(builder.finish()) as ArrayRef)
191+
Ok(Arc::new(array))
174192
}
175193

176194
#[cfg(test)]

0 commit comments

Comments
 (0)