Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions datafusion/functions-nested/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::utils::make_scalar_function;
use arrow::compute::cast;
use arrow_array::builder::{ArrayBuilder, LargeStringBuilder, StringViewBuilder};
use arrow_array::cast::AsArray;
use arrow_array::{GenericStringArray, StringViewArray};
use arrow_array::{GenericStringArray, StringArrayType, StringViewArray};
use arrow_schema::DataType::{
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View,
};
Expand All @@ -45,7 +45,6 @@ use datafusion_common::exec_err;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions::strings::StringArrayType;
use datafusion_functions::{downcast_arg, downcast_named_arg};
use datafusion_macros::user_doc;
use std::sync::Arc;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/datetime/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ use std::sync::Arc;

use arrow::array::{
Array, ArrowPrimitiveType, AsArray, GenericStringArray, PrimitiveArray,
StringViewArray,
StringArrayType, StringViewArray,
};
use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
use arrow::datatypes::DataType;
use chrono::format::{parse, Parsed, StrftimeItems};
use chrono::LocalResult::Single;
use chrono::{DateTime, TimeZone, Utc};

use crate::strings::StringArrayType;
use datafusion_common::cast::as_generic_string_array;
use datafusion_common::{
exec_err, unwrap_or_internal_err, DataFusionError, Result, ScalarType, ScalarValue,
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/regex/regexpcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array};
use arrow::array::{Array, ArrayRef, AsArray, Datum, Int64Array, StringArrayType};
use arrow::datatypes::{DataType, Int64Type};
use arrow::datatypes::{
DataType::Int64, DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View,
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/string/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/string/split_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::utf8_to_str_type;
use arrow::array::{
ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringViewArray,
ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringArrayType,
StringViewArray,
};
use arrow::array::{AsArray, GenericStringBuilder};
use arrow::datatypes::DataType;
Expand Down
61 changes: 3 additions & 58 deletions datafusion/functions/src/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,64 +26,7 @@ use arrow::datatypes::DataType;
use arrow_buffer::{MutableBuffer, NullBuffer, NullBufferBuilder};

/// Abstracts iteration over different types of string arrays.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double checked and this was made public in arrow in this PR:

Thanks @tlm365 (for both changes!)

///
/// The [`StringArrayType`] trait helps write generic code for string functions that can work with
/// different types of string arrays.
///
/// Currently three types are supported:
/// - [`StringArray`]
/// - [`LargeStringArray`]
/// - [`StringViewArray`]
///
/// It is inspired / copied from [arrow-rs].
///
/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157
///
/// # Examples
/// Generic function that works for [`StringArray`], [`LargeStringArray`]
/// and [`StringViewArray`]:
/// ```
/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray};
/// # use datafusion_functions::strings::StringArrayType;
///
/// /// Combines string values for any StringArrayType type. It can be invoked on
/// /// any combination of `StringArray`, `LargeStringArray` or `StringViewArray`
/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec<String>
/// where S1: StringArrayType<'a>, S2: StringArrayType<'a>
/// {
/// // iterate over the elements of the 2 arrays in parallel
/// array1
/// .iter()
/// .zip(array2.iter())
/// .map(|(s1, s2)| {
/// // if both values are non null, combine them
/// if let (Some(s1), Some(s2)) = (s1, s2) {
/// format!("{s1}{s2}")
/// } else {
/// "None".to_string()
/// }
/// })
/// .collect()
/// }
///
/// let string_array = StringArray::from(vec!["foo", "bar"]);
/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]);
/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]);
///
/// // Can invoke this function with a string array and a large string array
/// assert_eq!(
/// combine_values(&string_array, &large_string_array),
/// vec![String::from("foofoo2"), String::from("barbar2")]
/// );
///
/// // Can call the same function with string array and string view array
/// assert_eq!(
/// combine_values(&string_array, &string_view_array),
/// vec![String::from("foofoo3"), String::from("barbar3")]
/// );
/// ```
///
/// [`LargeStringArray`]: arrow::array::LargeStringArray
#[deprecated(since = "45.0.0", note = "Use arrow::array::StringArrayType instead")]
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be nicer to deprecate this trait instead of immediately removing it as described here:

https://datafusion.apache.org/library-user-guide/api-health.html#deprecation-guidelines

Basically, we could remove the doc comments and just leave a `#[deprecated]` attribute on the trait.

/// Return an [`ArrayIter`] over the values of the array.
///
Expand All @@ -94,6 +37,7 @@ pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
fn is_ascii(&self) -> bool;
}

#[allow(deprecated)]
impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<T> {
fn iter(&self) -> ArrayIter<Self> {
GenericStringArray::<T>::iter(self)
Expand All @@ -104,6 +48,7 @@ impl<'a, T: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<T> {
}
}

#[allow(deprecated)]
impl<'a> StringArrayType<'a> for &'a StringViewArray {
fn iter(&self) -> ArrayIter<Self> {
StringViewArray::iter(self)
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/src/unicode/character_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{
Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder,
StringArrayType,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::Result;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/unicode/lpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ use std::sync::Arc;

use arrow::array::{
Array, ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use unicode_segmentation::UnicodeSegmentation;
use DataType::{LargeUtf8, Utf8, Utf8View};

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
Expand Down
3 changes: 1 addition & 2 deletions datafusion/functions/src/unicode/rpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@
// specific language governing permissions and limitations
// under the License.

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
Expand Down
5 changes: 3 additions & 2 deletions datafusion/functions/src/unicode/strpos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_int_type};
use arrow::array::{ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray};
use arrow::array::{
ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::{
Expand Down
4 changes: 2 additions & 2 deletions datafusion/functions/src/unicode/substr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
use std::any::Any;
use std::sync::Arc;

use crate::strings::{make_and_append_view, StringArrayType};
use crate::strings::make_and_append_view;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
Array, ArrayIter, ArrayRef, AsArray, GenericStringBuilder, Int64Array,
OffsetSizeTrait, StringViewArray,
OffsetSizeTrait, StringArrayType, StringViewArray,
};
use arrow::datatypes::DataType;
use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
Expand Down
Loading