Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 21 additions & 28 deletions datafusion/functions/src/string/repeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::strings::StringArrayType;
use crate::utils::{make_scalar_function, utf8_to_str_type};
Expand All @@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
use datafusion_common::cast::as_int64_array;
use datafusion_common::types::{logical_int64, logical_string};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_expr_common::signature::TypeSignatureClass;
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns a string with an input string repeated a specified number.",
syntax_example = "repeat(str, n)",
sql_example = r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "n",
description = "Number of times to repeat the input string."
)
)]
#[derive(Debug)]
pub struct RepeatFunc {
signature: Signature,
Expand Down Expand Up @@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc {
}

/// Returns this function's [`Documentation`], if it has any.
// NOTE(review): the two body lines below look like the before/after sides of a
// diff hunk (the older `get_repeat_doc()` lookup and the newer `self.doc()`).
// Only one of them can be the tail expression — confirm which the real file keeps.
fn documentation(&self) -> Option<&Documentation> {
Some(get_repeat_doc())
self.doc()
}
}

/// Process-wide slot that holds the `repeat` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `repeat`, constructing it on first use.
fn get_repeat_doc() -> &'static Documentation {
    /// Assembles the documentation entry; invoked by `get_or_init` only while
    /// the slot is still empty.
    fn build_doc() -> Documentation {
        let description =
            "Returns a string with an input string repeated a specified number.";
        let syntax = "repeat(str, n)";
        let sql_example = r#"```sql
> select repeat('data', 3);
+-------------------------------+
| repeat(Utf8("data"),Int64(3)) |
+-------------------------------+
| datadatadata |
+-------------------------------+
```"#;

        // Arguments are registered in the same order as the SQL signature.
        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("n", "Number of times to repeat the input string.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_doc)
}

/// Repeats string the specified number of times.
/// repeat('Pg', 4) = 'PgPgPgPg'
fn repeat(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
49 changes: 22 additions & 27 deletions datafusion/functions/src/string/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,36 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
use arrow::datatypes::DataType;

use crate::utils::{make_scalar_function, utf8_to_str_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};

use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "String Functions"),
description = "Replaces all occurrences of a specified substring in a string with a new substring.",
syntax_example = "replace(str, substr, replacement)",
sql_example = r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
standard_argument(
name = "substr",
prefix = "Substring expression to replace in the input string. Substring"
),
standard_argument(name = "replacement", prefix = "Replacement substring")
)]
#[derive(Debug)]
pub struct ReplaceFunc {
signature: Signature,
Expand Down Expand Up @@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc {
}

/// Returns this function's [`Documentation`], if it has any.
// NOTE(review): the two body lines below look like the before/after sides of a
// diff hunk (the older `get_replace_doc()` lookup and the newer `self.doc()`).
// Only one of them can be the tail expression — confirm which the real file keeps.
fn documentation(&self) -> Option<&Documentation> {
Some(get_replace_doc())
self.doc()
}
}

/// Process-wide slot that holds the `replace` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `replace`, constructing it on first use.
fn get_replace_doc() -> &'static Documentation {
    /// Assembles the documentation entry; invoked by `get_or_init` only while
    /// the slot is still empty.
    fn build_doc() -> Documentation {
        let sql_example = r#"```sql
> select replace('ABabbaBA', 'ab', 'cd');
+-------------------------------------------------+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
+-------------------------------------------------+
| ABcdbaBA |
+-------------------------------------------------+
```"#;

        let builder = Documentation::builder(
            DOC_SECTION_STRING,
            "Replaces all occurrences of a specified substring in a string with a new substring.",
            "replace(str, substr, replacement)",
        );

        // Arguments are registered in the same order as the SQL signature.
        builder
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_standard_argument(
                "substr",
                Some("Substring expression to replace in the input string. Substring"),
            )
            .with_standard_argument("replacement", Some("Replacement substring"))
            .build()
    }

    DOCUMENTATION.get_or_init(build_doc)
}

fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
let string_array = as_string_view_array(&args[0])?;
let from_array = as_string_view_array(&args[1])?;
Expand Down
45 changes: 19 additions & 26 deletions datafusion/functions/src/string/split_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,28 @@ use arrow::datatypes::DataType;
use datafusion_common::cast::as_int64_array;
use datafusion_common::ScalarValue;
use datafusion_common::{exec_err, DataFusionError, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Splits a string based on a specified delimiter and returns the substring in the specified position.",
syntax_example = "split_part(str, delimiter, pos)",
sql_example = r#"```sql
> select split_part('1.2.3.4.5', '.', 3);
+--------------------------------------------------+
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
+--------------------------------------------------+
| 3 |
+--------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "delimiter", description = "String or character to split on."),
argument(name = "pos", description = "Position of the part to return.")
)]
#[derive(Debug)]
pub struct SplitPartFunc {
signature: Signature,
Expand Down Expand Up @@ -182,33 +198,10 @@ impl ScalarUDFImpl for SplitPartFunc {
}

/// Returns this function's [`Documentation`], if it has any.
// NOTE(review): the two body lines below look like the before/after sides of a
// diff hunk (the older `get_split_part_doc()` lookup and the newer `self.doc()`).
// Only one of them can be the tail expression — confirm which the real file keeps.
fn documentation(&self) -> Option<&Documentation> {
Some(get_split_part_doc())
self.doc()
}
}

/// Process-wide slot that holds the `split_part` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `split_part`, constructing it on first use.
fn get_split_part_doc() -> &'static Documentation {
    /// Assembles the documentation entry; invoked by `get_or_init` only while
    /// the slot is still empty.
    fn build_doc() -> Documentation {
        let description = "Splits a string based on a specified delimiter and returns the substring in the specified position.";
        let syntax = "split_part(str, delimiter, pos)";
        let sql_example = r#"```sql
> select split_part('1.2.3.4.5', '.', 3);
+--------------------------------------------------+
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
+--------------------------------------------------+
| 3 |
+--------------------------------------------------+
```"#;

        // Arguments are registered in the same order as the SQL signature.
        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("delimiter", "String or character to split on.")
            .with_argument("pos", "Position of the part to return.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_doc)
}

/// impl
pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
string_array: StringArrType,
Expand Down
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/starts_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::datatypes::DataType;

use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

/// Returns true if string starts with prefix.
/// starts_with('alphabet', 'alph') = 't'
Expand All @@ -34,6 +34,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Tests if a string starts with a substring.",
syntax_example = "starts_with(str, substr)",
sql_example = r#"```sql
> select starts_with('datafusion','data');
+----------------------------------------------+
| starts_with(Utf8("datafusion"),Utf8("data")) |
+----------------------------------------------+
| true |
+----------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "substr", description = "Substring to test for.")
)]
#[derive(Debug)]
pub struct StartsWithFunc {
signature: Signature,
Expand Down Expand Up @@ -84,35 +99,10 @@ impl ScalarUDFImpl for StartsWithFunc {
}

/// Returns this function's [`Documentation`], if it has any.
// NOTE(review): the two body lines below look like the before/after sides of a
// diff hunk (the older `get_starts_with_doc()` lookup and the newer `self.doc()`).
// Only one of them can be the tail expression — confirm which the real file keeps.
fn documentation(&self) -> Option<&Documentation> {
Some(get_starts_with_doc())
self.doc()
}
}

/// Process-wide slot that holds the `starts_with` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `starts_with`, constructing it on first use.
fn get_starts_with_doc() -> &'static Documentation {
    /// Assembles the documentation entry; invoked by `get_or_init` only while
    /// the slot is still empty.
    fn build_doc() -> Documentation {
        let description = "Tests if a string starts with a substring.";
        let syntax = "starts_with(str, substr)";
        let sql_example = r#"```sql
> select starts_with('datafusion','data');
+----------------------------------------------+
| starts_with(Utf8("datafusion"),Utf8("data")) |
+----------------------------------------------+
| true |
+----------------------------------------------+
```"#;

        // Arguments are registered in the same order as the SQL signature.
        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("str", Some("String"))
            .with_argument("substr", "Substring to test for.")
            .build()
    }

    DOCUMENTATION.get_or_init(build_doc)
}

#[cfg(test)]
mod tests {
use crate::utils::test::test_function;
Expand Down
45 changes: 18 additions & 27 deletions datafusion/functions/src/string/to_hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
use arrow::datatypes::{
Expand All @@ -27,9 +27,10 @@ use crate::utils::make_scalar_function;
use datafusion_common::cast::as_primitive_array;
use datafusion_common::Result;
use datafusion_common::{exec_err, plan_err};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;

use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

/// Converts the number to its equivalent hexadecimal representation.
/// to_hex(2147483647) = '7fffffff'
Expand Down Expand Up @@ -59,6 +60,20 @@ where
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Converts an integer to a hexadecimal string.",
syntax_example = "to_hex(int)",
sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159 |
+-------------------------+
```"#,
standard_argument(name = "int", prefix = "Integer")
)]
#[derive(Debug)]
pub struct ToHexFunc {
signature: Signature,
Expand Down Expand Up @@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc {
}

/// Returns this function's [`Documentation`], if it has any.
// NOTE(review): the two body lines below look like the before/after sides of a
// diff hunk (the older `get_to_hex_doc()` lookup and the newer `self.doc()`).
// Only one of them can be the tail expression — confirm which the real file keeps.
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_hex_doc())
self.doc()
}
}

/// Process-wide slot that holds the `to_hex` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for `to_hex`, constructing it on first use.
fn get_to_hex_doc() -> &'static Documentation {
    /// Assembles the documentation entry; invoked by `get_or_init` only while
    /// the slot is still empty.
    fn build_doc() -> Documentation {
        let description = "Converts an integer to a hexadecimal string.";
        let syntax = "to_hex(int)";
        let sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159 |
+-------------------------+
```"#;

        Documentation::builder(DOC_SECTION_STRING, description, syntax)
            .with_sql_example(sql_example)
            .with_standard_argument("int", Some("Integer"))
            .build()
    }

    DOCUMENTATION.get_or_init(build_doc)
}

#[cfg(test)]
mod tests {
use arrow::array::{Int32Array, StringArray};
Expand Down
Loading
Loading