Skip to content

Commit 618767e

Browse files
Chen-Yuan-Lai and Cheng-Yuan-Lai authored
doc-gen: migrate scalar functions (string) documentation 3/4 (#13926)
Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
1 parent a08dc0a commit 618767e

File tree

8 files changed: +152 additions, -211 deletions (lines changed)

datafusion/functions/src/string/repeat.rs

Lines changed: 21 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use crate::strings::StringArrayType;
2222
use crate::utils::{make_scalar_function, utf8_to_str_type};
@@ -29,11 +29,29 @@ use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View};
2929
use datafusion_common::cast::as_int64_array;
3030
use datafusion_common::types::{logical_int64, logical_string};
3131
use datafusion_common::{exec_err, Result};
32-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
3332
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
3433
use datafusion_expr::{ScalarUDFImpl, Signature};
3534
use datafusion_expr_common::signature::TypeSignatureClass;
35+
use datafusion_macros::user_doc;
3636

37+
#[user_doc(
38+
doc_section(label = "String Functions"),
39+
description = "Returns a string with an input string repeated a specified number.",
40+
syntax_example = "repeat(str, n)",
41+
sql_example = r#"```sql
42+
> select repeat('data', 3);
43+
+-------------------------------+
44+
| repeat(Utf8("data"),Int64(3)) |
45+
+-------------------------------+
46+
| datadatadata |
47+
+-------------------------------+
48+
```"#,
49+
standard_argument(name = "str", prefix = "String"),
50+
argument(
51+
name = "n",
52+
description = "Number of times to repeat the input string."
53+
)
54+
)]
3755
#[derive(Debug)]
3856
pub struct RepeatFunc {
3957
signature: Signature,
@@ -85,35 +103,10 @@ impl ScalarUDFImpl for RepeatFunc {
85103
}
86104

87105
fn documentation(&self) -> Option<&Documentation> {
88-
Some(get_repeat_doc())
106+
self.doc()
89107
}
90108
}
91109

92-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
93-
94-
fn get_repeat_doc() -> &'static Documentation {
95-
DOCUMENTATION.get_or_init(|| {
96-
Documentation::builder(
97-
DOC_SECTION_STRING,
98-
"Returns a string with an input string repeated a specified number.",
99-
"repeat(str, n)",
100-
)
101-
.with_sql_example(
102-
r#"```sql
103-
> select repeat('data', 3);
104-
+-------------------------------+
105-
| repeat(Utf8("data"),Int64(3)) |
106-
+-------------------------------+
107-
| datadatadata |
108-
+-------------------------------+
109-
```"#,
110-
)
111-
.with_standard_argument("str", Some("String"))
112-
.with_argument("n", "Number of times to repeat the input string.")
113-
.build()
114-
})
115-
}
116-
117110
/// Repeats string the specified number of times.
118111
/// repeat('Pg', 4) = 'PgPgPgPg'
119112
fn repeat(args: &[ArrayRef]) -> Result<ArrayRef> {

datafusion/functions/src/string/replace.rs

Lines changed: 22 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -16,18 +16,36 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait, StringArray};
2222
use arrow::datatypes::DataType;
2323

2424
use crate::utils::{make_scalar_function, utf8_to_str_type};
2525
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2626
use datafusion_common::{exec_err, Result};
27-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2827
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
2928
use datafusion_expr::{ScalarUDFImpl, Signature};
30-
29+
use datafusion_macros::user_doc;
30+
#[user_doc(
31+
doc_section(label = "String Functions"),
32+
description = "Replaces all occurrences of a specified substring in a string with a new substring.",
33+
syntax_example = "replace(str, substr, replacement)",
34+
sql_example = r#"```sql
35+
> select replace('ABabbaBA', 'ab', 'cd');
36+
+-------------------------------------------------+
37+
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
38+
+-------------------------------------------------+
39+
| ABcdbaBA |
40+
+-------------------------------------------------+
41+
```"#,
42+
standard_argument(name = "str", prefix = "String"),
43+
standard_argument(
44+
name = "substr",
45+
prefix = "Substring expression to replace in the input string. Substring"
46+
),
47+
standard_argument(name = "replacement", prefix = "Replacement substring")
48+
)]
3149
#[derive(Debug)]
3250
pub struct ReplaceFunc {
3351
signature: Signature,
@@ -80,33 +98,10 @@ impl ScalarUDFImpl for ReplaceFunc {
8098
}
8199

82100
fn documentation(&self) -> Option<&Documentation> {
83-
Some(get_replace_doc())
101+
self.doc()
84102
}
85103
}
86104

87-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
88-
89-
fn get_replace_doc() -> &'static Documentation {
90-
DOCUMENTATION.get_or_init(|| {
91-
Documentation::builder(
92-
DOC_SECTION_STRING,
93-
"Replaces all occurrences of a specified substring in a string with a new substring.",
94-
"replace(str, substr, replacement)")
95-
.with_sql_example(r#"```sql
96-
> select replace('ABabbaBA', 'ab', 'cd');
97-
+-------------------------------------------------+
98-
| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) |
99-
+-------------------------------------------------+
100-
| ABcdbaBA |
101-
+-------------------------------------------------+
102-
```"#)
103-
.with_standard_argument("str", Some("String"))
104-
.with_standard_argument("substr", Some("Substring expression to replace in the input string. Substring"))
105-
.with_standard_argument("replacement", Some("Replacement substring"))
106-
.build()
107-
})
108-
}
109-
110105
fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
111106
let string_array = as_string_view_array(&args[0])?;
112107
let from_array = as_string_view_array(&args[1])?;

datafusion/functions/src/string/split_part.rs

Lines changed: 19 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,28 @@ use arrow::datatypes::DataType;
2525
use datafusion_common::cast::as_int64_array;
2626
use datafusion_common::ScalarValue;
2727
use datafusion_common::{exec_err, DataFusionError, Result};
28-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2928
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
3029
use datafusion_expr::{ScalarUDFImpl, Signature};
30+
use datafusion_macros::user_doc;
3131
use std::any::Any;
32-
use std::sync::{Arc, OnceLock};
32+
use std::sync::Arc;
3333

34+
#[user_doc(
35+
doc_section(label = "String Functions"),
36+
description = "Splits a string based on a specified delimiter and returns the substring in the specified position.",
37+
syntax_example = "split_part(str, delimiter, pos)",
38+
sql_example = r#"```sql
39+
> select split_part('1.2.3.4.5', '.', 3);
40+
+--------------------------------------------------+
41+
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
42+
+--------------------------------------------------+
43+
| 3 |
44+
+--------------------------------------------------+
45+
```"#,
46+
standard_argument(name = "str", prefix = "String"),
47+
argument(name = "delimiter", description = "String or character to split on."),
48+
argument(name = "pos", description = "Position of the part to return.")
49+
)]
3450
#[derive(Debug)]
3551
pub struct SplitPartFunc {
3652
signature: Signature,
@@ -182,33 +198,10 @@ impl ScalarUDFImpl for SplitPartFunc {
182198
}
183199

184200
fn documentation(&self) -> Option<&Documentation> {
185-
Some(get_split_part_doc())
201+
self.doc()
186202
}
187203
}
188204

189-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
190-
191-
fn get_split_part_doc() -> &'static Documentation {
192-
DOCUMENTATION.get_or_init(|| {
193-
Documentation::builder(
194-
DOC_SECTION_STRING,
195-
"Splits a string based on a specified delimiter and returns the substring in the specified position.",
196-
"split_part(str, delimiter, pos)")
197-
.with_sql_example(r#"```sql
198-
> select split_part('1.2.3.4.5', '.', 3);
199-
+--------------------------------------------------+
200-
| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) |
201-
+--------------------------------------------------+
202-
| 3 |
203-
+--------------------------------------------------+
204-
```"#)
205-
.with_standard_argument("str", Some("String"))
206-
.with_argument("delimiter", "String or character to split on.")
207-
.with_argument("pos", "Position of the part to return.")
208-
.build()
209-
})
210-
}
211-
212205
/// impl
213206
pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>(
214207
string_array: StringArrType,

datafusion/functions/src/string/starts_with.rs

Lines changed: 18 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,16 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use arrow::array::ArrayRef;
2222
use arrow::datatypes::DataType;
2323

2424
use crate::utils::make_scalar_function;
2525
use datafusion_common::{internal_err, Result};
26-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
2726
use datafusion_expr::{ColumnarValue, Documentation};
2827
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
28+
use datafusion_macros::user_doc;
2929

3030
/// Returns true if string starts with prefix.
3131
/// starts_with('alphabet', 'alph') = 't'
@@ -34,6 +34,21 @@ pub fn starts_with(args: &[ArrayRef]) -> Result<ArrayRef> {
3434
Ok(Arc::new(result) as ArrayRef)
3535
}
3636

37+
#[user_doc(
38+
doc_section(label = "String Functions"),
39+
description = "Tests if a string starts with a substring.",
40+
syntax_example = "starts_with(str, substr)",
41+
sql_example = r#"```sql
42+
> select starts_with('datafusion','data');
43+
+----------------------------------------------+
44+
| starts_with(Utf8("datafusion"),Utf8("data")) |
45+
+----------------------------------------------+
46+
| true |
47+
+----------------------------------------------+
48+
```"#,
49+
standard_argument(name = "str", prefix = "String"),
50+
argument(name = "substr", description = "Substring to test for.")
51+
)]
3752
#[derive(Debug)]
3853
pub struct StartsWithFunc {
3954
signature: Signature,
@@ -84,35 +99,10 @@ impl ScalarUDFImpl for StartsWithFunc {
8499
}
85100

86101
fn documentation(&self) -> Option<&Documentation> {
87-
Some(get_starts_with_doc())
102+
self.doc()
88103
}
89104
}
90105

91-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
92-
93-
fn get_starts_with_doc() -> &'static Documentation {
94-
DOCUMENTATION.get_or_init(|| {
95-
Documentation::builder(
96-
DOC_SECTION_STRING,
97-
"Tests if a string starts with a substring.",
98-
"starts_with(str, substr)",
99-
)
100-
.with_sql_example(
101-
r#"```sql
102-
> select starts_with('datafusion','data');
103-
+----------------------------------------------+
104-
| starts_with(Utf8("datafusion"),Utf8("data")) |
105-
+----------------------------------------------+
106-
| true |
107-
+----------------------------------------------+
108-
```"#,
109-
)
110-
.with_standard_argument("str", Some("String"))
111-
.with_argument("substr", "Substring to test for.")
112-
.build()
113-
})
114-
}
115-
116106
#[cfg(test)]
117107
mod tests {
118108
use crate::utils::test::test_function;

datafusion/functions/src/string/to_hex.rs

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
2222
use arrow::datatypes::{
@@ -27,9 +27,10 @@ use crate::utils::make_scalar_function;
2727
use datafusion_common::cast::as_primitive_array;
2828
use datafusion_common::Result;
2929
use datafusion_common::{exec_err, plan_err};
30-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
30+
3131
use datafusion_expr::{ColumnarValue, Documentation};
3232
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
33+
use datafusion_macros::user_doc;
3334

3435
/// Converts the number to its equivalent hexadecimal representation.
3536
/// to_hex(2147483647) = '7fffffff'
@@ -59,6 +60,20 @@ where
5960
Ok(Arc::new(result) as ArrayRef)
6061
}
6162

63+
#[user_doc(
64+
doc_section(label = "String Functions"),
65+
description = "Converts an integer to a hexadecimal string.",
66+
syntax_example = "to_hex(int)",
67+
sql_example = r#"```sql
68+
> select to_hex(12345689);
69+
+-------------------------+
70+
| to_hex(Int64(12345689)) |
71+
+-------------------------+
72+
| bc6159 |
73+
+-------------------------+
74+
```"#,
75+
standard_argument(name = "int", prefix = "Integer")
76+
)]
6277
#[derive(Debug)]
6378
pub struct ToHexFunc {
6479
signature: Signature,
@@ -116,34 +131,10 @@ impl ScalarUDFImpl for ToHexFunc {
116131
}
117132

118133
fn documentation(&self) -> Option<&Documentation> {
119-
Some(get_to_hex_doc())
134+
self.doc()
120135
}
121136
}
122137

123-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
124-
125-
fn get_to_hex_doc() -> &'static Documentation {
126-
DOCUMENTATION.get_or_init(|| {
127-
Documentation::builder(
128-
DOC_SECTION_STRING,
129-
"Converts an integer to a hexadecimal string.",
130-
"to_hex(int)",
131-
)
132-
.with_sql_example(
133-
r#"```sql
134-
> select to_hex(12345689);
135-
+-------------------------+
136-
| to_hex(Int64(12345689)) |
137-
+-------------------------+
138-
| bc6159 |
139-
+-------------------------+
140-
```"#,
141-
)
142-
.with_standard_argument("int", Some("Integer"))
143-
.build()
144-
})
145-
}
146-
147138
#[cfg(test)]
148139
mod tests {
149140
use arrow::array::{Int32Array, StringArray};

0 commit comments

Comments
 (0)