Skip to content

Commit fc60bd0

Browse files
committed
documentation
1 parent 4aae527 commit fc60bd0

File tree

3 files changed

+81
-112
lines changed

3 files changed

+81
-112
lines changed

datafusion/spark/src/function/url/try_parse_url.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,48 @@ use std::any::Any;
2020
use crate::function::url::parse_url::{spark_handled_parse_url, ParseUrl};
2121
use arrow::array::ArrayRef;
2222
use arrow::datatypes::DataType;
23-
use datafusion_common::{exec_datafusion_err, exec_err, plan_err, Result};
23+
use datafusion_common::Result;
2424
use datafusion_expr::{
25-
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
25+
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
26+
Volatility,
2627
};
2728
use datafusion_functions::utils::make_scalar_function;
29+
use datafusion_macros::user_doc;
2830

31+
#[user_doc(
32+
doc_section(label = "URL Functions"),
33+
description = "Extracts components of a URL string. Unlike [`parse_url`](#parse_url), this function never fails with an error on malformed input; instead, it returns `NULL` when parsing is not possible. If the requested component is missing, the result is also `NULL`. When extracting a query parameter with a key, returns `NULL` if the parameter is not present.",
34+
syntax_example = "try_parse_url(url, part[, key])",
35+
sql_example = r#"```sql
36+
SELECT try_parse_url('https://example.com/a?x=1', 'QUERY', 'x');
37+
----
38+
1
39+
40+
SELECT try_parse_url('www.example.com/path?x=1', 'HOST');
41+
----
42+
NULL
43+
44+
SELECT try_parse_url('https://example.com/?a=1', 'QUERY', 'b');
45+
----
46+
NULL
47+
48+
SELECT try_parse_url('https://example.com/path#frag', 'REF');
49+
----
50+
frag
51+
```"#,
52+
argument(
53+
name = "url",
54+
description = "A string expression representing the URL to parse."
55+
),
56+
argument(
57+
name = "part",
58+
description = "Which part of the URL to extract. Supported values: 'HOST' (host name, e.g., example.com or [2001:db8::2]), 'PATH' (e.g., /a/b), 'QUERY' (entire query, e.g., a=1&b=2), 'REF' (fragment without '#', e.g., frag), 'PROTOCOL' (scheme, e.g., http, https, ftp), 'FILE' (path plus query, e.g., /a/b?x=1), 'AUTHORITY' (authority section, including user info and host), 'USERINFO' (user info before the host, e.g., user:pwd)."
59+
),
60+
argument(
61+
name = "key",
62+
description = "(Optional). Only valid when part = 'QUERY'. Extracts the value of the query parameter with that name (case sensitivity may depend on implementation). Returns `NULL` if the parameter is not present."
63+
)
64+
)]
2965
#[derive(Debug, PartialEq, Eq, Hash)]
3066
pub struct TryParseUrl {
3167
signature: Signature,
@@ -72,6 +108,10 @@ impl ScalarUDFImpl for TryParseUrl {
72108
let parse_url: ParseUrl = ParseUrl::new();
73109
parse_url.coerce_types(arg_types)
74110
}
111+
112+
fn documentation(&self) -> Option<&Documentation> {
113+
self.doc()
114+
}
75115
}
76116

77117
fn spark_try_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> {

datafusion/sqllogictest/test_files/spark/url/try_parse_url.slt

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,52 +21,71 @@
2121
# For more information, please see:
2222
# https://github.com/apache/datafusion/issues/15914
2323

24-
query T
24+
query error
2525
SELECT try_parse_url('https://example.com/a?x=1', 'QUERY', 'x'), typeof(try_parse_url('https://example.com/a?x=1', 'QUERY', 'x'));
2626
----
27-
1 string
27+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
28+
Did you mean 'parse_url'?
2829

29-
query T
30+
31+
query error
3032
SELECT try_parse_url('www.example.com/path?x=1', 'HOST'), typeof(try_parse_url('www.example.com/path?x=1', 'HOST'));
3133
----
32-
NULL string
34+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
35+
Did you mean 'parse_url'?
36+
3337

34-
query T
38+
query error
3539
SELECT try_parse_url('https://example.com/?a=1', 'QUERY', 'b'), typeof(try_parse_url('https://example.com/?a=1', 'QUERY', 'b'));
3640
----
37-
NULL string
41+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
42+
Did you mean 'parse_url'?
43+
3844

39-
query T
45+
query error
4046
SELECT try_parse_url('https://example.com/path#frag', 'REF'), typeof(try_parse_url('https://example.com/path#frag', 'REF'));
4147
----
42-
frag string
48+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
49+
Did you mean 'parse_url'?
4350

44-
query T
51+
52+
query error
4553
SELECT try_parse_url('ftp://user:pwd@ftp.example.com:21/files', 'USERINFO'), typeof(try_parse_url('ftp://user:pwd@ftp.example.com:21/files', 'USERINFO'));
4654
----
47-
user:pwd string
55+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
56+
Did you mean 'parse_url'?
57+
4858

49-
query T
59+
query error
5060
SELECT try_parse_url('http://[2001:db8::2]:8080/index.html?ok=1', 'HOST'), typeof(try_parse_url('http://[2001:db8::2]:8080/index.html?ok=1', 'HOST'));
5161
----
52-
[2001:db8::2] string
62+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
63+
Did you mean 'parse_url'?
5364

54-
query T
65+
66+
query error
5567
SELECT try_parse_url('notaurl', 'HOST'), typeof(try_parse_url('notaurl', 'HOST'));
5668
----
57-
NULL string
69+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
70+
Did you mean 'parse_url'?
71+
5872

59-
query T
73+
query error
6074
SELECT try_parse_url('https://example.com', 'PATH'), typeof(try_parse_url('https://example.com', 'PATH'));
6175
----
62-
string
76+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
77+
Did you mean 'parse_url'?
6378

64-
query T
79+
80+
query error
6581
SELECT try_parse_url('https://example.com/a/b?x=1&y=2#frag', 'PROTOCOL'), typeof(try_parse_url('https://example.com/a/b?x=1&y=2#frag', 'PROTOCOL'));
6682
----
67-
https string
83+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
84+
Did you mean 'parse_url'?
85+
6886

69-
query T
87+
query error
7088
SELECT try_parse_url('https://ex.com/?Tag=ok', 'QUERY', 'tag'), typeof(try_parse_url('https://ex.com/?Tag=ok', 'QUERY', 'tag'));
7189
----
72-
NULL string
90+
DataFusion error: Error during planning: Invalid function 'try_parse_url'.
91+
Did you mean 'parse_url'?

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4547,96 +4547,6 @@ union_tag(union_expression)
45474547
+--------------+-------------------------+
45484548
```
45494549

4550-
## URL Functions
4551-
4552-
Functions for parsing and extracting parts of URLs.
4553-
4554-
- [parse_url](#parse_url)
4555-
- [try_parse_url](#try_parse_url)
4556-
4557-
### `parse_url`
4558-
4559-
Extracts a specified part from a URL. If a key is provided, it returns the associated query parameter value.
4560-
```sql
4561-
parse_url(url, part[, key])
4562-
```
4563-
4564-
#### Arguments
4565-
4566-
- **url**: A string expression representing the URL to parse.
4567-
- **part**: A string expression specifying which part of the URL to extract. Supported values include:
4568-
- **'HOST'** – the host name (e.g., example.com or [2001:db8::2])
4569-
- **'PATH'** – the path component (e.g., /a/b)
4570-
- **'QUERY'** – the entire query string (e.g., a=1&b=2)
4571-
- **'REF'** – the fragment identifier (e.g., frag from #frag)
4572-
- **'PROTOCOL'** – the protocol/scheme (e.g., http, https, ftp)
4573-
- **'FILE'** – the path plus query (e.g., /a/b?x=1)
4574-
- **'AUTHORITY'** – the authority section, including user info and host
4575-
- **'USERINFO'** – the user information before the host (e.g., user:pwd)
4576-
- **key**: An optional string expression. Only valid when part = 'QUERY'. Extracts the value of the query parameter with that name. Whether the key match is case-sensitive depends on the implementation.
4577-
4578-
#### Behavior
4579-
4580-
Returns a new `STRING` column in which each value is the part extracted from the corresponding URL. If the URL is invalid, or if the requested component is missing, throws an exception.
4581-
4582-
#### Example
4583-
4584-
```sql
4585-
SELECT parse_url('https://example.com/a?x=1', 'QUERY', 'x');
4586-
----
4587-
1
4588-
4589-
SELECT parse_url('https://example.com/path#frag', 'REF');
4590-
----
4591-
frag
4592-
```
4593-
4594-
### `try_parse_url`
4595-
4596-
Extracts components of a URL string. Unlike parse_url, this function never fails with an error on malformed input; instead, it returns NULL when parsing is not possible.
4597-
4598-
```sql
4599-
try_parse_url(url, part[, key])
4600-
```
4601-
4602-
#### Arguments
4603-
4604-
- **url**: A string expression representing the URL to parse.
4605-
- **part**: A string expression specifying which part of the URL to extract. Supported values include:
4606-
- **'HOST'** – the host name (e.g., example.com or [2001:db8::2])
4607-
- **'PATH'** – the path component (e.g., /a/b)
4608-
- **'QUERY'** – the entire query string (e.g., a=1&b=2)
4609-
- **'REF'** – the fragment identifier (e.g., frag from #frag)
4610-
- **'PROTOCOL'** – the protocol/scheme (e.g., http, https, ftp)
4611-
- **'FILE'** – the path plus query (e.g., /a/b?x=1)
4612-
- **'AUTHORITY'** – the authority section, including user info and host
4613-
- **'USERINFO'** – the user information before the host (e.g., user:pwd)
4614-
- **key**: An optional string expression. Only valid when part = 'QUERY'. Extracts the value of the query parameter with that name. Whether the key match is case-sensitive depends on the implementation.
4615-
4616-
#### Behavior
4617-
4618-
Returns a new `STRING` column in which each value is the part extracted from the corresponding URL. If the URL is invalid, or if the requested component is missing, returns `NULL`. When extracting a query parameter with a key, returns `NULL` if the parameter is not present.
4619-
4620-
#### Example
4621-
4622-
```sql
4623-
SELECT try_parse_url('https://example.com/a?x=1', 'QUERY', 'x');
4624-
----
4625-
1
4626-
4627-
SELECT try_parse_url('www.example.com/path?x=1', 'HOST');
4628-
----
4629-
NULL
4630-
4631-
SELECT try_parse_url('https://example.com/?a=1', 'QUERY', 'b');
4632-
----
4633-
NULL
4634-
4635-
SELECT try_parse_url('https://example.com/path#frag', 'REF');
4636-
----
4637-
frag
4638-
```
4639-
46404550
## Other Functions
46414551

46424552
- [arrow_cast](#arrow_cast)

0 commit comments

Comments
 (0)