Skip to content

Commit

Permalink
Fix edge cases in JavaScript regexp_extract and regexp_extract_all UD…
Browse files Browse the repository at this point in the history
…Fs (GoogleCloudPlatform#369)

* Fix edge cases in JavaScript regexp_extract and regexp_extract_all UDFs

* Fix edge cases in JavaScript regexp_extract and regexp_extract_all UDFs

---------

Co-authored-by: Daniel De Leo <danieldeleo@users.noreply.github.com>
  • Loading branch information
mswapnilG and danieldeleo authored Jul 10, 2023
1 parent 27bb290 commit 4c153b2
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 13 deletions.
16 changes: 14 additions & 2 deletions udfs/community/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -706,19 +706,31 @@ sg
```
### [cw_regexp_extract(str STRING, regexp STRING)](cw_regexp_extract.sqlx)
Returns the first substring matched by the regular expression regexp in str.
Extracts the first substring matched by the regular expression regexp in str. Returns NULL if the regular expression has no match, or if either str or regexp is NULL.
```sql
SELECT bqutil.fn.cw_regexp_extract('TestStr123456#?%&', 'Str');
SELECT bqutil.fn.cw_regexp_extract('TestStr123456#?%&', 'StrX');
SELECT bqutil.fn.cw_regexp_extract(NULL, 'StrX');
SELECT bqutil.fn.cw_regexp_extract('TestStr123456#?%&', NULL);
Str
NULL
NULL
NULL
```

### [cw_regexp_extract_all(str STRING, regexp STRING)](cw_regexp_extract_all.sqlx)
Returns the substring(s) matched by the regular expression regexp in str.
Returns the substring(s) matched by the regular expression regexp in str. Returns NULL if the regular expression has no match, or if either str or regexp is NULL.
```sql
SELECT bqutil.fn.cw_regexp_extract_all('TestStr123456', 'Str.*');
SELECT bqutil.fn.cw_regexp_extract_all('TestStr123456', 'StrX.*');
SELECT bqutil.fn.cw_regexp_extract_all(NULL, 'Str.*');
SELECT bqutil.fn.cw_regexp_extract_all('TestStr123456', NULL);
[Str123456]
NULL
NULL
NULL
```
### [cw_regexp_extract_all_n(str STRING, regexp STRING, groupn INT64)](cw_regexp_extract_all_n.sqlx)
Expand Down
13 changes: 5 additions & 8 deletions udfs/community/cw_regexp_extract.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,11 @@ config { hasOutput: true }
* limitations under the License.
*/

/* Returns the first substring matched by the regular expression `regexp` in `str`. */
/* Extracts the first substring matched by the regular expression `regexp` in `str`. Returns NULL if the regular expression has no match, or if either `str` or `regexp` is NULL. */
/* NOTE(review): this span looks like a rendered diff whose removed and added lines are interleaved without +/- markers (two header comments, two description options, two AS bodies). Confirm against the repository before treating it as a single statement. */
CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING) RETURNS STRING
LANGUAGE js
OPTIONS (
description="Returns the first substring matched by the regular expression `regexp` in `str`."
description="""Extracts the first substring matched by the regular expression `regexp` in `str`, returns null if the regex doesn't have a match or either str or regexp is null."""
)
/* JavaScript body: compiles `regexp` with no flags and returns element 0 of the `str.match` result. NOTE(review): `str.match` yields null when nothing matches, so indexing it would throw; presumably this is the edge case named in the commit title. */
AS """
var r = new RegExp(regexp);
var a = str.match(r);
return a[0];
""";
/* SQL body: calls the cw_regexp_extract_all UDF with the same arguments and takes the array element at offset 0 through SAFE_OFFSET, which is expected to give NULL rather than an error when that element does not exist. */
AS (
${ref("cw_regexp_extract_all")}(str, regexp)[SAFE_OFFSET(0)]
);
9 changes: 6 additions & 3 deletions udfs/community/cw_regexp_extract_all.sqlx
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@ config { hasOutput: true }
* limitations under the License.
*/

/* Returns the substring(s) matched by the regular expression `regexp` in `str`. */
/* Returns the substring(s) matched by the regular expression `regexp` in `str`. Returns NULL if the regular expression has no match, or if either `str` or `regexp` is NULL. */
/* NOTE(review): this span looks like a rendered diff whose removed and added lines are interleaved without +/- markers (two header comments, a repeated LANGUAGE clause, two description options). Confirm against the repository before treating it as a single statement. */
CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING) RETURNS ARRAY<STRING>
LANGUAGE js
LANGUAGE js
OPTIONS (
description="Returns the substring(s) matched by the regular expression `regexp` in `str`."
description="Returns the substring(s) matched by the regular expression `regexp` in `str`, returns null if the regex doesn't have a match or either str or regexp is null."
)
/* JavaScript body: returns null up front when either argument is null (the loose `== null` comparison also covers undefined). Otherwise it compiles `regexp` with the global flag and returns the result of `str.match`, which is an array of every full match, or null when nothing matches. */
AS """
if (str == null || regexp == null) {
return null;
}
var r = new RegExp(regexp, "g");
return str.match(r);
""";
56 changes: 56 additions & 0 deletions udfs/community/test_cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -1922,6 +1922,34 @@ generate_udf_test("cw_regexp_extract", [
],
expected_output: `"Str"`
},
{
inputs: [
`"TestStr123456#?%&"`,
`"StrX"`
],
expected_output: `NULL`
},
{
inputs: [
`"TestStr123456#?%&"`,
`NULL`
],
expected_output: `NULL`
},
{
inputs: [
`NULL`,
`"StrX"`
],
expected_output: `NULL`
},
{
inputs: [
`NULL`,
`NULL`
],
expected_output: `NULL`
},
]);
generate_udf_test("cw_regexp_extract_n", [
{
Expand All @@ -1941,6 +1969,34 @@ generate_udf_test("cw_regexp_extract_all", [
],
expected_output: `CAST(["Str123456"] AS ARRAY<STRING>)`
},
{
inputs: [
`"TestStr123456"`,
`"StrX"`
],
expected_output: `NULL`
},
{
inputs: [
`"TestStr123456"`,
`NULL`
],
expected_output: `NULL`
},
{
inputs: [
`NULL`,
`"StrX"`
],
expected_output: `NULL`
},
{
inputs: [
`NULL`,
`NULL`
],
expected_output: `NULL`
},
]);
generate_udf_test("cw_regexp_extract_all_n", [
{
Expand Down

0 comments on commit 4c153b2

Please sign in to comment.