Skip to content

Commit

Permalink
[Enhancement] Update regexp_extract function for trino parser (#34845)
Browse files Browse the repository at this point in the history
Signed-off-by: zenoyang <cookie.yz@qq.com>
  • Loading branch information
zenoyang authored Nov 15, 2023
1 parent 0e0b2bb commit c9cd165
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
package com.starrocks.connector.parser.trino;

import com.google.common.collect.ImmutableList;
import com.starrocks.analysis.BinaryPredicate;
import com.starrocks.analysis.BinaryType;
import com.starrocks.analysis.CastExpr;
import com.starrocks.analysis.CollectionElementExpr;
import com.starrocks.analysis.Expr;
import com.starrocks.analysis.FunctionCallExpr;
import com.starrocks.analysis.IntLiteral;
import com.starrocks.analysis.NullLiteral;
import com.starrocks.analysis.StringLiteral;
import com.starrocks.analysis.TimestampArithmeticExpr;
import com.starrocks.catalog.Type;
Expand Down Expand Up @@ -77,6 +80,13 @@ public static Expr transform(String functionName, Expr... args) {
throw new SemanticException("element_at function must have 2 arguments");
}
return new CollectionElementExpr(args[0], args[1]);
} else if (functionName.equalsIgnoreCase("regexp_extract")) {
// regexp_extract(string, pattern) -> regexp_extract(string, pattern, 0); an explicit third argument (group index) is passed through unchanged
FunctionCallExpr regexpExtractFunc = new FunctionCallExpr("regexp_extract",
ImmutableList.of(args[0], args[1], args.length == 3 ? args[2] : new IntLiteral(0L)));
BinaryPredicate predicate = new BinaryPredicate(BinaryType.EQ, regexpExtractFunc, new StringLiteral(""));
// Wrap the call as if(regexp_extract(...) = '', NULL, regexp_extract(...)) so that an empty-string result is returned as NULL
return new FunctionCallExpr("if", ImmutableList.of(predicate, new NullLiteral(), regexpExtractFunc));
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,6 @@ private static void registerRegexpFunctionTransformer() {
// regexp_like -> regexp
registerFunctionTransformer("regexp_like", 2, "regexp",
ImmutableList.of(Expr.class, Expr.class));

// regexp_extract(string, pattern) -> regexp_extract(str, pattern, 0)
registerFunctionTransformer("regexp_extract", 2, new FunctionCallExpr("regexp_extract",
ImmutableList.of(new PlaceholderExpr(1, Expr.class), new PlaceholderExpr(2, Expr.class), new IntLiteral(0L))));
}

private static void registerJsonFunctionTransformer() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,10 +774,19 @@ public void testRegexp() throws Exception {
assertPlanContains(sql, "regexp('abc123', 'abc*')");

sql = "select regexp_extract('1a 2b 14m', '\\d+');";
assertPlanContains(sql, "regexp_extract('1a 2b 14m', '\\\\d+', 0)");
assertPlanContains(sql, "if(3: regexp_extract = '', NULL, 3: regexp_extract)\n" +
" | common expressions:\n" +
" | <slot 3> : regexp_extract('1a 2b 14m', '\\\\d+', 0)");

sql = "select regexp_extract('1abb 2b 14m', '[a-z]+');";
assertPlanContains(sql, "regexp_extract('1abb 2b 14m', '[a-z]+', 0)");
assertPlanContains(sql, "if(3: regexp_extract = '', NULL, 3: regexp_extract)\n" +
" | common expressions:\n" +
" | <slot 3> : regexp_extract('1abb 2b 14m', '[a-z]+', 0)");

sql = "select regexp_extract('1abb 2b 14m', '[a-z]+', 1);";
assertPlanContains(sql, "if(3: regexp_extract = '', NULL, 3: regexp_extract)\n" +
" | common expressions:\n" +
" | <slot 3> : regexp_extract('1abb 2b 14m', '[a-z]+', 1)");
}

@Test
Expand Down

0 comments on commit c9cd165

Please sign in to comment.