Skip to content

Commit

Permalink
[Feature][Transform-V2] Support SqlTransform Not Like Expression (apa…
Browse files Browse the repository at this point in the history
  • Loading branch information
zhilinli123 authored Nov 14, 2023
1 parent 72be666 commit 44418b1
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ source {
{fields = [1, "Joy Ding", 20, null], kind = INSERT}
{fields = [2, "May Ding", 22, "may_ding@apache.com"], kind = INSERT}
{fields = [3, "Kin Dom", 21, "kin_dom@apache.com"], kind = INSERT}
{fields = [4, "LeBron Ding", 38, null], kind = INSERT}
{fields = [8, "Wang DingCC", 34, null], kind = INSERT}
{fields = [9, "Zu DingDD", 33, null], kind = INSERT}
{fields = [10, "Zhang DingEE", 40, null], kind = INSERT}
{fields = [11, "Lin Qiang", 40, null], kind = INSERT}
{fields = [12, "Yu Liang", 40, null], kind = INSERT}
]
}
}
Expand All @@ -47,10 +53,22 @@ transform {
Sql {
source_table_name = "fake"
result_table_name = "fake1"
query = "select id,name,age from fake where id=1 and id!=0 and name<>'Kin Dom' and (age>=20 or age<22) and regexp_like(name, '[A-Z ]*') and id>0 and id>=1 and id in (1,2,3,4) and id not in (5,6,7) and name is not null and email is null and id<4 and id<=4 and name like '%Din_'"
query = """
select id, name, age, email from fake
where ( id = 1 or id = 4 or id in (8, 9, 10, 11, 12) )
and id != 0 and name <> 'Kin Dom'
and ( age >= 20 or age < 22 )
and regexp_like(name, '[A-Z ]*')
and id > 0 and id >= 1 and id in (1, 2, 3, 4, 8, 9, 10, 11, 12)
and id not in (5, 6, 7) and name is not null and email is null
and id < 500 and id <= 500
and ( name like '%Din_' or name like 'Wan_%' or name like '%Yu%' )
and name not like '%LeBron%'
and name not like 'Wan_%'
and name not like '%Lian_'
"""
}
}

sink {
Console {
source_table_name = "fake1"
Expand All @@ -68,7 +86,17 @@ sink {
rule_type = MAX_ROW
rule_value = 1
}
],
field_rules = [
{
field_name = "id"
field_type = "int"
field_value = [
{equals_to = 1}
]
}
]
}
}
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,15 @@ public boolean executeFilter(Expression whereExpr, Object[] inputFields) {
return inExpr((InExpression) whereExpr, inputFields);
}
if (whereExpr instanceof LikeExpression) {
return likeExpr((LikeExpression) whereExpr, inputFields);
boolean isNotLike = ((LikeExpression) whereExpr).isNot();
// not like SQL parsing
if (isNotLike) {
return notLikeExpr((LikeExpression) whereExpr, inputFields);
}
// like SQL parsing
if (!isNotLike) {
return likeExpr((LikeExpression) whereExpr, inputFields);
}
}
if (whereExpr instanceof ComparisonOperator) {
Pair<Object, Object> pair =
Expand Down Expand Up @@ -148,7 +156,7 @@ private boolean inExpr(InExpression inExpression, Object[] inputFields) {
}

/**
* Like expression filter, unsupported yet
* Like expression filter
*
* @param likeExpression like expression
* @param inputFields input fields
Expand All @@ -162,11 +170,26 @@ private boolean likeExpr(LikeExpression likeExpression, Object[] inputFields) {
}
Expression rightExpr = likeExpression.getRightExpression();
Object rightVal = zetaSQLFunction.computeForValue(rightExpr, inputFields);
if (rightVal == null) {
String regex = rightVal.toString();
if (rightVal == null && regex.length() > 0) {
return false;
}

String regex = rightVal.toString().replace("%", ".*").replace("_", ".");
String likeIdent = "%";
if (regex.startsWith(likeIdent)) {
regex = regex.replaceFirst(likeIdent, ".*");
}
if (regex.endsWith(likeIdent)) {
regex = regex.substring(0, regex.length() - 1) + ".*";
}
if (regex.startsWith("_")) {
regex = regex.replaceFirst("_", ".");
}
if (regex.endsWith("_")) {
regex = regex.substring(0, regex.length() - 1) + ".";
}
if (regex.length() >= 3 && regex.substring(regex.length() - 3).endsWith("_.*")) {
regex = regex.substring(0, regex.length() - 3) + "..*";
}
if (regex.startsWith("'") && regex.endsWith("'")) {
regex = regex.substring(0, regex.length() - 1).substring(1);
}
Expand All @@ -176,6 +199,50 @@ private boolean likeExpr(LikeExpression likeExpression, Object[] inputFields) {
return matcher.matches();
}

/**
 * Not Like expression filter.
 *
 * <p>Evaluates {@code left NOT LIKE right} for one input row. The right-hand value is
 * interpreted as a SQL LIKE pattern: {@code %} matches any sequence of zero or more
 * characters, {@code _} matches exactly one character, and every other character is matched
 * literally (regex metacharacters in the pattern have no special meaning).
 *
 * <p>If either operand evaluates to {@code null} the row is rejected ({@code false}),
 * because the comparison result is unknown rather than true.
 *
 * @param likeExpression not like expression
 * @param inputFields input fields
 * @return {@code true} when the left value does NOT match the LIKE pattern; {@code false}
 *     when it matches or when either operand is {@code null}
 */
private boolean notLikeExpr(LikeExpression likeExpression, Object[] inputFields) {
    Expression leftExpr = likeExpression.getLeftExpression();
    Object leftVal = zetaSQLFunction.computeForValue(leftExpr, inputFields);
    if (leftVal == null) {
        return false;
    }
    Expression rightExpr = likeExpression.getRightExpression();
    Object rightVal = zetaSQLFunction.computeForValue(rightExpr, inputFields);
    // The null check must come before any dereference of rightVal.
    if (rightVal == null) {
        return false;
    }

    String likePattern = rightVal.toString();
    // Drop surrounding single quotes if the evaluated value still carries them.
    // The length guard keeps a lone "'" from producing an invalid substring range.
    if (likePattern.length() >= 2 && likePattern.startsWith("'") && likePattern.endsWith("'")) {
        likePattern = likePattern.substring(1, likePattern.length() - 1);
    }

    // Translate the LIKE pattern to a regex one character at a time so wildcards are
    // honoured at every position, and literal runs are quoted so characters such as
    // '.', '(' or '[' cannot be misread as regex syntax.
    StringBuilder regex = new StringBuilder();
    StringBuilder literalRun = new StringBuilder();
    for (int i = 0; i < likePattern.length(); i++) {
        char c = likePattern.charAt(i);
        if (c == '%' || c == '_') {
            if (literalRun.length() > 0) {
                regex.append(Pattern.quote(literalRun.toString()));
                literalRun.setLength(0);
            }
            regex.append(c == '%' ? ".*" : ".");
        } else {
            literalRun.append(c);
        }
    }
    if (literalRun.length() > 0) {
        regex.append(Pattern.quote(literalRun.toString()));
    }

    // DOTALL: LIKE wildcards match any character, including line terminators.
    Pattern pattern = Pattern.compile(regex.toString(), Pattern.DOTALL);
    return !pattern.matcher(leftVal.toString()).matches();
}

private Pair<Object, Object> executeComparisonOperator(
ComparisonOperator comparisonOperator, Object[] inputFields) {
Expression leftExpr = comparisonOperator.getLeftExpression();
Expand Down

0 comments on commit 44418b1

Please sign in to comment.