Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public enum Key {
PATTERN_MODE("plugins.ppl.pattern.mode"),
PATTERN_MAX_SAMPLE_COUNT("plugins.ppl.pattern.max.sample.count"),
PATTERN_BUFFER_LIMIT("plugins.ppl.pattern.buffer.limit"),
PPL_REX_MAX_MATCH_LIMIT("plugins.ppl.rex.max_match.limit"),

/** Enable Calcite as execution engine */
CALCITE_ENGINE_ENABLED("plugins.calcite.enabled"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
Expand Down Expand Up @@ -749,6 +750,11 @@ public LogicalPlan visitRegex(Regex node, AnalysisContext context) {
throw getOnlyForCalciteException("Regex");
}

/**
 * Handles a {@link Rex} node by refusing it.
 *
 * <p>This method never returns a plan: it always throws the exception produced by {@code
 * getOnlyForCalciteException} for the command name {@code "Rex"}.
 */
@Override
public LogicalPlan visitRex(Rex node, AnalysisContext context) {
  final String commandName = "Rex";
  throw getOnlyForCalciteException(commandName);
}

@Override
public LogicalPlan visitPaginate(Paginate paginate, AnalysisContext context) {
LogicalPlan child = paginate.getChild().get(0).accept(this, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
Expand Down Expand Up @@ -264,6 +265,10 @@ public T visitRegex(Regex node, C context) {
return visitChildren(node, context);
}

/**
 * Visits a {@link Rex} node.
 *
 * <p>The default implementation simply delegates to {@code visitChildren} and returns its result.
 */
public T visitRex(Rex node, C context) {
  final T childrenResult = visitChildren(node, context);
  return childrenResult;
}

/**
 * Visits a {@link LambdaFunction} node.
 *
 * <p>The default implementation simply delegates to {@code visitChildren} and returns its result.
 */
public T visitLambdaFunction(LambdaFunction node, C context) {
  final T childrenResult = visitChildren(node, context);
  return childrenResult;
}
Expand Down
75 changes: 75 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/Rex.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Optional;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
 * AST node representing the Rex field-extraction operation.
 *
 * <p>The node carries the expression to extract from, the pattern literal, the {@link RexMode} and
 * an optional maximum number of matches. The child plan is attached after construction through
 * {@link #attach(UnresolvedPlan)} (or the Lombok-generated setter).
 */
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
public class Rex extends UnresolvedPlan {

  /** Modes of the Rex operation; {@link #EXTRACT} is the only one defined. */
  public enum RexMode {
    EXTRACT
  }

  /** Field to extract from. */
  private final UnresolvedExpression field;

  /** Pattern with named capture groups. */
  private final Literal pattern;

  /** Rex mode (only EXTRACT supported). */
  private final RexMode mode;

  /** Maximum number of matches (optional). */
  private final Optional<Integer> maxMatch;

  /** Child plan; {@code null} until one is attached. */
  @Setter private UnresolvedPlan child;

  /**
   * Creates an {@link RexMode#EXTRACT} node without a match limit.
   *
   * @param field expression to extract from
   * @param pattern pattern literal
   */
  public Rex(UnresolvedExpression field, Literal pattern) {
    this(field, pattern, RexMode.EXTRACT, Optional.empty());
  }

  /**
   * Creates an {@link RexMode#EXTRACT} node with an optional match limit.
   *
   * @param field expression to extract from
   * @param pattern pattern literal
   * @param maxMatch maximum number of matches, or empty when not specified
   */
  public Rex(UnresolvedExpression field, Literal pattern, Optional<Integer> maxMatch) {
    this(field, pattern, RexMode.EXTRACT, maxMatch);
  }

  /**
   * Creates a node with every property given explicitly.
   *
   * @param field expression to extract from
   * @param pattern pattern literal
   * @param mode rex mode
   * @param maxMatch maximum number of matches, or empty when not specified
   */
  public Rex(
      UnresolvedExpression field, Literal pattern, RexMode mode, Optional<Integer> maxMatch) {
    this.field = field;
    this.pattern = pattern;
    this.mode = mode;
    this.maxMatch = maxMatch;
  }

  /**
   * Attaches the child plan to this node.
   *
   * @param child plan to use as this node's child
   * @return this node
   */
  @Override
  public Rex attach(UnresolvedPlan child) {
    this.child = child;
    return this;
  }

  /**
   * Returns the children of this node.
   *
   * @return a single-element list holding the child, or an empty list when no child is attached
   */
  @Override
  public List<UnresolvedPlan> getChild() {
    // ImmutableList.of(null) throws NullPointerException, so an unattached node reports no
    // children instead of failing.
    return child == null ? ImmutableList.of() : ImmutableList.of(child);
  }

  /** Dispatches to {@link AbstractNodeVisitor#visitRex(Rex, Object)}. */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitRex(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Rex;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
Expand All @@ -125,6 +126,7 @@
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.PPLFuncImpTable;
import org.opensearch.sql.expression.parse.RegexCommonUtils;
import org.opensearch.sql.utils.ParseUtils;

public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalcitePlanContext> {
Expand Down Expand Up @@ -203,6 +205,50 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) {
return context.relBuilder.peek();
}

/**
 * Translates a {@link Rex} node into a projection that adds one extracted column per named
 * capture group of the pattern.
 *
 * <p>After visiting the children, each named group is turned into a {@code REX_EXTRACT} call, or
 * a {@code REX_EXTRACT_MULTI} call when a max-match value greater than 1 is present. The calls
 * are projected under the group names via {@code projectPlusOverriding}.
 *
 * @param node the rex AST node
 * @param context the Calcite planning context
 * @return the relational node on top of the builder stack after the projection
 * @throws IllegalArgumentException if the pattern contains no named capture group
 */
@Override
public RelNode visitRex(Rex node, CalcitePlanContext context) {
  visitChildren(node, context);

  RexNode fieldRex = rexVisitor.analyze(node.getField(), context);
  String patternStr = (String) node.getPattern().getValue();

  List<String> namedGroups = RegexCommonUtils.getNamedGroupCandidates(patternStr);

  if (namedGroups.isEmpty()) {
    throw new IllegalArgumentException(
        "Rex pattern must contain at least one named capture group");
  }

  // Everything below is identical for every group, so build it once outside the loop.
  final int maxMatch = node.getMaxMatch().orElse(1);
  final boolean multiMatch = maxMatch > 1;
  final RexNode patternLiteral = context.rexBuilder.makeLiteral(patternStr);
  final RexNode maxMatchLiteral = multiMatch ? context.relBuilder.literal(maxMatch) : null;

  List<RexNode> newFields = new ArrayList<>(namedGroups.size());
  List<String> newFieldNames = new ArrayList<>(namedGroups.size());

  for (int i = 0; i < namedGroups.size(); i++) {
    // NOTE(review): the position in namedGroups (1-based) is passed as the regex group index.
    // That presumably only matches the real capture-group number when the pattern has no
    // unnamed capturing groups - confirm against RegexCommonUtils and the extract UDFs.
    final RexNode groupIndexLiteral = context.relBuilder.literal(i + 1);
    final RexNode extractCall;
    if (multiMatch) {
      extractCall =
          PPLFuncImpTable.INSTANCE.resolve(
              context.rexBuilder,
              BuiltinFunctionName.REX_EXTRACT_MULTI,
              fieldRex,
              patternLiteral,
              groupIndexLiteral,
              maxMatchLiteral);
    } else {
      extractCall =
          PPLFuncImpTable.INSTANCE.resolve(
              context.rexBuilder,
              BuiltinFunctionName.REX_EXTRACT,
              fieldRex,
              patternLiteral,
              groupIndexLiteral);
    }
    newFields.add(extractCall);
    newFieldNames.add(namedGroups.get(i));
  }

  projectPlusOverriding(newFields, newFieldNames, context);
  return context.relBuilder.peek();
}

private boolean containsSubqueryExpression(Node expr) {
if (expr == null) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ private PPLOperandTypes() {}
UDFOperandMetadata.wrap((FamilyOperandTypeChecker) OperandTypes.NUMERIC_NUMERIC);
public static final UDFOperandMetadata STRING_INTEGER =
UDFOperandMetadata.wrap(OperandTypes.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER));
/** Operand metadata for three operands of families CHARACTER, CHARACTER, INTEGER (in order). */
public static final UDFOperandMetadata STRING_STRING_INTEGER =
UDFOperandMetadata.wrap(
OperandTypes.family(
SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER));
/**
 * Operand metadata for four operands of families CHARACTER, CHARACTER, INTEGER, INTEGER (in
 * order).
 */
public static final UDFOperandMetadata STRING_STRING_INTEGER_INTEGER =
UDFOperandMetadata.wrap(
OperandTypes.family(
SqlTypeFamily.CHARACTER,
SqlTypeFamily.CHARACTER,
SqlTypeFamily.INTEGER,
SqlTypeFamily.INTEGER));

public static final UDFOperandMetadata NUMERIC_NUMERIC_OPTIONAL_NUMERIC =
UDFOperandMetadata.wrap(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ public enum BuiltinFunctionName {
POSITION(FunctionName.of("position")),
REGEXP(FunctionName.of("regexp")),
REGEX_MATCH(FunctionName.of("regex_match")),
REX_EXTRACT(FunctionName.of("REX_EXTRACT")),
REX_EXTRACT_MULTI(FunctionName.of("REX_EXTRACT_MULTI")),
REPLACE(FunctionName.of("replace")),
REVERSE(FunctionName.of("reverse")),
RIGHT(FunctionName.of("right")),
Expand Down Expand Up @@ -315,7 +317,10 @@ public enum BuiltinFunctionName {
INTERNAL_UNCOLLECT_PATTERNS(FunctionName.of("uncollect_patterns")),
INTERNAL_REGEXP_EXTRACT(FunctionName.of("regexp_extract"), true),
INTERNAL_GROK(FunctionName.of("grok"), true),
INTERNAL_REGEXP_REPLACE_3(FunctionName.of("regexp_replace_3"), true);
INTERNAL_REGEXP_REPLACE_3(FunctionName.of("regexp_replace_3"), true),
INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true),
INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true),
INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true);

private final FunctionName name;
private boolean isInternal;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
import org.opensearch.sql.expression.function.udf.CryptographicFunction;
import org.opensearch.sql.expression.function.udf.GrokFunction;
import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction;
import org.opensearch.sql.expression.function.udf.RexExtractFunction;
import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction;
import org.opensearch.sql.expression.function.udf.SpanFunction;
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction;
import org.opensearch.sql.expression.function.udf.condition.EnhancedCoalesceFunction;
Expand Down Expand Up @@ -401,6 +403,9 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
public static final SqlOperator RANGE_BUCKET =
new org.opensearch.sql.expression.function.udf.binning.RangeBucketFunction()
.toUDF("RANGE_BUCKET");
/** Operator named {@code REX_EXTRACT}, built from a {@link RexExtractFunction} instance. */
public static final SqlOperator REX_EXTRACT = new RexExtractFunction().toUDF("REX_EXTRACT");
/**
 * Operator named {@code REX_EXTRACT_MULTI}, built from a {@link RexExtractMultiFunction}
 * instance.
 */
public static final SqlOperator REX_EXTRACT_MULTI =
new RexExtractMultiFunction().toUDF("REX_EXTRACT_MULTI");

public static final SqlOperator ENHANCED_COALESCE =
new EnhancedCoalesceFunction().toUDF("COALESCE");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN_PARSER;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_EXTRACT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_BLANK;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_EMPTY;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL;
Expand Down Expand Up @@ -159,6 +162,8 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT_MULTI;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND;
Expand Down Expand Up @@ -685,6 +690,9 @@ void populate() {
registerOperator(SHA1, SqlLibraryOperators.SHA1);
registerOperator(INTERNAL_REGEXP_EXTRACT, SqlLibraryOperators.REGEXP_EXTRACT);
registerOperator(INTERNAL_REGEXP_REPLACE_3, SqlLibraryOperators.REGEXP_REPLACE_3);
registerOperator(INTERNAL_REGEXP_REPLACE_PG_4, SqlLibraryOperators.REGEXP_REPLACE_PG_4);
registerOperator(INTERNAL_REGEXP_REPLACE_5, SqlLibraryOperators.REGEXP_REPLACE_5);
registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3);

// Register PPL UDF operator
registerOperator(COSH, PPLBuiltinOperators.COSH);
Expand All @@ -710,6 +718,8 @@ void populate() {
registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING);
registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING);
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT);
registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI);

// Register PPL Datetime UDF operator
registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.opensearch.sql.calcite.utils.PPLOperandTypes;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;

/**
 * Custom REX_EXTRACT function for extracting a regex capture group from a string.
 *
 * <p>The function takes three operands (string, pattern string, integer group index, as declared
 * by {@link PPLOperandTypes#STRING_STRING_INTEGER}) and returns a nullable VARCHAR. The generated
 * code calls {@link #extractGroup(String, String, int)}.
 */
public final class RexExtractFunction extends ImplementorUDF {

  /** Creates the UDF with its implementor and {@link NullPolicy#ARG0}. */
  public RexExtractFunction() {
    super(new RexExtractImplementor(), NullPolicy.ARG0);
  }

  /** Returns the return-type inference: {@link ReturnTypes#VARCHAR_2000_NULLABLE}. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.VARCHAR_2000_NULLABLE;
  }

  /** Returns the operand metadata: {@link PPLOperandTypes#STRING_STRING_INTEGER}. */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return PPLOperandTypes.STRING_STRING_INTEGER;
  }

  /** Generates a static call to {@link RexExtractFunction#extractGroup(String, String, int)}. */
  private static class RexExtractImplementor implements NotNullImplementor {

    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      Expression field = translatedOperands.get(0);
      Expression pattern = translatedOperands.get(1);
      Expression groupIndex = translatedOperands.get(2);

      return Expressions.call(RexExtractFunction.class, "extractGroup", field, pattern, groupIndex);
    }
  }

  /**
   * Extracts one capture group from the first match of {@code pattern} in {@code text}.
   *
   * <p>The pattern is compiled on every invocation.
   *
   * @param text the input to search
   * @param pattern the regular expression, in {@link Pattern} syntax
   * @param groupIndex 1-based index of the capture group to return
   * @return the captured text; {@code null} when the pattern does not match, when {@code
   *     groupIndex} is not within {@code 1..groupCount}, or when the group did not participate in
   *     the match
   * @throws IllegalArgumentException if {@code pattern} is not a valid regular expression; the
   *     original {@link PatternSyntaxException} is attached as the cause
   */
  public static String extractGroup(String text, String pattern, int groupIndex) {
    final Pattern compiledPattern;
    try {
      compiledPattern = Pattern.compile(pattern);
    } catch (PatternSyntaxException e) {
      // Keep the original exception as the cause so the stack trace is not lost.
      throw new IllegalArgumentException(
          "Error in 'rex' command: Encountered the following error while compiling the regex '"
              + pattern
              + "': "
              + e.getMessage(),
          e);
    }

    Matcher matcher = compiledPattern.matcher(text);

    // The group count is a property of the pattern, so reject a bad index before scanning.
    if (groupIndex <= 0 || groupIndex > matcher.groupCount()) {
      return null;
    }
    return matcher.find() ? matcher.group(groupIndex) : null;
  }
}
Loading
Loading