Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Optimize subfield expr evaluation when no copy is needed #35585

Merged
merged 1 commit into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions be/src/exprs/subfield_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ namespace starrocks {

class SubfieldExpr final : public Expr {
public:
explicit SubfieldExpr(const TExprNode& node) : Expr(node), _used_subfield_names(node.used_subfield_names) {}
// Builds the expression from its thrift node. `_copy_flag` stays true (clone the subfield
// column on evaluation) unless the node explicitly carries `copy_flag`, in which case that
// value is used.
explicit SubfieldExpr(const TExprNode& node)
        : Expr(node),
          _used_subfield_names(node.used_subfield_names),
          _copy_flag(node.__isset.copy_flag ? node.copy_flag : true) {}

SubfieldExpr(const SubfieldExpr&) = default;
SubfieldExpr(SubfieldExpr&&) = default;
Expand Down Expand Up @@ -77,7 +81,11 @@ class SubfieldExpr final : public Expr {
DCHECK_EQ(subfield_column->size(), result_null->size());

// Clone the subfield column when a copy is required (_copy_flag); otherwise reuse it as-is
return NullableColumn::create(subfield_column->clone_shared(), result_null);
if (_copy_flag) {
return NullableColumn::create(subfield_column->clone_shared(), result_null);
} else {
return NullableColumn::create(subfield_column, result_null);
}
}

// Copy-constructs a new SubfieldExpr from this one, hands it to `pool` via add(), and returns
// whatever add() returns.
Expr* clone(ObjectPool* pool) const override { return pool->add(new SubfieldExpr(*this)); }
Expand All @@ -89,6 +97,7 @@ class SubfieldExpr final : public Expr {

private:
std::vector<std::string> _used_subfield_names;
bool _copy_flag = true;
};

Expr* SubfieldExprFactory::from_thrift(const TExprNode& node) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class SubfieldExpr extends Expr {
// We use fieldNames to extract subfield column from children[0],
// children[0] must be a StructType.
private List<String> fieldNames;
private boolean copyFlag = true;

// Only used in parser, in parser, we can't determine column's type
public SubfieldExpr(Expr child, List<String> fieldNames) {
Expand Down Expand Up @@ -61,12 +62,17 @@ public SubfieldExpr(Expr child, Type type, List<String> fieldNames, NodePosition
/**
 * Copy constructor. Delegates to the superclass copy constructor, then carries over the
 * field-name list (same reference, not a new list) and the copy flag of {@code other}.
 */
public SubfieldExpr(SubfieldExpr other) {
    super(other);
    this.copyFlag = other.copyFlag;
    this.fieldNames = other.fieldNames;
}

/** Replaces the stored field names with an immutable copy of the given list. */
public void setFieldNames(List<String> fieldNames) {
    final List<String> snapshot = ImmutableList.copyOf(fieldNames);
    this.fieldNames = snapshot;
}

/**
 * Sets the copy flag of this expression; the value is what toThrift later writes into the
 * thrift node's copy_flag.
 */
public void setCopyFlag(boolean copyFlag) {
    final boolean requested = copyFlag;
    this.copyFlag = requested;
}

/** Returns the stored field-name list itself (no defensive copy is made here). */
public List<String> getFieldNames() {
    return this.fieldNames;
}
Expand All @@ -82,13 +88,14 @@ protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {

@Override
protected String toSqlImpl() {
return getChild(0).toSqlImpl() + "." + Joiner.on('.').join(fieldNames);
return getChild(0).toSqlImpl() + "." + Joiner.on('.').join(fieldNames) + '[' + copyFlag + ']';
}

/**
 * Fills {@code msg} for this node: tags it as SUBFIELD_EXPR and attaches the field names
 * and the copy flag.
 */
@Override
protected void toThrift(TExprNode msg) {
    msg.setNode_type(TExprNodeType.SUBFIELD_EXPR);
    msg.setUsed_subfield_names(this.fieldNames);
    msg.setCopy_flag(this.copyFlag);
}

@Override
Expand All @@ -110,11 +117,11 @@ public boolean equals(Object o) {
return false;
}
SubfieldExpr that = (SubfieldExpr) o;
return Objects.equals(fieldNames, that.fieldNames);
return Objects.equals(fieldNames, that.fieldNames) && this.copyFlag == that.copyFlag;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), fieldNames);
return Objects.hash(super.hashCode(), fieldNames, copyFlag);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ public boolean hasChildPath(String path) {
return children.stream().anyMatch(p -> p.path.equals(path));
}

/**
 * Checks whether the access described by {@code fieldNames} overlaps with this path tree.
 *
 * <ul>
 *   <li>Returns {@code true} if this node has no child paths or {@code fieldNames} is empty.</li>
 *   <li>Returns {@code false} if no child's path equals the first field name.</li>
 *   <li>Otherwise descends into that child with the remaining field names.</li>
 * </ul>
 *
 * @param fieldNames field names to test, outermost first
 * @return whether the given field chain overlaps with the paths recorded under this node
 */
public boolean hasOverlap(List<String> fieldNames) {
    if (fieldNames.isEmpty() || !hasChildPath()) {
        return true;
    }

    final String head = fieldNames.get(0);
    final boolean headIsChild = children.stream().anyMatch(child -> child.path.equals(head));
    if (!headIsChild) {
        return false;
    }

    final List<String> rest = fieldNames.subList(1, fieldNames.size());
    return getChildPath(head).hasOverlap(rest);
}

/** Returns {@code true} when this node has at least one child path. */
public boolean hasChildPath() {
    final boolean noChildren = children.isEmpty();
    return !noChildren;
}
Expand Down
12 changes: 12 additions & 0 deletions fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ public static MaterializedViewRewriteMode parse(String str) {
public static final String ENABLE_REWRITE_SIMPLE_AGG_TO_META_SCAN = "enable_rewrite_simple_agg_to_meta_scan";

public static final String ENABLE_PRUNE_COMPLEX_TYPES = "enable_prune_complex_types";
public static final String ENABLE_SUBFIELD_NO_COPY = "enable_subfield_no_copy";
public static final String ENABLE_PRUNE_COMPLEX_TYPES_IN_UNNEST = "enable_prune_complex_types_in_unnest";
public static final String RANGE_PRUNER_PREDICATES_MAX_LEN = "range_pruner_max_predicate";

Expand Down Expand Up @@ -1364,6 +1365,9 @@ public void setEnableParallelMerge(boolean enableParallelMerge) {
@VarAttr(name = ENABLE_PRUNE_COMPLEX_TYPES)
private boolean enablePruneComplexTypes = true;

@VarAttr(name = ENABLE_SUBFIELD_NO_COPY)
private boolean enableSubfieldNoCopy = true;

@VarAttr(name = ENABLE_PRUNE_COMPLEX_TYPES_IN_UNNEST)
private boolean enablePruneComplexTypesInUnnest = true;

Expand Down Expand Up @@ -2679,6 +2683,14 @@ public void setEnablePruneComplexTypes(boolean enablePruneComplexTypes) {
this.enablePruneComplexTypes = enablePruneComplexTypes;
}

/** Returns the current value of the {@code enable_subfield_no_copy} session variable. */
public boolean getEnableSubfieldNoCopy() {
    final boolean enabled = this.enableSubfieldNoCopy;
    return enabled;
}

/** Sets the {@code enable_subfield_no_copy} session variable. */
public void setEnableSubfieldNoCopy(boolean enableSubfieldNoCopy) {
    final boolean requested = enableSubfieldNoCopy;
    this.enableSubfieldNoCopy = requested;
}

/** Returns the current value of the {@code enable_prune_complex_types_in_unnest} session variable. */
public boolean getEnablePruneComplexTypesInUnnest() {
    final boolean enabled = this.enablePruneComplexTypesInUnnest;
    return enabled;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import com.starrocks.sql.optimizer.rule.tree.ScalarOperatorsReuseRule;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.PruneSubfieldRule;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.PushDownSubfieldRule;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldExprNoCopyRule;
import com.starrocks.sql.optimizer.task.OptimizeGroupTask;
import com.starrocks.sql.optimizer.task.RewriteTreeTask;
import com.starrocks.sql.optimizer.task.TaskContext;
Expand Down Expand Up @@ -361,6 +362,10 @@ private OptExpression logicalRuleRewrite(ConnectContext connectContext,
}

tree = pruneSubfield(tree, rootTaskContext, requiredColumns);
// Runs after pruneSubfield, which pushes down subfield exprs
if (sessionVariable.getEnableSubfieldNoCopy()) {
ruleRewriteOnlyOnce(tree, rootTaskContext, new SubfieldExprNoCopyRule());
}
ruleRewriteIterative(tree, rootTaskContext, RuleSetType.PRUNE_ASSERT_ROW);
ruleRewriteIterative(tree, rootTaskContext, RuleSetType.PRUNE_PROJECT);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class SubfieldOperator extends ScalarOperator {
// Only one child
private List<ScalarOperator> children = new ArrayList<>();
private final ImmutableList<String> fieldNames;
private boolean copyFlag = true;

// Build based on SlotRef which contains struct subfield access information
public static SubfieldOperator build(ScalarOperator child, Type type, List<Integer> usedSubfieldPos) {
Expand All @@ -50,15 +51,29 @@ public static SubfieldOperator build(ScalarOperator child, Type type, List<Integ
}

/**
 * Convenience constructor: delegates to the four-argument constructor with
 * {@code copyFlag} set to {@code true}.
 */
public SubfieldOperator(ScalarOperator child, Type type, List<String> fieldNames) {
this(child, type, fieldNames, true);
}

public SubfieldOperator(ScalarOperator child, Type type, List<String> fieldNames, boolean copyFlag) {
super(OperatorType.SUBFIELD, type);
this.children.add(child);
this.fieldNames = ImmutableList.copyOf(fieldNames);
this.fieldNames = ImmutableList.copyOf(fieldNames);
this.copyFlag = copyFlag;
}

/** Returns the immutable list of field names this operator accesses. */
public List<String> getFieldNames() {
    return this.fieldNames;
}

/** Returns the current copy flag of this operator. */
public boolean getCopyFlag() {
    final boolean flag = this.copyFlag;
    return flag;
}

/**
 * Sets the copy flag of this operator.
 *
 * NOTE(review): copyFlag takes part in hashCode() and equals(), so changing it while this
 * operator is stored as a key in a hash-based collection changes its hash; confirm that
 * callers do not rely on such a collection across this call.
 */
public void setCopyFlag(boolean copyFlag) {
    final boolean requested = copyFlag;
    this.copyFlag = requested;
}


@Override
public boolean isNullable() {
return children.get(0).isNullable();
Expand Down Expand Up @@ -98,7 +113,7 @@ public String toString() {

@Override
public int hashCode() {
return Objects.hash(getChild(0), fieldNames);
return Objects.hash(getChild(0), fieldNames, copyFlag);
}

@Override
Expand All @@ -111,7 +126,8 @@ public boolean equals(Object other) {
return false;
}
SubfieldOperator otherOp = (SubfieldOperator) other;
return fieldNames.equals(otherOp.fieldNames) && getChild(0).equals(otherOp.getChild(0));
return fieldNames.equals(otherOp.fieldNames) && getChild(0).equals(otherOp.getChild(0))
&& copyFlag == otherOp.getCopyFlag();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public enum RuleType {
TF_PRUNE_CTE_CONSUME_COLUMNS,
TF_PRUNE_GROUP_BY_KEYS,
TF_PRUNE_SUBFIELD,
TF_SUBFILED_NOCOPY,

TF_SCALAR_OPERATORS_REUSE,
TF_PRUNE_EMPTY_WINDOW,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.starrocks.sql.optimizer.rule.tree.prunesubfield;

import com.google.common.collect.Lists;
import com.starrocks.catalog.ColumnAccessPath;
import com.starrocks.sql.optimizer.OptExpression;
import com.starrocks.sql.optimizer.OptimizerContext;
import com.starrocks.sql.optimizer.operator.OperatorType;
import com.starrocks.sql.optimizer.operator.logical.LogicalProjectOperator;
import com.starrocks.sql.optimizer.operator.pattern.Pattern;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import com.starrocks.sql.optimizer.operator.scalar.SubfieldOperator;
import com.starrocks.sql.optimizer.rule.RuleType;
import com.starrocks.sql.optimizer.rule.transformation.TransformationRule;

import java.util.ArrayList;
import java.util.List;

/*
Phase 1: handle the most common case, where the subfield expr appears in only one of the values of the projection's ColumnRefMap.
*/

public class SubfieldExprNoCopyRule extends TransformationRule {
/**
 * Registers this rule under RuleType.TF_SUBFILED_NOCOPY with a pattern built from
 * LOGICAL_PROJECT and PATTERN_LEAF.
 */
public SubfieldExprNoCopyRule() {
super(RuleType.TF_SUBFILED_NOCOPY, Pattern.create(OperatorType.LOGICAL_PROJECT, OperatorType.PATTERN_LEAF));
}

@Override
public List<OptExpression> transform(OptExpression input, OptimizerContext context) {
// project expression
LogicalProjectOperator project = input.getOp().cast();
List<ScalarOperator> projectMapValues = new ArrayList<>(project.getColumnRefMap().values());
for (int i = 0; i < projectMapValues.size(); i++) {
ScalarOperator value = projectMapValues.get(i);
// only deal with subfield expr of slotRef
if (value instanceof SubfieldOperator && value.getChild(0) instanceof ColumnRefOperator) {
Copy link
Contributor

@Seaven Seaven Nov 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think don't need the condition?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is used to simplify the case

SubfieldOperator subfield = value.cast();
ColumnRefOperator col = value.getChild(0).cast();
SubfieldExpressionCollector collector = new SubfieldExpressionCollector();
// collect other expr that used the same root slot
for (int j = 0; j < projectMapValues.size(); j++) {
if (j != i && projectMapValues.get(j).getUsedColumns().contains(col)) {
projectMapValues.get(j).accept(collector, null);
}
}
List<ScalarOperator> allSubfieldExpr = Lists.newArrayList();
allSubfieldExpr.addAll(collector.getComplexExpressions());
// normalize access path
SubfieldAccessPathNormalizer normalizer = new SubfieldAccessPathNormalizer();
normalizer.collect(allSubfieldExpr);
// no other usage
if (!normalizer.hasPath(col)) {
subfield.setCopyFlag(false);
continue;
}
ColumnAccessPath p = normalizer.normalizePath(col, col.getName());
// no overlap; "overlap" means another expr uses this subfield itself, one of its parents, or one of its children
if (!p.hasOverlap(subfield.getFieldNames())) {
subfield.setCopyFlag(false);
}
}
}

return Lists.newArrayList(input);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ public Expr visitVariableReference(ColumnRefOperator node, FormatterContext cont
/**
 * Converts a {@link SubfieldOperator} into a {@link SubfieldExpr}: builds the child
 * expression, copies over the type, field names and copy flag, then applies hackTypeNull.
 */
public Expr visitSubfield(SubfieldOperator node, FormatterContext context) {
    final Expr childExpr = buildExpr.build(node.getChild(0), context);
    final SubfieldExpr subfieldExpr = new SubfieldExpr(childExpr, node.getType(), node.getFieldNames());
    subfieldExpr.setCopyFlag(node.getCopyFlag());
    hackTypeNull(subfieldExpr);
    return subfieldExpr;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,34 +391,34 @@ public void testSelectStruct() throws Exception {
String sql = "select c0, c1.a from test_struct";
assertPlanContains(sql, "1:Project\n" +
" | <slot 1> : 1: c0\n" +
" | <slot 4> : 2: c1.a");
" | <slot 4> : 2: c1.a[false]");

sql = "select c0, test_struct.c1.a from test_struct";
assertPlanContains(sql, "<slot 4> : 2: c1.a");
assertPlanContains(sql, "<slot 4> : 2: c1.a[false]");

sql = "select c0, test.test_struct.c1.a from test_struct";
assertPlanContains(sql, "<slot 4> : 2: c1.a");
assertPlanContains(sql, "<slot 4> : 2: c1.a[false]");

sql = "select c0, default_catalog.test.test_struct.c1.a from test_struct";
assertPlanContains(sql, "<slot 4> : 2: c1.a");
assertPlanContains(sql, "<slot 4> : 2: c1.a[false]");

sql = "select c1.a[10].b from test_struct";
assertPlanContains(sql, "1:Project\n" +
" | <slot 4> : 2: c1.a[10].b");
" | <slot 4> : 2: c1.a[true][10].b[true]");

sql = "select c2.a, c2.b from test_struct";
assertPlanContains(sql, " 1:Project\n" +
" | <slot 4> : 3: c2.a\n" +
" | <slot 5> : 3: c2.b");
" | <slot 4> : 3: c2.a[false]\n" +
" | <slot 5> : 3: c2.b[false]");

sql = "select c2.a + c2.b from test_struct";
assertPlanContains(sql, "1:Project\n" +
" | <slot 4> : CAST(3: c2.a AS DOUBLE) + 3: c2.b");
" | <slot 4> : CAST(3: c2.a[true] AS DOUBLE) + 3: c2.b[true]");

sql = "select sum(c2.b) from test_struct group by c2.a";
assertPlanContains(sql, "1:Project\n" +
" | <slot 4> : 3: c2.a\n" +
" | <slot 5> : 3: c2.b");
" | <slot 4> : 3: c2.a[false]\n" +
" | <slot 5> : 3: c2.b[false]");
}

public void testSelectRow() throws Exception {
Expand Down
Loading
Loading