Skip to content

Commit

Permalink
[CALCITE-4994] SQL-to-RelNode conversion is slow if table contains hu…
Browse files Browse the repository at this point in the history
…ndreds of fields

If a table contains hundreds or thousands of fields,
SQL-to-RelNode conversion is slow because the operation to
lookup a field within a record type is O(n) in the number of
fields and is called O(n) times. This manifests in the method
SqlToRelConverter.Blackboard.lookupExp.

The solution we adopted is to add a map in RelRecordType
from field names (case-sensitive) to fields (Julian Hyde).

We hope that this map will improve performance in other
parts of the planning process besides SQL-to-RelNode (e.g.
validation and rewrite rules).

If the record has 20 (RelRecordType.THRESHOLD) or fewer
fields, the map is not populated. We believe that this saves
memory and effort for the common case, small to medium-sized
records.

In SqlToRelConverter.Blackboard, change the contract of
method lookupExp. It would previously return null if a field
was not found, whereupon the caller would throw; now the
method throws, and is declared not-nullable. The method
previously returned a Map from field names to field ordinals,
and now returns a Function that can convert a field name to
an expression accessing that field; the new contract is
easier to implement efficiently with available knowledge.

Add a benchmark, RelNodeConversionBenchmark, that
demonstrates improvement for tables with 100 and 1,000
fields (Jay Narale).

Close apache#2701

Co-authored-by: Julian Hyde <jhyde@apache.org>
  • Loading branch information
2 people authored and liyafan82 committed Mar 4, 2022
1 parent 28f4195 commit bb89b92
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.io.Serializable;
import java.util.Objects;

import static java.util.Objects.requireNonNull;

/**
* Default implementation of {@link RelDataTypeField}.
*/
Expand All @@ -42,11 +44,9 @@ public RelDataTypeFieldImpl(
String name,
int index,
RelDataType type) {
assert name != null;
assert type != null;
this.name = name;
this.name = requireNonNull(name, "name");
this.index = index;
this.type = type;
this.type = requireNonNull(type, "type");
}

//~ Methods ----------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.apache.calcite.linq4j.Nullness.castNonNull;
Expand Down Expand Up @@ -91,16 +92,24 @@ protected RelDataTypeImpl() {

//~ Methods ----------------------------------------------------------------

@Override public @Nullable RelDataTypeField getField(String fieldName, boolean caseSensitive,
boolean elideRecord) {
@Override public @Nullable RelDataTypeField getField(String fieldName,
boolean caseSensitive, boolean elideRecord) {
if (fieldList == null) {
throw new IllegalStateException("Trying to access field " + fieldName
+ " in a type with no fields: " + this);
}
for (RelDataTypeField field : fieldList) {
if (Util.matches(caseSensitive, field.getName(), fieldName)) {
final Map<String, RelDataTypeField> fieldMap = getFieldMap();
if (caseSensitive && fieldMap != null) {
RelDataTypeField field = fieldMap.get(fieldName);
if (field != null) {
return field;
}
} else {
for (RelDataTypeField field : fieldList) {
if (Util.matches(caseSensitive, field.getName(), fieldName)) {
return field;
}
}
}
if (elideRecord) {
final List<Slot> slots = new ArrayList<>();
Expand All @@ -127,16 +136,35 @@ protected RelDataTypeImpl() {
}

// a dynamic * field will match any field name.
for (RelDataTypeField field : fieldList) {
if (field.isDynamicStar()) {
// the requested field could be in the unresolved star
return field;
if (fieldMap != null) {
return fieldMap.get("");
} else {
for (RelDataTypeField field : fieldList) {
if (field.isDynamicStar()) {
// the requested field could be in the unresolved star
return field;
}
}
}

return null;
}

/** Returns a map from field names to fields.
*
* <p>Matching is case-sensitive.
*
* <p>If several fields have the same name, the map contains the first.
*
* <p>A {@link RelDataTypeField#isDynamicStar() dynamic star field} is indexed
* under its own name and "" (the empty string).
*
* <p>If the map is null, the type must do lookup the long way.
*/
protected @Nullable Map<String, RelDataTypeField> getFieldMap() {
return null;
}

private static void getFieldRecurse(List<Slot> slots, RelDataType type,
int depth, String fieldName, boolean caseSensitive) {
while (slots.size() <= depth) {
Expand Down
28 changes: 28 additions & 0 deletions core/src/main/java/org/apache/calcite/rel/type/RelRecordType.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.sql.type.SqlTypeName;

import com.google.common.collect.ImmutableMap;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static java.util.Objects.requireNonNull;

Expand All @@ -31,6 +37,11 @@ public class RelRecordType extends RelDataTypeImpl implements Serializable {
/** Name resolution policy; usually {@link StructKind#FULLY_QUALIFIED}. */
private final StructKind kind;
private final boolean nullable;
private final @Nullable Map<String, RelDataTypeField> fieldNameMap;

/** Minimum number of fields where it is worth populating {@link #fieldNameMap}
* to accelerate lookups by field name. */
private static final int THRESHOLD = 20;

//~ Constructors -----------------------------------------------------------

Expand All @@ -45,6 +56,19 @@ public RelRecordType(StructKind kind, List<RelDataTypeField> fields, boolean nul
super(fields);
this.nullable = nullable;
this.kind = requireNonNull(kind, "kind");
if (fields.size() > THRESHOLD) {
final Map<String, RelDataTypeField> map = new HashMap<>();
for (RelDataTypeField f : fields) {
map.putIfAbsent(f.getName(), f);
if (f.isDynamicStar()) {
// the first dynamic star field is cached with a special name
map.putIfAbsent("", f);
}
}
this.fieldNameMap = ImmutableMap.copyOf(map);
} else {
this.fieldNameMap = null;
}
computeDigest();
}

Expand Down Expand Up @@ -85,6 +109,10 @@ public RelRecordType(List<RelDataTypeField> fields) {
return kind;
}

@Override protected @Nullable Map<String, RelDataTypeField> getFieldMap() {
return fieldNameMap;
}

@Override protected void generateTypeString(StringBuilder sb, boolean withDetail) {
sb.append("RecordType");
switch (kind) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4106,16 +4106,12 @@ private RexNode convertIdentifier(
} else {
qualified = SqlQualified.create(null, 1, null, identifier);
}
final Pair<RexNode, @Nullable Map<String, Integer>> e0 = requireNonNull(
bb.lookupExp(qualified),
() -> "no expression found for " + qualified);
final Pair<RexNode, @Nullable BiFunction<RexNode, String, RexNode>> e0 =
bb.lookupExp(qualified);
RexNode e = e0.left;
for (String name : qualified.suffix()) {
if (e == e0.left && e0.right != null) {
Integer i = requireNonNull(
e0.right.get(name),
() -> "e0.right.get(name) produced null for " + name);
e = rexBuilder.makeFieldAccess(e, i);
e = e0.right.apply(e, name);
} else {
final boolean caseSensitive = true; // name already fully-qualified
if (identifier.isStar() && bb.scope instanceof MatchRecognizeScope) {
Expand Down Expand Up @@ -4779,13 +4775,14 @@ void setRoot(List<RelNode> inputs) {
}

/**
* Returns an expression with which to reference a from-list item.
* Returns an expression with which to reference a from-list item;
* throws if not found.
*
* @param qualified the alias of the from item
* @return a {@link RexFieldAccess} or {@link RexRangeRef}, or null if
* not found
* @param qualified The alias of the FROM item
* @return a {@link RexFieldAccess} or {@link RexRangeRef}, never null
*/
@Nullable Pair<RexNode, @Nullable Map<String, Integer>> lookupExp(SqlQualified qualified) {
Pair<RexNode, @Nullable BiFunction<RexNode, String, RexNode>> lookupExp(
SqlQualified qualified) {
if (nameToNodeMap != null && qualified.prefixLength == 1) {
RexNode node = nameToNodeMap.get(qualified.identifier.names.get(0));
if (node == null) {
Expand All @@ -4799,8 +4796,9 @@ void setRoot(List<RelNode> inputs) {
final SqlValidatorScope.ResolvedImpl resolved =
new SqlValidatorScope.ResolvedImpl();
scope().resolve(qualified.prefix(), nameMatcher, false, resolved);
if (!(resolved.count() == 1)) {
return null;
if (resolved.count() != 1) {
throw new AssertionError("no unique expression found for " + qualified
+ "; count is " + resolved.count());
}
final SqlValidatorScope.Resolve resolve = resolved.only();
final RelDataType rowType = resolve.rowType();
Expand All @@ -4814,18 +4812,13 @@ void setRoot(List<RelNode> inputs) {
final LookupContext rels =
new LookupContext(this, inputs, systemFieldList.size());
final RexNode node = lookup(resolve.path.steps().get(0).i, rels);
if (node == null) {
return null;
} else {
final Map<String, Integer> fieldOffsets = new HashMap<>();
for (RelDataTypeField f : resolve.rowType().getFieldList()) {
if (!fieldOffsets.containsKey(f.getName())) {
fieldOffsets.put(f.getName(), f.getIndex());
}
}
final Map<String, Integer> map = ImmutableMap.copyOf(fieldOffsets);
return Pair.of(node, map);
}
assert node != null;
return Pair.of(node, (e, fieldName) -> {
final RelDataTypeField field =
requireNonNull(rowType.getField(fieldName, true, false),
() -> "field " + fieldName);
return rexBuilder.makeFieldAccess(e, field.getIndex());
});
} else {
// We're referencing a relational expression which has not been
// converted yet. This occurs when from items are correlated,
Expand Down Expand Up @@ -4857,7 +4850,11 @@ void setRoot(List<RelNode> inputs) {
}
final RexNode c =
rexBuilder.makeCorrel(builder.uniquify().build(), correlId);
return Pair.of(c, fields.build());
final ImmutableMap<String, Integer> fieldMap = fields.build();
return Pair.of(c, (e, fieldName) -> {
final int j = requireNonNull(fieldMap.get(fieldName), "field " + fieldName);
return rexBuilder.makeFieldAccess(e, j);
});
}
}
}
Expand Down
16 changes: 7 additions & 9 deletions ubenchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,27 +28,23 @@ Calcite artifacts. (Besides, jmh's license does not allow that.)

To run all benchmarks:

{noformat}bash
$ cd calcite
$ ./gradlew :ubenchmark:jmh
{noformat}
cd calcite
./gradlew :ubenchmark:jmh

## Running one benchmark from the command line

To run just one benchmark, modify `ubenchmark/build.gradle.kts` and add the
following task:

{noformat}kotlin
```kotlin
jmh {
include = listOf("removeAllVertices.*Benchmark")
}
{noformat}
```

and run

{noformat}bash
$ ./gradlew :ubenchmark:jmh
{noformat}
./gradlew :ubenchmark:jmh

as before. In this case, `removeAllVertices.*Benchmark` is a
regular expression that matches a few methods -- benchmarks -- in
Expand All @@ -74,3 +70,5 @@ case and link them here:
[3836](https://issues.apache.org/jira/browse/CALCITE-3836)
* ReflectVisitorDispatcherTest:
[3873](https://issues.apache.org/jira/browse/CALCITE-3873)
* RelNodeConversionBenchmark:
[4994](https://issues.apache.org/jira/browse/CALCITE-4994)
Loading

0 comments on commit bb89b92

Please sign in to comment.