Skip to content

Commit 04116c9

Browse files
committed
SQL: Remove CircuitBreaker from parser (#41835)
The CircuitBreaker was introduced as means of preventing a `StackOverflowException` during the build of the AST by the parser. The ANTLR4 grammar causes a weird behaviour for a Parser Listener. The `enterEveryRule()` method is often called with a different parsing context than the respective `exitEveryRule()`. This makes it difficult to keep track of the tree's depth, and a custom Map was used as an attempt of matching the contextes as they are encounter during `enter` and during `exit` of the rules. This approach had 2 important drawbacks: 1. It's hard to maintain this custom Map as the grammar changes. 2. The CircuitBreaker could often lead to false positives which caused valid queries to return an Exception and prevent them from executing. So, this removes completely the CircuitBreaker which is replaced be a simple handling of the `StackOverflowException` Fixes: #41471 (cherry picked from commit 1559a8e)
1 parent a22120d commit 04116c9

File tree

3 files changed

+60
-278
lines changed

3 files changed

+60
-278
lines changed

docs/reference/sql/limitations.asciidoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33
[[sql-limitations]]
44
== SQL Limitations
55

6+
[float]
7+
[[large-parsing-trees]]
8+
=== Large queries may throw `ParsingExpection`
9+
10+
Extremely large queries can consume too much memory during the parsing phase, in which case the {es-sql} engine will
11+
abort parsing and throw an error. In such cases, consider reducing the query to a smaller size by potentially
12+
simplifying it or splitting it into smaller queries.
13+
614
[float]
715
[[sys-columns-describe-table-nested-fields]]
816
=== Nested fields in `SYS COLUMNS` and `DESCRIBE TABLE`

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/parser/SqlParser.java

Lines changed: 29 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
*/
66
package org.elasticsearch.xpack.sql.parser;
77

8-
import com.carrotsearch.hppc.ObjectShortHashMap;
98
import org.antlr.v4.runtime.BaseErrorListener;
109
import org.antlr.v4.runtime.CharStream;
1110
import org.antlr.v4.runtime.CommonToken;
@@ -26,16 +25,6 @@
2625
import org.apache.logging.log4j.LogManager;
2726
import org.apache.logging.log4j.Logger;
2827
import org.elasticsearch.xpack.sql.expression.Expression;
29-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BackQuotedIdentifierContext;
30-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanDefaultContext;
31-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanExpressionContext;
32-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.PrimaryExpressionContext;
33-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryPrimaryDefaultContext;
34-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryTermContext;
35-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QuoteIdentifierContext;
36-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementContext;
37-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementDefaultContext;
38-
import org.elasticsearch.xpack.sql.parser.SqlBaseParser.UnquoteIdentifierContext;
3928
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
4029
import org.elasticsearch.xpack.sql.proto.SqlTypedParamValue;
4130

@@ -50,7 +39,6 @@
5039
import java.util.function.Function;
5140

5241
import static java.lang.String.format;
53-
import static org.elasticsearch.xpack.sql.parser.AbstractBuilder.source;
5442

5543
public class SqlParser {
5644

@@ -100,45 +88,49 @@ private <T> T invokeParser(String sql,
10088
List<SqlTypedParamValue> params, Function<SqlBaseParser,
10189
ParserRuleContext> parseFunction,
10290
BiFunction<AstBuilder, ParserRuleContext, T> visitor) {
103-
SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql));
91+
try {
92+
SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql));
10493

105-
lexer.removeErrorListeners();
106-
lexer.addErrorListener(ERROR_LISTENER);
94+
lexer.removeErrorListeners();
95+
lexer.addErrorListener(ERROR_LISTENER);
10796

108-
Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>();
109-
TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params);
97+
Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>();
98+
TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params);
11099

111-
CommonTokenStream tokenStream = new CommonTokenStream(tokenSource);
112-
SqlBaseParser parser = new SqlBaseParser(tokenStream);
100+
CommonTokenStream tokenStream = new CommonTokenStream(tokenSource);
101+
SqlBaseParser parser = new SqlBaseParser(tokenStream);
113102

114-
parser.addParseListener(new CircuitBreakerListener());
115-
parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames())));
103+
parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames())));
116104

117-
parser.removeErrorListeners();
118-
parser.addErrorListener(ERROR_LISTENER);
105+
parser.removeErrorListeners();
106+
parser.addErrorListener(ERROR_LISTENER);
119107

120-
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
108+
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
121109

122-
if (DEBUG) {
123-
debug(parser);
124-
tokenStream.fill();
110+
if (DEBUG) {
111+
debug(parser);
112+
tokenStream.fill();
125113

126-
for (Token t : tokenStream.getTokens()) {
127-
String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType());
128-
String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType());
129-
log.info(format(Locale.ROOT, " %-15s '%s'",
114+
for (Token t : tokenStream.getTokens()) {
115+
String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType());
116+
String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType());
117+
log.info(format(Locale.ROOT, " %-15s '%s'",
130118
symbolicName == null ? literalName : symbolicName,
131119
t.getText()));
120+
}
132121
}
133-
}
134122

135-
ParserRuleContext tree = parseFunction.apply(parser);
123+
ParserRuleContext tree = parseFunction.apply(parser);
136124

137-
if (DEBUG) {
138-
log.info("Parse tree {} " + tree.toStringTree());
139-
}
125+
if (DEBUG) {
126+
log.info("Parse tree {} " + tree.toStringTree());
127+
}
140128

141-
return visitor.apply(new AstBuilder(paramTokens), tree);
129+
return visitor.apply(new AstBuilder(paramTokens), tree);
130+
} catch (StackOverflowError e) {
131+
throw new ParsingException("SQL statement is too large, " +
132+
"causing stack overflow when generating the parsing tree: [{}]", sql);
133+
}
142134
}
143135

144136
private static void debug(SqlBaseParser parser) {
@@ -221,93 +213,6 @@ public void exitNonReserved(SqlBaseParser.NonReservedContext context) {
221213
}
222214
}
223215

224-
/**
225-
* Used to catch large expressions that can lead to stack overflows
226-
*/
227-
static class CircuitBreakerListener extends SqlBaseBaseListener {
228-
229-
private static final short MAX_RULE_DEPTH = 200;
230-
231-
/**
232-
* Due to the structure of the grammar and our custom handling in {@link ExpressionBuilder}
233-
* some expressions can exit with a different class than they entered:
234-
* e.g.: ValueExpressionContext can exit as ValueExpressionDefaultContext
235-
*/
236-
private static final Map<String, String> ENTER_EXIT_RULE_MAPPING = new HashMap<>();
237-
238-
static {
239-
ENTER_EXIT_RULE_MAPPING.put(StatementDefaultContext.class.getSimpleName(), StatementContext.class.getSimpleName());
240-
ENTER_EXIT_RULE_MAPPING.put(QueryPrimaryDefaultContext.class.getSimpleName(), QueryTermContext.class.getSimpleName());
241-
ENTER_EXIT_RULE_MAPPING.put(BooleanDefaultContext.class.getSimpleName(), BooleanExpressionContext.class.getSimpleName());
242-
}
243-
244-
private boolean insideIn = false;
245-
246-
// Keep current depth for every rule visited.
247-
// The totalDepth alone cannot be used as expressions like: e1 OR e2 OR e3 OR ...
248-
// are processed as e1 OR (e2 OR (e3 OR (... and this results in the totalDepth not growing
249-
// while the stack call depth is, leading to a StackOverflowError.
250-
private ObjectShortHashMap<String> depthCounts = new ObjectShortHashMap<>();
251-
252-
@Override
253-
public void enterEveryRule(ParserRuleContext ctx) {
254-
if (inDetected(ctx)) {
255-
insideIn = true;
256-
}
257-
258-
// Skip PrimaryExpressionContext for IN as it's not visited on exit due to
259-
// the grammar's peculiarity rule with "predicated" and "predicate".
260-
// Also skip the Identifiers as they are "cheap".
261-
if (ctx.getClass() != UnquoteIdentifierContext.class &&
262-
ctx.getClass() != QuoteIdentifierContext.class &&
263-
ctx.getClass() != BackQuotedIdentifierContext.class &&
264-
ctx.getClass() != SqlBaseParser.ConstantContext.class &&
265-
ctx.getClass() != SqlBaseParser.NumberContext.class &&
266-
ctx.getClass() != SqlBaseParser.ValueExpressionContext.class &&
267-
(insideIn == false || ctx.getClass() != PrimaryExpressionContext.class)) {
268-
269-
int currentDepth = depthCounts.putOrAdd(ctx.getClass().getSimpleName(), (short) 1, (short) 1);
270-
if (currentDepth > MAX_RULE_DEPTH) {
271-
throw new ParsingException(source(ctx), "SQL statement too large; " +
272-
"halt parsing to prevent memory errors (stopped at depth {})", MAX_RULE_DEPTH);
273-
}
274-
}
275-
super.enterEveryRule(ctx);
276-
}
277-
278-
@Override
279-
public void exitEveryRule(ParserRuleContext ctx) {
280-
if (inDetected(ctx)) {
281-
insideIn = false;
282-
}
283-
284-
decrementCounter(ctx);
285-
super.exitEveryRule(ctx);
286-
}
287-
288-
ObjectShortHashMap<String> depthCounts() {
289-
return depthCounts;
290-
}
291-
292-
private void decrementCounter(ParserRuleContext ctx) {
293-
String className = ctx.getClass().getSimpleName();
294-
String classNameToDecrement = ENTER_EXIT_RULE_MAPPING.getOrDefault(className, className);
295-
296-
// Avoid having negative numbers
297-
if (depthCounts.containsKey(classNameToDecrement)) {
298-
depthCounts.putOrAdd(classNameToDecrement, (short) 0, (short) -1);
299-
}
300-
}
301-
302-
private boolean inDetected(ParserRuleContext ctx) {
303-
if (ctx.getParent() != null && ctx.getParent().getClass() == SqlBaseParser.PredicateContext.class) {
304-
SqlBaseParser.PredicateContext pc = (SqlBaseParser.PredicateContext) ctx.getParent();
305-
return pc.kind != null && pc.kind.getType() == SqlBaseParser.IN;
306-
}
307-
return false;
308-
}
309-
}
310-
311216
private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() {
312217
@Override
313218
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,

0 commit comments

Comments
 (0)