Skip to content

Commit 17302a0

Browse files
committed
Painless: modify grammar to allow more statement delimiters (#29566)
This change lets the grammar itself decide which statements need a delimiter, and which delimiter they use, by splitting statements into two groups: regular statements, which do not require a delimiter, and delimited statements, which do. Consumers of delimited statements can then choose the delimiter, so that in certain cases a semicolon is not necessary, such as when the statement is immediately followed by a closing right bracket. This removes the need for semicolon insertion in the lexer, which simplifies the existing lexer considerably. It also means there is no longer any need to track semicolons being inserted in places where they are not necessary, such as array initializers.
1 parent 4ceb102 commit 17302a0

File tree

8 files changed

+936
-889
lines changed

8 files changed

+936
-889
lines changed

modules/lang-painless/src/main/antlr/PainlessParser.g4

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ parser grammar PainlessParser;
2222
options { tokenVocab=PainlessLexer; }
2323

2424
source
25-
: function* statement* EOF
25+
: function* statement* dstatement? EOF
2626
;
2727

2828
function
@@ -33,23 +33,31 @@ parameters
3333
: LP ( decltype ID ( COMMA decltype ID )* )? RP
3434
;
3535

36+
statement
37+
: rstatement
38+
| dstatement SEMICOLON
39+
;
40+
3641
// Note we use a predicate on the if/else case here to prevent the
3742
// "dangling-else" ambiguity by forcing the 'else' token to be consumed
3843
// as soon as one is found. See (https://en.wikipedia.org/wiki/Dangling_else).
39-
statement
44+
rstatement
4045
: IF LP expression RP trailer ( ELSE trailer | { _input.LA(1) != ELSE }? ) # if
4146
| WHILE LP expression RP ( trailer | empty ) # while
42-
| DO block WHILE LP expression RP delimiter # do
4347
| FOR LP initializer? SEMICOLON expression? SEMICOLON afterthought? RP ( trailer | empty ) # for
4448
| FOR LP decltype ID COLON expression RP trailer # each
4549
| FOR LP ID IN expression RP trailer # ineach
46-
| declaration delimiter # decl
47-
| CONTINUE delimiter # continue
48-
| BREAK delimiter # break
49-
| RETURN expression delimiter # return
5050
| TRY block trap+ # try
51-
| THROW expression delimiter # throw
52-
| expression delimiter # expr
51+
;
52+
53+
dstatement
54+
: DO block WHILE LP expression RP # do
55+
| declaration # decl
56+
| CONTINUE # continue
57+
| BREAK # break
58+
| RETURN expression # return
59+
| THROW expression # throw
60+
| expression # expr
5361
;
5462

5563
trailer
@@ -58,7 +66,7 @@ trailer
5866
;
5967

6068
block
61-
: LBRACK statement* RBRACK
69+
: LBRACK statement* dstatement? RBRACK
6270
;
6371

6472
empty
@@ -90,11 +98,6 @@ trap
9098
: CATCH LP TYPE ID RP block
9199
;
92100

93-
delimiter
94-
: SEMICOLON
95-
| EOF
96-
;
97-
98101
expression
99102
: unary # single
100103
| expression ( MUL | DIV | REM ) expression # binary
@@ -169,8 +172,8 @@ braceaccess
169172
;
170173

171174
arrayinitializer
172-
: NEW TYPE ( LBRACE expression RBRACE )+ ( postdot postfix* )? # newstandardarray
173-
| NEW TYPE LBRACE RBRACE LBRACK ( expression ( COMMA expression )* )? SEMICOLON? RBRACK postfix* # newinitializedarray
175+
: NEW TYPE ( LBRACE expression RBRACE )+ ( postdot postfix* )? # newstandardarray
176+
| NEW TYPE LBRACE RBRACE LBRACK ( expression ( COMMA expression )* )? RBRACK postfix* # newinitializedarray
174177
;
175178

176179
listinitializer
@@ -206,10 +209,8 @@ lamtype
206209
;
207210

208211
funcref
209-
: TYPE REF ID # classfuncref // reference to a static or instance method,
210-
// e.g. ArrayList::size or Integer::compare
211-
| decltype REF NEW # constructorfuncref // reference to a constructor, e.g. ArrayList::new
212-
| ID REF ID # capturingfuncref // reference to an instance method, e.g. object::toString
213-
// currently limited to capture of a simple variable (id).
214-
| THIS REF ID # localfuncref // reference to a local function, e.g. this::myfunc
212+
: TYPE REF ID # classfuncref
213+
| decltype REF NEW # constructorfuncref
214+
| ID REF ID # capturingfuncref
215+
| THIS REF ID # localfuncref
215216
;

modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -44,36 +44,18 @@ final class EnhancedPainlessLexer extends PainlessLexer {
4444
private final String sourceName;
4545
private final Definition definition;
4646

47-
private Token stashedNext = null;
48-
private Token previous = null;
47+
private Token current = null;
4948

5049
EnhancedPainlessLexer(CharStream charStream, String sourceName, Definition definition) {
5150
super(charStream);
5251
this.sourceName = sourceName;
5352
this.definition = definition;
5453
}
5554

56-
public Token getPreviousToken() {
57-
return previous;
58-
}
59-
6055
@Override
6156
public Token nextToken() {
62-
if (stashedNext != null) {
63-
previous = stashedNext;
64-
stashedNext = null;
65-
return previous;
66-
}
67-
Token next = super.nextToken();
68-
if (insertSemicolon(previous, next)) {
69-
stashedNext = next;
70-
previous = _factory.create(new Pair<TokenSource, CharStream>(this, _input), PainlessLexer.SEMICOLON, ";",
71-
Lexer.DEFAULT_TOKEN_CHANNEL, next.getStartIndex(), next.getStopIndex(), next.getLine(), next.getCharPositionInLine());
72-
return previous;
73-
} else {
74-
previous = next;
75-
return next;
76-
}
57+
current = super.nextToken();
58+
return current;
7759
}
7860

7961
@Override
@@ -101,7 +83,7 @@ protected boolean isSimpleType(String name) {
10183

10284
@Override
10385
protected boolean slashIsRegex() {
104-
Token lastToken = getPreviousToken();
86+
Token lastToken = current;
10587
if (lastToken == null) {
10688
return true;
10789
}
@@ -120,18 +102,4 @@ protected boolean slashIsRegex() {
120102
return true;
121103
}
122104
}
123-
124-
private static boolean insertSemicolon(Token previous, Token next) {
125-
if (previous == null || next.getType() != PainlessLexer.RBRACK) {
126-
return false;
127-
}
128-
switch (previous.getType()) {
129-
case PainlessLexer.RBRACK: // };} would be weird!
130-
case PainlessLexer.SEMICOLON: // already have a semicolon, no need to add one
131-
case PainlessLexer.LBRACK: // empty blocks don't need a semicolon
132-
return false;
133-
default:
134-
return true;
135-
}
136-
}
137105
}

0 commit comments

Comments
 (0)