3
3
* and open the template in the editor.
4
4
*/
5
5
6
- grammar PyPde2;
6
+ grammar PyPde;
7
+
8
+ @parser::header {
9
+ package info.sansgills.mode.python.preproc;
10
+ }
11
+
12
+ @lexer::header {
13
+ package info.sansgills.mode.python.preproc;
14
+
15
+ import java.util.Stack;
16
+ import java.util.Deque;
17
+ import java.util.ArrayDeque;
18
+ import java.lang.Integer;
19
+ import org.antlr.v4.runtime.CommonToken;
20
+ import org.antlr.v4.runtime.misc.Interval;
21
+ }
7
22
8
23
// lexer state and a nextToken() override that synthesize INDENT/DEDENT tokens from leading whitespace
9
24
@lexer::members {
// Counters for currently open brackets. They are only declared here; the
// LPAREN/RPAREN, LCURLY/RCURLY and LBRACKET/RBRACKET rule actions update them,
// and the IMPLICIT_ESCAPE_NEWLINE predicate reads them.
10
25
int bracket_nesting_level = 0 ;
11
26
int parenth_nesting_level = 0 ;
12
27
int curlybr_nesting_level = 0 ;
28
+
29
// Indentation widths of the blocks that are currently open; nextToken()
// seeds it with 0 the first time it runs, so the bottom entry is column 0.
+ Stack<Integer> whitespace_stack = new Stack<Integer>();
13
30
14
- int previous_indent_level = 0 ;
15
- int current_indent_level = 0 ;
31
// Tokens produced but not yet handed to the caller. add() appends and
// poll() removes from the head, so tokens come out in the order queued.
+ Deque<Token> pendingTokens = new ArrayDeque<Token>();
32
+
33
// Wraps super.nextToken(): every token from the generated lexer is queued,
// and after each NEWLINE the leading whitespace of the following line is
// measured so that synthetic INDENT / DEDENT tokens can be queued behind it.
// At EOF the still-open indentation levels are closed with DEDENT tokens.
// Returns the oldest queued token.
+ @Override
34
+ public Token nextToken() {
35
// lazy initialisation: the base indentation level is 0
+ if (whitespace_stack.empty()){
36
+ whitespace_stack.push(new Integer(0 ));
37
+ }
38
+
39
// pull from the underlying lexer only when nothing is queued
+ while(pendingTokens.isEmpty()){
40
+ Token token = super.nextToken();
41
+ switch (token.getType()){
42
+ case EOF: // EOF; dedent until we're at the bottom of the stack and then pop off a newline
43
// debug trace written to stdout
+ System.out.println(" EOF" );
44
// only when some block is still open: one DEDENT per open level,
// followed by a synthetic NEWLINE
+ if (whitespace_stack.peek().intValue() != 0 ){
45
+ while (whitespace_stack.peek().intValue() != 0 ){
46
+ whitespace_stack.pop();
47
+ pendingTokens.add(new CommonToken(DEDENT, " DE" ));
48
+ }
49
+ pendingTokens.add(new CommonToken(NEWLINE , " \n " ));
50
+ }
51
// a fresh EOF token is queued; the one returned by super.nextToken() is dropped
+ pendingTokens.add(new CommonToken(EOF , " <EOF>" ));
52
+ break;
53
+ case NEWLINE :
54
// debug trace written to stdout
+ System.out.println(" newline" );
55
// the NEWLINE itself always goes out first
+ pendingTokens.add(token);
56
+ // dynamically generate *DENT tokens
57
+ int next = 0; // level of indentation on next line
58
// offset into followingText; starts just past the newline characters
// of this token (followingText begins at _tokenStartCharIndex)
+ int current_position = token.getText().length();
59
// look-ahead window over the raw input, initially requested with a 32-character span
+ String followingText = _input.getText(new Interval(_tokenStartCharIndex, _tokenStartCharIndex+32));
60
+ boolean empty_line = false;
61
+ while(true){
62
// ran off the end of the window: request a larger one, and stop
// if that still does not reach current_position
+ if (current_position >= followingText.length()){
63
+ followingText = _input.getText(new Interval(_tokenStartCharIndex, _tokenStartCharIndex+current_position+32 ));
64
+ if (current_position >= followingText.length()){
65
+ // we've reached the end of the text
66
+ break ;
67
+ }
68
+ }
69
// a space counts as one column
+ if(followingText.charAt(current_position) == ' ' ){
70
+ next++;
71
// a tab advances to the next multiple of 8 columns
+ } else if(followingText.charAt(current_position) == ' \t ' ){
72
+ next+=8 ;
73
+ next-=next%8 ;
74
// the line ended before any other character: whitespace-only line
+ } else if(followingText.charAt(current_position) == ' \r ' || followingText.charAt(current_position) == ' \n ' ){
75
+ empty_line = true ;
76
+ break ;
77
// first non-whitespace character: indentation has been measured
+ } else{
78
+ break ;
79
+ }
80
+ current_position++;
81
+ }
82
// whitespace-only lines leave the indentation stack untouched;
// this break leaves the switch with only the NEWLINE queued
+ if(empty_line){
83
+ System.out.println(" empty line" );
84
+ break ;
85
+ }
86
+
87
+ // next now matches the amount of whitespace beginning the next line
88
+ int cur = whitespace_stack.peek().intValue();
89
+ System.out.println(" next line: " +next+" current line: " +cur);
90
// deeper than the current block: open one new level
+ if(next > cur){
91
+ whitespace_stack.push(new Integer(next));
92
+ pendingTokens.add(new CommonToken(INDENT, " IN" ));
93
+ System.out.println(" indenting" );
94
// shallower: close every level that is deeper than the new width
// NOTE(review): the loop stops once the top of the stack is <= next; it does
// not check that next equals a width already on the stack
+ } else if(next < cur){
95
+ while (next < whitespace_stack.peek().intValue()){
96
+ whitespace_stack.pop();
97
+ pendingTokens.add(new CommonToken(DEDENT, " DE" ));
98
+ System.out.println(" dedenting" );
99
+ }
100
+ }
101
+ break;
102
// every other token type passes through unchanged
+ default:
103
+ pendingTokens.add(token);
104
+ break;
105
+ }
106
+ }
107
// hand out the oldest queued token
+ return pendingTokens.poll();
108
+ }
16
109
}
17
110
18
111
@@ -82,27 +175,31 @@ TRY : 'try';
82
175
83
176
IDENTIFIER : [a-zA-Z_ ] [a-zA-Z0 -9_]*;
84
177
178
+ COMMENT : ' #' (~' \n ' )* -> skip; // no need to keep these
179
+
85
180
// logical vs. physical lines: count open brackets so newlines inside a grouping can be skipped
86
181
LPAREN : ' (' {parenth_nesting_level++;} ;
87
182
RPAREN : ' )' {parenth_nesting_level--;} ;
88
183
LCURLY : ' {' {curlybr_nesting_level++;} ;
89
184
RCURLY : ' }' {curlybr_nesting_level--;} ;
90
185
LBRACKET : ' [' {bracket_nesting_level++;} ;
91
- RBRACKET : ' [ ' {bracket_nesting_level--;} ;
186
+ RBRACKET : ' ] ' {bracket_nesting_level--;} ;
92
187
93
188
// we're inside a grouping, newlines don't count
94
189
IMPLICIT_ESCAPE_NEWLINE : {parenth_nesting_level > 0 ||
95
190
curlybr_nesting_level > 0 ||
96
191
bracket_nesting_level > 0 } ? (' \n ' | ' \r\n ' ) -> skip;
97
- EXPLICIT_ESCAPE_NEWLINE : ( ' \\\n ' | ' \\\r \n' ) -> skip; // explicit line joins
192
+ EXPLICIT_ESCAPE_NEWLINE : ' \\ ' ' \r ' ? ' \n ' -> skip; // explicit line joins
98
193
99
194
100
- NEWLINE : ( ' \n ' | ' \r\n ' ) {current_indent_level = 0 ;} ; // above not true; these newlines count
195
+ NEWLINE : ' \r ' ? ' \n ' ; // above not true; these newlines count
101
196
102
- COMMENT : ' #' (~' \n ' )* -> skip; // no need to keep these
197
+ // the {false}? predicate keeps the lexer from matching these itself; INDENT/DEDENT tokens are created in nextToken()
198
+ INDENT : {false } ? ' IN' ;
199
+ DEDENT : {false } ? ' DE' ;
103
200
201
+ WS : [ \t]+ -> skip;
104
202
105
- WS : ' ' + -> skip;
106
203
107
204
// onto the parser
108
205
// atomic values: identifiers and literals (including enclosure literals
@@ -114,38 +211,38 @@ enclosure : parenth_form | generator_expression
114
211
| list_display | dict_display | set_display
115
212
| string_conversion | yield_atom;
116
213
117
- parenth_form : ' (' expression_list ' )' ; // tuples
118
-
214
+ parenth_form : ' (' expression_list ' )' ; // tuples
215
+
119
216
generator_expression : ' (' expression comp_for ' )' ;
120
217
121
218
list_display : ' [' (expression_list | list_comprehension)? ' ]' ; // lists; lotsa backwards compatibility cruft tho
122
219
list_comprehension : expression list_for;
123
- list_for : FOR target_list IN old_expression_list (list_iter)?;
220
+ list_for : FOR target_list IN old_expression_list (list_iter)?;
124
221
old_expression_list : old_expression ((' ,' old_expression)+ ' ,' ?)?;
125
222
old_expression : or_test | old_lambda_form;
126
223
list_iter : list_for | list_if;
127
224
list_if : IF old_expression list_iter?;
128
225
129
226
dict_display : ' {' (key_datum_list | dict_comprehension)? ' }' ; // dictionary
130
227
key_datum_list : key_datum (' ,' key_datum)* ' ,' ?;
131
- key_datum : expression ' :' expression;
228
+ key_datum : expression ' :' expression;
132
229
dict_comprehension : expression ' :' expression comp_for;
133
230
134
231
set_display : ' {' (expression_list | comprehension) ' }' ; // set
135
232
136
233
comprehension : expression comp_for; // elements of set and dictionary comprehensions
137
- comp_for : FOR target_list IN or_test comp_iter?;
234
+ comp_for : FOR target_list IN or_test comp_iter?;
138
235
comp_iter : comp_for | comp_if;
139
236
comp_if : IF expression comp_iter?;
140
237
141
238
string_conversion : ' `' expression_list ' `' ;
142
-
239
+
143
240
yield_atom : ' (' yield_expression ' )' ;
144
241
yield_expression : YIELD expression_list?;
145
242
146
243
147
244
// primaries: most tightly bound operations
148
-
245
+
149
246
primary : primary ' .' identifier # AttributeRef
150
247
| primary ' [' expression_list ' ]' # Subscription
151
248
| primary ' [' short_slice ' ]' # ShortSlicing
@@ -170,13 +267,13 @@ long_slice : short_slice ':' (expression)?;
170
267
// call : primary '(' (argument_list ','? | expression comp_for)? ')';
171
268
172
269
173
- argument_list : positional_arguments (' ,' keyword_arguments)?
270
+ argument_list : positional_arguments (' ,' keyword_arguments)?
174
271
(' ,' ' *' expression)? (' ,' keyword_arguments)?
175
272
(' ,' ' **' expression)?
176
- | keyword_arguments (' ,' ' *' expression)?
273
+ | keyword_arguments (' ,' ' *' expression)?
177
274
(' ,' ' **' expression)?
178
275
| ' *' expression (' ,' ' *' expression)? (' ,' ' **' expression)?
179
- | ' **' expression;
276
+ | ' **' expression;
180
277
positional_arguments : expression (' ,' expression)*;
181
278
keyword_arguments : keyword_item (' ,' keyword_item)*;
182
279
keyword_item : identifier ' =' expression;
@@ -302,7 +399,7 @@ compound_stmt: if_stmt
302
399
| classdef
303
400
| decorated;
304
401
305
- suite : stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT ;
402
+ suite : stmt_list NEWLINE | NEWLINE INDENT ( statement | NEWLINE ) + DEDENT ;
306
403
307
404
if_stmt : IF expression ' :' suite
308
405
(ELIF expression ' :' suite)*
0 commit comments