Skip to content

Commit 3ce2b46

Browse files
committed
python parser functional!
1 parent 3b34a8a commit 3ce2b46

File tree

3 files changed

+149
-28
lines changed

3 files changed

+149
-28
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
/build/
33
/dist/
44
/build.number
5-
/src/info/sansgills/mode/python/preproc/*.java # generated files
6-
/src/info/sansgills/mode/python/preproc/*.tokens # generated files
5+
/generated/*
76

87
# Java things #
98
*.class

build.xml

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<!-- You'll want to change these to compile. -->
77
<property name="processing.base" location="C:\Dev\processing-2.0.1-windows64\processing-2.0.1" /> <!-- The directory that you unpacked Processing into. -->
88
<property name="processing.sketchfolder" location="C:\Dev\Processing" /> <!-- Your sketchbook folder. -->
9-
<property name="java.target.bootclasspath" location="C:\Program Files\Java\jdk1.6.0_45\jre\lib\rt.jar" /> <!-- If you're using a newer jdk -->
9+
<property name="java.target.bootclasspath" location="C:\Program Files\Java\jdk1.6.0_38\jre\lib\rt.jar" /> <!-- If you're using a newer jdk -->
1010

1111
<!-- Leave these be. -->
1212
<description>Python Mode for Processing 2.0.</description>
@@ -17,13 +17,14 @@
1717
<property name="java.target.version" value="1.6" />
1818

1919
<property name="src" value="src" />
20+
<property name="gen" value="generated" />
2021
<property name="build" value="build" />
21-
<property name="bin" value="bin" />
2222
<property name="dist" value="dist" />
2323
<property name="releasedir" value="release" />
2424

2525
<property name="lib.path" value="info/sansgills/mode/python" />
2626
<property name="wrap.path" value="info/sansgills/mode/python/wrapper" />
27+
<property name="preproc.path" value="info/sansgills/mode/python/preproc" />
2728

2829
<path id="library-classpath">
2930
<fileset dir="${processing.base}/core/library/" > <!-- processing core -->
@@ -39,6 +40,10 @@
3940
</fileset>
4041
</path>
4142

43+
<condition property="gen.present">
44+
<available file="${gen}" />
45+
</condition>
46+
4247
<!-- - - - - - - - - - - - - - - - - - - - - - -
4348
GET
4449
- - - - - - - - - - - - - - - - - - - - - - - -->
@@ -62,21 +67,40 @@
6267
verbose="true" /> <!-- download latest ANTLR compiler (to compile our grammars; not included) -->
6368
</target>
6469

70+
71+
72+
<target name="generate" depends="get" unless="gen.present">
73+
<mkdir dir="${gen}" />
74+
<java jar="tool/antlr-4.1-complete.jar" fork="true">
75+
<arg value="-no-listener" />
76+
<arg value="-visitor" />
77+
<arg value="-lib" />
78+
<arg value="${basedir}/${src}/${preproc.path}" />
79+
<arg value="-o" />
80+
<arg value="${basedir}/${gen}/${preproc.path}" />
81+
<arg value="${basedir}/${src}/${preproc.path}/PyPde.g4" />
82+
</java>
83+
</target>
84+
85+
6586
<!-- - - - - - - - - - - - - - - - - - - - - - -
6687
BUILD
6788
- - - - - - - - - - - - - - - - - - - - - - - -->
68-
<target name="build" depends="get">
89+
<target name="build" depends="get,generate">
6990
<propertyfile file="build.number" /> <!-- create the build.number file if it doesn't exist -->
7091
<buildnumber file="build.number" />
7192

7293
<mkdir dir="${build}" />
7394

74-
<javac srcdir="${src}" destdir="${build}" source="${java.target.version}" target="${java.target.version}" bootclasspath="${java.target.bootclasspath}" includeantruntime="false" debug="true">
95+
<javac destdir="${build}" source="${java.target.version}" target="${java.target.version}" bootclasspath="${java.target.bootclasspath}" includeantruntime="false" debug="true">
96+
<src path="${src}" />
97+
<src path="${gen}" />
7598
<classpath>
7699
<path refid="library-classpath"/>
77100
</classpath>
78101
</javac>
79102

103+
80104
<copy file="${src}/${wrap.path}/prepend.py" tofile="${build}/${wrap.path}/prepend.py" />
81105
<copy file="${src}/${wrap.path}/scrub.py" tofile="${build}/${wrap.path}/scrub.py" />
82106

@@ -135,9 +159,10 @@
135159
<target name="clean" >
136160
<delete dir="${build}" />
137161
<delete dir="${dist}" />
138-
<delete>
139-
<fileset dir="${src}/info/sansgills/mode/python/preproc" includes="*.java *.tokens" /> <!-- remove generated files -->
140-
</delete>
162+
</target>
163+
164+
<target name="cleangen" >
165+
<delete dir="${gen}" />
141166
</target>
142167

143168
</project>

src/info/sansgills/mode/python/preproc/PyPde.g4

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,109 @@
33
* and open the template in the editor.
44
*/
55

6-
grammar PyPde2;
6+
grammar PyPde;
7+
8+
@parser::header {
9+
package info.sansgills.mode.python.preproc;
10+
}
11+
12+
@lexer::header {
13+
package info.sansgills.mode.python.preproc;
14+
15+
import java.util.Stack;
16+
import java.util.Deque;
17+
import java.util.ArrayDeque;
18+
import java.lang.Integer;
19+
import org.antlr.v4.runtime.CommonToken;
20+
import org.antlr.v4.runtime.misc.Interval;
21+
}
722

823
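//lexer state: nesting counters for (), {} and [], plus the indentation stack and pending-token queue that nextToken() uses to emit INDENT/DEDENT tokens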
924
@lexer::members {
1025
int bracket_nesting_level = 0;
1126
int parenth_nesting_level = 0;
1227
int curlybr_nesting_level = 0;
28+
29+
Stack<Integer> whitespace_stack = new Stack<Integer>();
1330
14-
int previous_indent_level = 0;
15-
int current_indent_level = 0;
31+
Deque<Token> pendingTokens = new ArrayDeque<Token>();
32+
33+
@Override
34+
public Token nextToken() {
35+
if(whitespace_stack.empty()){
36+
whitespace_stack.push(new Integer(0));
37+
}
38+
39+
while(pendingTokens.isEmpty()){
40+
Token token = super.nextToken();
41+
switch(token.getType()){
42+
case EOF: //EOF; dedent until we're at the bottom of the stack and then pop off a newline
43+
System.out.println("EOF");
44+
if(whitespace_stack.peek().intValue() != 0){
45+
while(whitespace_stack.peek().intValue() != 0){
46+
whitespace_stack.pop();
47+
pendingTokens.add(new CommonToken(DEDENT, "DE"));
48+
}
49+
pendingTokens.add(new CommonToken(NEWLINE, "\n"));
50+
}
51+
pendingTokens.add(new CommonToken(EOF, "<EOF>"));
52+
break;
53+
case NEWLINE:
54+
System.out.println("newline");
55+
pendingTokens.add(token);
56+
//dynamically generate *DENT tokens
57+
int next = 0; //level of indentation on next line
58+
int current_position = token.getText().length();
59+
String followingText = _input.getText(new Interval(_tokenStartCharIndex, _tokenStartCharIndex+32));
60+
boolean empty_line = false;
61+
while(true){
62+
if(current_position >= followingText.length()){
63+
followingText = _input.getText(new Interval(_tokenStartCharIndex, _tokenStartCharIndex+current_position+32));
64+
if(current_position >= followingText.length()){
65+
//we've reached the end of the text
66+
break;
67+
}
68+
}
69+
if(followingText.charAt(current_position) == ' '){
70+
next++;
71+
}else if(followingText.charAt(current_position) == '\t'){
72+
next+=8;
73+
next-=next%8;
74+
}else if(followingText.charAt(current_position) == '\r' || followingText.charAt(current_position) == '\n'){
75+
empty_line = true;
76+
break;
77+
}else{
78+
break;
79+
}
80+
current_position++;
81+
}
82+
if(empty_line){
83+
System.out.println("empty line");
84+
break;
85+
}
86+
87+
//next now matches the amount of whitespace beginning the next line
88+
int cur = whitespace_stack.peek().intValue();
89+
System.out.println("next line: "+next+" current line: "+cur);
90+
if(next > cur){
91+
whitespace_stack.push(new Integer(next));
92+
pendingTokens.add(new CommonToken(INDENT, "IN"));
93+
System.out.println("indenting");
94+
}else if(next < cur){
95+
while(next < whitespace_stack.peek().intValue()){
96+
whitespace_stack.pop();
97+
pendingTokens.add(new CommonToken(DEDENT, "DE"));
98+
System.out.println("dedenting");
99+
}
100+
}
101+
break;
102+
default:
103+
pendingTokens.add(token);
104+
break;
105+
}
106+
}
107+
return pendingTokens.poll();
108+
}
16109
}
17110

18111

@@ -82,27 +175,31 @@ TRY : 'try';
82175

83176
IDENTIFIER: [a-zA-Z_] [a-zA-Z0-9_]*;
84177

178+
COMMENT: '#' (~'\n')* -> skip; //no need to keep these
179+
85180
//logical vs. physical line nonsense
86181
LPAREN: '(' {parenth_nesting_level++;};
87182
RPAREN: ')' {parenth_nesting_level--;};
88183
LCURLY: '{' {curlybr_nesting_level++;};
89184
RCURLY: '}' {curlybr_nesting_level--;};
90185
LBRACKET: '[' {bracket_nesting_level++;};
91-
RBRACKET: '[' {bracket_nesting_level--;};
186+
RBRACKET: ']' {bracket_nesting_level--;};
92187

93188
//we're inside a grouping, newlines don't count
94189
IMPLICIT_ESCAPE_NEWLINE: {parenth_nesting_level > 0 ||
95190
curlybr_nesting_level > 0 ||
96191
bracket_nesting_level > 0}? ('\n' | '\r\n') -> skip;
97-
EXPLICIT_ESCAPE_NEWLINE: ('\\\n' | '\\\r\n') -> skip; //explicit line joins
192+
EXPLICIT_ESCAPE_NEWLINE: '\\' '\r'? '\n' -> skip; //explicit line joins
98193

99194

100-
NEWLINE: ('\n' | '\r\n') {current_indent_level = 0;}; //above not true; these newlines count
195+
NEWLINE: '\r'?'\n'; //above not true; these newlines count
101196

102-
COMMENT: '#' (~'\n')* -> skip; //no need to keep these
197+
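//the {false}? predicates keep the lexer from ever matching these rules; INDENT and DEDENT tokens are created only by the nextToken() override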
198+
INDENT: {false}? 'IN';
199+
DEDENT: {false}? 'DE';
103200

201+
WS: [ \t]+ -> skip;
104202

105-
WS: ' '+ -> skip;
106203

107204
// onto the parser
108205
// atomic values: identifiers and literals (including enclosure literals
@@ -114,38 +211,38 @@ enclosure : parenth_form | generator_expression
114211
| list_display | dict_display | set_display
115212
| string_conversion | yield_atom;
116213

117-
parenth_form: '(' expression_list ')'; //tuples
118-
214+
parenth_form: '(' expression_list ')'; //tuples
215+
119216
generator_expression: '(' expression comp_for ')';
120217

121218
list_display : '[' (expression_list | list_comprehension)? ']'; //lists; lotsa backwards compatibility cruft tho
122219
list_comprehension : expression list_for;
123-
list_for : FOR target_list IN old_expression_list (list_iter)?;
220+
list_for : FOR target_list IN old_expression_list (list_iter)?;
124221
old_expression_list : old_expression ((',' old_expression)+ ','?)?;
125222
old_expression : or_test | old_lambda_form;
126223
list_iter : list_for | list_if;
127224
list_if : IF old_expression list_iter?;
128225

129226
dict_display : '{' (key_datum_list | dict_comprehension)? '}'; //dictionary
130227
key_datum_list : key_datum (',' key_datum)* ','?;
131-
key_datum : expression ':' expression;
228+
key_datum : expression ':' expression;
132229
dict_comprehension : expression ':' expression comp_for;
133230

134231
set_display : '{' (expression_list | comprehension) '}'; //set
135232

136233
comprehension : expression comp_for; //elements of set and dictionary comprehensions
137-
comp_for : FOR target_list IN or_test comp_iter?;
234+
comp_for : FOR target_list IN or_test comp_iter?;
138235
comp_iter : comp_for | comp_if;
139236
comp_if : IF expression comp_iter?;
140237

141238
string_conversion : '`' expression_list '`';
142-
239+
143240
yield_atom : '(' yield_expression ')';
144241
yield_expression : YIELD expression_list?;
145242

146243

147244
// primaries: most tightly bound operations
148-
245+
149246
primary : primary '.' identifier # AttributeRef
150247
| primary '[' expression_list ']' # Subscription
151248
| primary '[' short_slice ']' # ShortSlicing
@@ -170,13 +267,13 @@ long_slice : short_slice ':' (expression)?;
170267
//call : primary '(' (argument_list ','? | expression comp_for)? ')';
171268

172269

173-
argument_list : positional_arguments (',' keyword_arguments)?
270+
argument_list : positional_arguments (',' keyword_arguments)?
174271
(',' '*' expression)? (',' keyword_arguments)?
175272
(',' '**' expression)?
176-
| keyword_arguments (',' '*' expression)?
273+
| keyword_arguments (',' '*' expression)?
177274
(',' '**' expression)?
178275
| '*' expression (',' '*' expression)? (',' '**' expression)?
179-
| '**' expression;
276+
| '**' expression;
180277
positional_arguments : expression (',' expression)*;
181278
keyword_arguments : keyword_item (',' keyword_item)*;
182279
keyword_item : identifier '=' expression;
@@ -302,7 +399,7 @@ compound_stmt: if_stmt
302399
| classdef
303400
| decorated;
304401

305-
suite: stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT;
402+
suite: stmt_list NEWLINE | NEWLINE INDENT (statement | NEWLINE)+ DEDENT;
306403

307404
if_stmt: IF expression ':' suite
308405
(ELIF expression ':' suite)*

0 commit comments

Comments
 (0)