14
# other compatibility work.
14
# other compatibility work.
15
#
15
#
16
16
17
- # FIXME: <fl> formalize (objectify?) and document the compiler code
18
- # format, so that other frontends can use this compiler
19
-
20
import array , string , sys
17
import array , string , sys
21
18
22
import _sre
19
import _sre
@@ -45,64 +42,70 @@ def append(self, code):
45
self .data .append (code )
42
self .data .append (code )
46
def todata(self):
    """Return the collected code words packed into a binary string.

    The items in self.data are packed with array.array() using the
    module-level type code WORDSIZE, and the raw bytes are returned.

    If packing raises OverflowError, the word list is written to
    stderr as a debugging aid and the exception is re-raised unchanged.
    """
    try:
        return array.array(WORDSIZE, self.data).tostring()
    except OverflowError:
        # Send the dump to stderr rather than stdout so the diagnostic
        # cannot get mixed into the program's regular output.
        sys.stderr.write("%s\n" % (self.data,))
        raise
53
50
54
- def _compile (code , pattern , flags ):
51
+ def _compile (code , pattern , flags , level = 0 ):
55
append = code .append
52
append = code .append
56
for op , av in pattern :
53
for op , av in pattern :
57
if op is ANY :
54
if op is ANY :
58
- if "s" in flags :
55
+ if flags & SRE_FLAG_DOTALL :
59
- append (CODES [op ]) # any character at all!
56
+ append (OPCODES [op ]) # any character at all!
60
else :
57
else :
61
- append (CODES [ NOT_LITERAL ])
58
+ append (OPCODES [ CATEGORY ])
62
- append (10 )
59
+ append (CHCODES [ CATEGORY_NOT_LINEBREAK ] )
63
elif op in (SUCCESS , FAILURE ):
60
elif op in (SUCCESS , FAILURE ):
64
- append (CODES [op ])
61
+ append (OPCODES [op ])
65
elif op is AT :
62
elif op is AT :
66
- append (CODES [op ])
63
+ append (OPCODES [op ])
67
- append (POSITIONS [av ])
64
+ if flags & SRE_FLAG_MULTILINE :
65
+ append (ATCODES [AT_MULTILINE [av ]])
66
+ else :
67
+ append (ATCODES [av ])
68
elif op is BRANCH :
68
elif op is BRANCH :
69
- append (CODES [op ])
69
+ append (OPCODES [op ])
70
tail = []
70
tail = []
71
for av in av [1 ]:
71
for av in av [1 ]:
72
skip = len (code ); append (0 )
72
skip = len (code ); append (0 )
73
- _compile (code , av , flags )
73
+ _compile (code , av , flags , level )
74
- append (CODES [JUMP ])
74
+ append (OPCODES [JUMP ])
75
tail .append (len (code )); append (0 )
75
tail .append (len (code )); append (0 )
76
code [skip ] = len (code ) - skip
76
code [skip ] = len (code ) - skip
77
append (0 ) # end of branch
77
append (0 ) # end of branch
78
for tail in tail :
78
for tail in tail :
79
code [tail ] = len (code ) - tail
79
code [tail ] = len (code ) - tail
80
elif op is CALL :
80
elif op is CALL :
81
- append (CODES [op ])
81
+ append (OPCODES [op ])
82
skip = len (code ); append (0 )
82
skip = len (code ); append (0 )
83
- _compile (code , av , flags )
83
+ _compile (code , av , flags , level + 1 )
84
- append (CODES [SUCCESS ])
84
+ append (OPCODES [SUCCESS ])
85
code [skip ] = len (code ) - skip
85
code [skip ] = len (code ) - skip
86
elif op is CATEGORY : # not used by current parser
86
elif op is CATEGORY : # not used by current parser
87
- append (CODES [op ])
87
+ append (OPCODES [op ])
88
- append (CATEGORIES [av ])
88
+ if flags & SRE_FLAG_LOCALE :
89
+ append (CH_LOCALE [CHCODES [av ]])
90
+ else :
91
+ append (CHCODES [av ])
89
elif op is GROUP :
92
elif op is GROUP :
90
- if "i" in flags :
93
+ if flags & SRE_FLAG_IGNORECASE :
91
- append (CODES [ MAP_IGNORE [op ]])
94
+ append (OPCODES [ OP_IGNORE [op ]])
92
else :
95
else :
93
- append (CODES [op ])
96
+ append (OPCODES [op ])
94
- append (av )
97
+ append (av - 1 )
95
elif op is IN :
98
elif op is IN :
96
- if "i" in flags :
99
+ if flags & SRE_FLAG_IGNORECASE :
97
- append (CODES [ MAP_IGNORE [op ]])
100
+ append (OPCODES [ OP_IGNORE [op ]])
98
def fixup (literal ):
101
def fixup (literal ):
99
- return ord (_lower ( literal ))
102
+ return ord (literal . lower ( ))
100
else :
103
else :
101
- append (CODES [op ])
104
+ append (OPCODES [op ])
102
fixup = ord
105
fixup = ord
103
skip = len (code ); append (0 )
106
skip = len (code ); append (0 )
104
for op , av in av :
107
for op , av in av :
105
- append (CODES [op ])
108
+ append (OPCODES [op ])
106
if op is NEGATE :
109
if op is NEGATE :
107
pass
110
pass
108
elif op is LITERAL :
111
elif op is LITERAL :
@@ -111,77 +114,80 @@ def fixup(literal):
111
append (fixup (av [0 ]))
114
append (fixup (av [0 ]))
112
append (fixup (av [1 ]))
115
append (fixup (av [1 ]))
113
elif op is CATEGORY :
116
elif op is CATEGORY :
114
- append (CATEGORIES [av ])
117
+ if flags & SRE_FLAG_LOCALE :
118
+ append (CH_LOCALE [CHCODES [av ]])
119
+ else :
120
+ append (CHCODES [av ])
115
else :
121
else :
116
raise ValueError , "unsupported set operator"
122
raise ValueError , "unsupported set operator"
117
- append (CODES [FAILURE ])
123
+ append (OPCODES [FAILURE ])
118
code [skip ] = len (code ) - skip
124
code [skip ] = len (code ) - skip
119
elif op in (LITERAL , NOT_LITERAL ):
125
elif op in (LITERAL , NOT_LITERAL ):
120
- if "i" in flags :
126
+ if flags & SRE_FLAG_IGNORECASE :
121
- append (CODES [ MAP_IGNORE [op ]])
127
+ append (OPCODES [ OP_IGNORE [op ]])
122
- append (ord (_lower ( av )))
128
+ append (ord (av . lower ( )))
123
else :
129
else :
124
- append (CODES [op ])
130
+ append (OPCODES [op ])
125
append (ord (av ))
131
append (ord (av ))
126
elif op is MARK :
132
elif op is MARK :
127
- append (CODES [op ])
133
+ append (OPCODES [op ])
128
append (av )
134
append (av )
129
elif op in (REPEAT , MIN_REPEAT , MAX_REPEAT ):
135
elif op in (REPEAT , MIN_REPEAT , MAX_REPEAT ):
130
lo , hi = av [2 ].getwidth ()
136
lo , hi = av [2 ].getwidth ()
131
if lo == 0 :
137
if lo == 0 :
132
raise SyntaxError , "cannot repeat zero-width items"
138
raise SyntaxError , "cannot repeat zero-width items"
133
if lo == hi == 1 and op is MAX_REPEAT :
139
if lo == hi == 1 and op is MAX_REPEAT :
134
- append (CODES [MAX_REPEAT_ONE ])
140
+ append (OPCODES [MAX_REPEAT_ONE ])
135
skip = len (code ); append (0 )
141
skip = len (code ); append (0 )
136
append (av [0 ])
142
append (av [0 ])
137
append (av [1 ])
143
append (av [1 ])
138
- _compile (code , av [2 ], flags )
144
+ _compile (code , av [2 ], flags , level + 1 )
139
- append (CODES [SUCCESS ])
145
+ append (OPCODES [SUCCESS ])
140
code [skip ] = len (code ) - skip
146
code [skip ] = len (code ) - skip
141
else :
147
else :
142
- append (CODES [op ])
148
+ append (OPCODES [op ])
143
skip = len (code ); append (0 )
149
skip = len (code ); append (0 )
144
append (av [0 ])
150
append (av [0 ])
145
append (av [1 ])
151
append (av [1 ])
146
- _compile (code , av [2 ], flags )
152
+ _compile (code , av [2 ], flags , level + 1 )
147
if op is MIN_REPEAT :
153
if op is MIN_REPEAT :
148
- append (CODES [MIN_UNTIL ])
154
+ append (OPCODES [MIN_UNTIL ])
149
else :
155
else :
150
- # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
156
+ append (OPCODES [MAX_UNTIL ])
151
- append (CODES [MAX_UNTIL ])
152
code [skip ] = len (code ) - skip
157
code [skip ] = len (code ) - skip
153
elif op is SUBPATTERN :
158
elif op is SUBPATTERN :
154
- ## group = av[0]
159
+ group = av [0 ]
155
- ## if group:
160
+ if group :
156
- ## append(CODES [MARK])
161
+ append (OPCODES [MARK ])
157
- ## append((group-1)*2)
162
+ append ((group - 1 )* 2 )
158
- _compile (code , av [1 ], flags )
163
+ _compile (code , av [1 ], flags , level + 1 )
159
- ## if group:
164
+ if group :
160
- ## append(CODES [MARK])
165
+ append (OPCODES [MARK ])
161
- ## append((group-1)*2+1)
166
+ append ((group - 1 )* 2 + 1 )
162
else :
167
else :
163
raise ValueError , ("unsupported operand type" , op )
168
raise ValueError , ("unsupported operand type" , op )
164
169
165
def compile(p, flags=0):
    """Convert a pattern to the internal format and compile it with _sre.

    p     -- either a pattern string (plain or unicode), which is parsed
             with sre_parse.parse(), or an already parsed pattern object
             providing `.data` and `.pattern`.
    flags -- integer bitmask; it is OR-ed with the flags recorded on the
             parsed pattern (p.pattern.flags) before code generation.

    Returns the result of _sre.compile() for the generated code.
    """
    # Use isinstance so that subclasses of the string types are parsed
    # too; an exact type() comparison would route them to the
    # "already parsed" branch, where they fail on `p.pattern`.
    if isinstance(p, type("")) or isinstance(p, type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p)
    else:
        # The caller supplied a parsed pattern; no source text to record.
        pattern = None
    flags = p.pattern.flags | flags
    code = Code()
    _compile(code, p.data, flags)
    # Terminate the top-level program.
    code.append(OPCODES[SUCCESS])
    data = code.todata()
    return _sre.compile(
        pattern, flags,
        data,
        p.pattern.groups-1, p.pattern.groupdict
        )
0 commit comments