@@ -135,9 +135,8 @@ def _split(self, data):
         return data[:len1], binary, data[idx+1:]

-    _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
+    _whitespace_or_comment_re = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
-    _comment_re = re.compile(br'%[^\r\n\v]*')
     _instring_re = re.compile(br'[()\\]')

     @classmethod
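The two patterns that were previously tried one after the other are folded into a single alternation, so the tokenizer's hot loop makes one regex call instead of up to two. A quick sanity check (my own sketch, not part of the commit) that the merged pattern still covers both cases:

```python
import re

# Same pattern as the new _whitespace_or_comment_re above.
ws_or_comment = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')

# A run of whitespace matches the first alternative...
assert ws_or_comment.match(b' \t\n/Foo').group() == b' \t\n'
# ...and a %-comment (up to the line ending) matches the second.
assert ws_or_comment.match(b'% a comment\nrest').group() == b'% a comment'
```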
@@ -146,47 +145,52 @@ def _tokens(cls, text):
         A PostScript tokenizer. Yield (token, value) pairs such as
         (_TokenType.whitespace, ' ') or (_TokenType.name, '/Foobar').
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_delimiter = _TokenType.delimiter
+        tok_number = _TokenType.number
         pos = 0
         while pos < len(text):
-            match = (cls._comment_re.match(text[pos:]) or
-                     cls._whitespace_re.match(text[pos:]))
+            match = cls._whitespace_or_comment_re.match(text, pos)
             if match:
-                yield (_TokenType.whitespace, match.group())
-                pos += match.end()
-            elif text[pos] == b'(':
+                yield (tok_whitespace, match.group())
+                pos = match.end()
+            elif text[pos:pos + 1] == b'(':
                 start = pos
                 pos += 1
                 depth = 1
                 while depth:
-                    match = cls._instring_re.search(text[pos:])
+                    match = cls._instring_re.search(text, pos)
                     if match is None:
                         return
-                    pos += match.end()
+                    pos = match.end()
                     if match.group() == b'(':
                         depth += 1
                     elif match.group() == b')':
                         depth -= 1
                     else:  # a backslash - skip the next character
                         pos += 1
-                yield (_TokenType.string, text[start:pos])
+                yield (tok_string, text[start:pos])
             elif text[pos:pos + 2] in (b'<<', b'>>'):
-                yield (_TokenType.delimiter, text[pos:pos + 2])
+                yield (tok_delimiter, text[pos:pos + 2])
                 pos += 2
-            elif text[pos] == b'<':
+            elif text[pos:pos + 1] == b'<':
                 start = pos
-                pos += text[pos:].index(b'>')
-                yield (_TokenType.string, text[start:pos])
+                pos = text.index(b'>', pos)
+                yield (tok_string, text[start:pos])
             else:
-                match = cls._token_re.match(text[pos:])
+                match = cls._token_re.match(text, pos)
                 if match:
                     try:
                         float(match.group())
-                        yield (_TokenType.number, match.group())
+                        yield (tok_number, match.group())
                     except ValueError:
-                        yield (_TokenType.name, match.group())
-                    pos += match.end()
+                        yield (tok_name, match.group())
+                    pos = match.end()
                 else:
-                    yield (_TokenType.delimiter, text[pos:pos + 1])
+                    yield (tok_delimiter, text[pos:pos + 1])
                     pos += 1

     def _parse(self):
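Two mechanical changes in this hunk are easy to miss. First, `pattern.match(text, pos)` scans the original buffer in place, whereas `pattern.match(text[pos:])` first copies the whole remaining tail; because offsets are then absolute rather than relative, `pos += match.end()` becomes `pos = match.end()`. Second, indexing `bytes` in Python 3 yields an `int`, so a comparison against a one-byte literal like `b'('` must use a one-byte slice. A small sketch of both points (mine, not from the commit):

```python
import re

word = re.compile(br'[a-z]+')
text = b'   abc'

m_slice = word.match(text[3:])   # copies the tail; end() is relative (3)
m_pos = word.match(text, 3)      # no copy; end() is absolute (6)
assert m_slice.end() == 3
assert m_pos.end() == 6

# bytes indexing returns an int, so text[pos] == b'(' is always False:
assert text[3] == ord('a')
assert text[3:4] == b'a'
```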
@@ -195,28 +199,33 @@ def _parse(self):
         of parsing is described in Chapter 10 "Adobe Type Manager
         Compatibility" of the Type-1 spec.
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_number = _TokenType.number
         # Start with reasonable defaults
         prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
                 'UnderlinePosition': -100, 'UnderlineThickness': 50}
         filtered = ((token, value)
                     for token, value in self._tokens(self.parts[0])
-                    if token is not _TokenType.whitespace)
+                    if token is not tok_whitespace)
         # The spec calls this an ASCII format; in Python 2.x we could
         # just treat the strings and names as opaque bytes but let's
         # turn them into proper Unicode, and be lenient in case of high bytes.
         def convert(x): return x.decode('ascii', 'replace')
         for token, value in filtered:
-            if token is _TokenType.name and value.startswith(b'/'):
+            if token is tok_name and value.startswith(b'/'):
                 key = convert(value[1:])
                 token, value = next(filtered)
-                if token is _TokenType.name:
+                if token is tok_name:
                     if value in (b'true', b'false'):
                         value = value == b'true'
                     else:
                         value = convert(value.lstrip(b'/'))
-                elif token is _TokenType.string:
+                elif token is tok_string:
                     value = convert(value.lstrip(b'(').rstrip(b')'))
-                elif token is _TokenType.number:
+                elif token is tok_number:
                     if b'.' in value:
                         value = float(value)
                     else:
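The `tok_*` locals hoisted at the top of each method are a classic CPython micro-optimization: a local name is a single fast lookup, while `_TokenType.whitespace` goes through the enum class on every loop iteration. A rough way to observe the difference (my benchmark setup, not the commit's):

```python
import enum
import timeit

class _TokenType(enum.Enum):
    whitespace = 1

def via_attribute(n=100_000):
    for _ in range(n):
        _TokenType.whitespace                # attribute lookup each pass

def via_local(n=100_000):
    tok_whitespace = _TokenType.whitespace   # hoisted once
    for _ in range(n):
        tok_whitespace                       # plain local load

print(timeit.timeit(via_attribute, number=10))
print(timeit.timeit(via_local, number=10))
```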
@@ -242,6 +251,9 @@ def convert(x): return x.decode('ascii', 'replace')

     @classmethod
     def _transformer(cls, tokens, slant, extend):
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+
         def fontname(name):
             result = name
             if slant:
@@ -277,7 +289,7 @@ def replacer(tokens):
             token, value = next(tokens)  # name, e.g., /FontMatrix
             yield value
             token, value = next(tokens)  # possible whitespace
-            while token is _TokenType.whitespace:
+            while token is tok_whitespace:
                 yield value
                 token, value = next(tokens)
             if value != b'[':  # name/number/etc.
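`replacer` acts as a filter over the token stream: it re-emits everything it reads, including whitespace so the output bytes round-trip, until it reaches the value it wants to rewrite. A trimmed-down illustration of the shape of that filter (mine, with a made-up rewrite rule):

```python
def replace_value(tokens, new_value):
    # Echo the name token and any whitespace, then swap in the new value.
    yield next(tokens)            # name, e.g. b'/FontName'
    tok = next(tokens)
    while tok.isspace():          # pass whitespace through unchanged
        yield tok
        tok = next(tokens)
    yield new_value               # the old value is dropped

stream = iter([b'/FontName', b' ', b'/Times', b' '])
assert list(replace_value(stream, b'/Times_Slanted')) == [
    b'/FontName', b' ', b'/Times_Slanted']
```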
@@ -302,7 +314,7 @@ def suppress(tokens):
                  b'/UniqueID': suppress}

         for token, value in tokens:
-            if token is _TokenType.name and value in table:
+            if token is tok_name and value in table:
                 yield from table[value](
                     itertools.chain([(token, value)], tokens))
             else:
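The dispatch loop above consumes one token to decide which handler applies, but the handler also needs to see that token; `itertools.chain([(token, value)], tokens)` pushes it back onto the front of the stream. A stand-alone sketch of the idiom (example names are mine):

```python
import itertools

def shout(tokens):
    for tok in tokens:
        yield tok.upper()

stream = iter(['/FontName', 'a', 'b'])
first = next(stream)                     # consumed while dispatching
restored = itertools.chain([first], stream)
assert list(shout(restored)) == ['/FONTNAME', 'A', 'B']
```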