@@ -135,9 +135,8 @@ def _split(self, data):
 
         return data[:len1], binary, data[idx + 1:]
 
-    _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
+    _whitespace_or_comment_re = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
-    _comment_re = re.compile(br'%[^\r\n\v]*')
     _instring_re = re.compile(br'[()\\]')
 
     @classmethod
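The first hunk folds the comment pattern into the whitespace pattern, so the skip path needs a single regex call instead of two. A minimal sketch of how the merged alternation behaves (the standalone name here is illustrative, not part of the module):

```python
import re

# Same alternation as _whitespace_or_comment_re above: a run of PostScript
# whitespace (NUL, tab, CR, form feed \014, LF, space), or a %-comment
# running up to, but not including, the line terminator.
ws_or_comment = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')

assert ws_or_comment.match(b' \t\nfoo').group() == b' \t\n'
assert ws_or_comment.match(b'% comment\nfoo').group() == b'% comment'
assert ws_or_comment.match(b'/Name') is None  # real tokens fall through
```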
@@ -146,47 +145,52 @@ def _tokens(cls, text):
         A PostScript tokenizer. Yield (token, value) pairs such as
         (_TokenType.whitespace, ' ') or (_TokenType.name, '/Foobar').
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_delimiter = _TokenType.delimiter
+        tok_number = _TokenType.number
         pos = 0
         while pos < len(text):
-            match = (cls._comment_re.match(text[pos:]) or
-                     cls._whitespace_re.match(text[pos:]))
+            match = cls._whitespace_or_comment_re.match(text, pos)
             if match:
-                yield (_TokenType.whitespace, match.group())
-                pos += match.end()
-            elif text[pos] == b'(':
+                yield (tok_whitespace, match.group())
+                pos = match.end()
+            elif text[pos:pos + 1] == b'(':
                 start = pos
                 pos += 1
                 depth = 1
                 while depth:
-                    match = cls._instring_re.search(text[pos:])
+                    match = cls._instring_re.search(text, pos)
                     if match is None:
                         return
-                    pos += match.end()
+                    pos = match.end()
                     if match.group() == b'(':
                         depth += 1
                     elif match.group() == b')':
                         depth -= 1
                     else:  # a backslash - skip the next character
                         pos += 1
-                yield (_TokenType.string, text[start:pos])
+                yield (tok_string, text[start:pos])
             elif text[pos:pos + 2] in (b'<<', b'>>'):
-                yield (_TokenType.delimiter, text[pos:pos + 2])
+                yield (tok_delimiter, text[pos:pos + 2])
                 pos += 2
-            elif text[pos] == b'<':
+            elif text[pos:pos + 1] == b'<':
                 start = pos
-                pos += text[pos:].index(b'>')
-                yield (_TokenType.string, text[start:pos])
+                pos = text.index(b'>', pos)
+                yield (tok_string, text[start:pos])
             else:
-                match = cls._token_re.match(text[pos:])
+                match = cls._token_re.match(text, pos)
                 if match:
                     try:
                         float(match.group())
-                        yield (_TokenType.number, match.group())
+                        yield (tok_number, match.group())
                     except ValueError:
-                        yield (_TokenType.name, match.group())
-                    pos += match.end()
+                        yield (tok_name, match.group())
+                    pos = match.end()
                 else:
-                    yield (_TokenType.delimiter, text[pos:pos + 1])
+                    yield (tok_delimiter, text[pos:pos + 1])
                     pos += 1
 
     def _parse(self):
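The recurring change in this hunk is passing `pos` to `match`/`search`/`index` instead of slicing with `text[pos:]`, which copied the remainder of the buffer on every token and made the match offsets slice-relative. A minimal sketch of the difference (the names here are illustrative, not from the module):

```python
import re

pat = re.compile(br'[a-z]+')
text = b'   hello world'
pos = 3

# Old style: the slice copies the tail of the buffer, and match offsets are
# relative to the slice -- hence the old `pos += match.end()`.
m_old = pat.match(text[pos:])
# New style: match(text, pos) anchors at pos with no copy, and offsets are
# absolute -- hence the new `pos = match.end()`.
m_new = pat.match(text, pos)
assert m_old.group() == m_new.group() == b'hello'
assert pos + m_old.end() == m_new.end() == 8

# bytes.index accepts a start offset too, which is what lets
# `pos += text[pos:].index(b'>')` become `pos = text.index(b'>', pos)`.
assert text.index(b'o', pos) == 7
```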
@@ -195,28 +199,33 @@ def _parse(self):
         of parsing is described in Chapter 10 "Adobe Type Manager
         Compatibility" of the Type-1 spec.
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_number = _TokenType.number
         # Start with reasonable defaults
         prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
                 'UnderlinePosition': -100, 'UnderlineThickness': 50}
         filtered = ((token, value)
                     for token, value in self._tokens(self.parts[0])
-                    if token is not _TokenType.whitespace)
+                    if token is not tok_whitespace)
         # The spec calls this an ASCII format; in Python 2.x we could
         # just treat the strings and names as opaque bytes but let's
         # turn them into proper Unicode, and be lenient in case of high bytes.
         def convert(x): return x.decode('ascii', 'replace')
         for token, value in filtered:
-            if token is _TokenType.name and value.startswith(b'/'):
+            if token is tok_name and value.startswith(b'/'):
                 key = convert(value[1:])
                 token, value = next(filtered)
-                if token is _TokenType.name:
+                if token is tok_name:
                     if value in (b'true', b'false'):
                         value = value == b'true'
                     else:
                         value = convert(value.lstrip(b'/'))
-                elif token is _TokenType.string:
+                elif token is tok_string:
                     value = convert(value.lstrip(b'(').rstrip(b')'))
-                elif token is _TokenType.number:
+                elif token is tok_number:
                     if b'.' in value:
                         value = float(value)
                     else:
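Both `_tokens` and `_parse` now bind the `_TokenType` members to locals before their loops: a local lookup is cheaper than re-resolving a global plus an enum attribute on every token. A rough, self-contained illustration of the effect (the enum here only mimics the module's `_TokenType`, and the timings are indicative, not measured results):

```python
import timeit
from enum import Enum

# Stand-in for the module's _TokenType; the real definition may differ.
_TokenType = Enum('_TokenType', 'whitespace name string delimiter number')

def attribute_lookup(tokens):
    # Re-resolves the global and the enum attribute on every comparison.
    return sum(t is _TokenType.whitespace for t in tokens)

def preloaded_local(tokens):
    tok_whitespace = _TokenType.whitespace  # resolved once
    return sum(t is tok_whitespace for t in tokens)

tokens = [_TokenType.whitespace, _TokenType.name] * 50_000
print(timeit.timeit(lambda: attribute_lookup(tokens), number=20))
print(timeit.timeit(lambda: preloaded_local(tokens), number=20))  # faster
```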
@@ -242,6 +251,9 @@ def convert(x): return x.decode('ascii', 'replace')
 
     @classmethod
     def _transformer(cls, tokens, slant, extend):
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+
         def fontname(name):
             result = name
             if slant:
@@ -277,7 +289,7 @@ def replacer(tokens):
             token, value = next(tokens)  # name, e.g., /FontMatrix
             yield value
             token, value = next(tokens)  # possible whitespace
-            while token is _TokenType.whitespace:
+            while token is tok_whitespace:
                 yield value
                 token, value = next(tokens)
             if value != b'[':  # name/number/etc.
@@ -302,7 +314,7 @@ def suppress(tokens):
                  b'/UniqueID': suppress}
 
         for token, value in tokens:
-            if token is _TokenType.name and value in table:
+            if token is tok_name and value in table:
                 yield from table[value](
                     itertools.chain([(token, value)], tokens))
             else:
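One subtle fix threaded through the tokenizer hunk: `text[pos] == b'('` became `text[pos:pos + 1] == b'('`. Indexing a `bytes` object in Python 3 returns an `int`, so the old comparison could never be true; a one-byte slice keeps the operand a `bytes`:

```python
text = b'(hello)'
assert text[0] == 40        # indexing bytes yields an int (ASCII of '(')
assert text[0] != b'('      # so the old comparison was always False
assert text[0:1] == b'('    # slicing stays bytes and compares as intended
```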