Commit 21ed859

Merge pull request matplotlib#19564 from anntzer/t1f

Micro-optimize type1font loading

2 parents d1c7743 + 80d1c9b
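The changes below are micro-optimizations to the Type 1 font parser's hot paths: two regexes merged into one alternation, regex calls anchored at an offset instead of run against a slice of the buffer, and _TokenType enum members hoisted into local variables. A rough way to gauge the overall effect is to time font loading on each side of the merge; this is only a sketch, and the .pfb path is a hypothetical placeholder:

    import timeit

    from matplotlib import type1font

    # Hypothetical path -- substitute any Type 1 (.pfb) font on your system.
    PFB = '/usr/share/fonts/type1/cmr10.pfb'

    # Parse the same font repeatedly; compare the numbers across commits.
    print(timeit.timeit(lambda: type1font.Type1Font(PFB), number=100))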

1 file changed: +38 -26 lines changed

lib/matplotlib/type1font.py

@@ -135,9 +135,8 @@ def _split(self, data):
         return data[:len1], binary, data[idx+1:]
 
-    _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
+    _whitespace_or_comment_re = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
-    _comment_re = re.compile(br'%[^\r\n\v]*')
     _instring_re = re.compile(br'[()\\]')
 
     @classmethod
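Folding the comment pattern into the whitespace pattern lets the tokenizer classify both with a single regex call per iteration instead of up to two; both still come out tagged as whitespace, as the next hunk shows. A minimal standalone sketch of the merged pattern (the sample input is made up):

    import re

    # One alternation covers whitespace runs and %-to-end-of-line comments.
    ws_or_comment = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')

    data = b'  % a PostScript comment\n/Foo'
    print(ws_or_comment.match(data, 0).group())  # b'  '
    print(ws_or_comment.match(data, 2).group())  # b'% a PostScript comment'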
@@ -146,47 +145,52 @@ def _tokens(cls, text):
         A PostScript tokenizer. Yield (token, value) pairs such as
         (_TokenType.whitespace, ' ') or (_TokenType.name, '/Foobar').
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_delimiter = _TokenType.delimiter
+        tok_number = _TokenType.number
         pos = 0
         while pos < len(text):
-            match = (cls._comment_re.match(text[pos:]) or
-                     cls._whitespace_re.match(text[pos:]))
+            match = cls._whitespace_or_comment_re.match(text, pos)
             if match:
-                yield (_TokenType.whitespace, match.group())
-                pos += match.end()
-            elif text[pos] == b'(':
+                yield (tok_whitespace, match.group())
+                pos = match.end()
+            elif text[pos:pos+1] == b'(':
                 start = pos
                 pos += 1
                 depth = 1
                 while depth:
-                    match = cls._instring_re.search(text[pos:])
+                    match = cls._instring_re.search(text, pos)
                     if match is None:
                         return
-                    pos += match.end()
+                    pos = match.end()
                     if match.group() == b'(':
                         depth += 1
                     elif match.group() == b')':
                         depth -= 1
                     else:  # a backslash - skip the next character
                         pos += 1
-                yield (_TokenType.string, text[start:pos])
+                yield (tok_string, text[start:pos])
             elif text[pos:pos + 2] in (b'<<', b'>>'):
-                yield (_TokenType.delimiter, text[pos:pos + 2])
+                yield (tok_delimiter, text[pos:pos + 2])
                 pos += 2
-            elif text[pos] == b'<':
+            elif text[pos:pos+1] == b'<':
                 start = pos
-                pos += text[pos:].index(b'>')
-                yield (_TokenType.string, text[start:pos])
+                pos = text.index(b'>', pos)
+                yield (tok_string, text[start:pos])
             else:
-                match = cls._token_re.match(text[pos:])
+                match = cls._token_re.match(text, pos)
                 if match:
                     try:
                         float(match.group())
-                        yield (_TokenType.number, match.group())
+                        yield (tok_number, match.group())
                     except ValueError:
-                        yield (_TokenType.name, match.group())
-                    pos += match.end()
+                        yield (tok_name, match.group())
+                    pos = match.end()
                 else:
-                    yield (_TokenType.delimiter, text[pos:pos + 1])
+                    yield (tok_delimiter, text[pos:pos + 1])
                     pos += 1
 
     def _parse(self):
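Two recurring changes in this hunk deserve a note. First, pattern.match(text, pos) scans the original buffer starting at pos, while the old pattern.match(text[pos:]) copied the remaining bytes on every token, which is quadratic over a large font; since match.end() is then an absolute offset, pos += match.end() becomes pos = match.end(). Second, text[pos] == b'(' becomes text[pos:pos+1] == b'(' because indexing a bytes object in Python 3 yields an int, so the old comparison could never be true for bytes input; a one-byte slice compares as intended:

    text = b'(hello)'

    print(text[0])            # 40 -- indexing bytes yields an int
    print(text[0] == b'(')    # False for any input
    print(text[0:1] == b'(')  # True -- slicing yields bytes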
@@ -195,28 +199,33 @@ def _parse(self):
         of parsing is described in Chapter 10 "Adobe Type Manager
         Compatibility" of the Type-1 spec.
         """
+        # Preload enum members for speed.
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+        tok_string = _TokenType.string
+        tok_number = _TokenType.number
         # Start with reasonable defaults
         prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
                 'UnderlinePosition': -100, 'UnderlineThickness': 50}
         filtered = ((token, value)
                     for token, value in self._tokens(self.parts[0])
-                    if token is not _TokenType.whitespace)
+                    if token is not tok_whitespace)
         # The spec calls this an ASCII format; in Python 2.x we could
         # just treat the strings and names as opaque bytes but let's
         # turn them into proper Unicode, and be lenient in case of high bytes.
         def convert(x): return x.decode('ascii', 'replace')
         for token, value in filtered:
-            if token is _TokenType.name and value.startswith(b'/'):
+            if token is tok_name and value.startswith(b'/'):
                 key = convert(value[1:])
                 token, value = next(filtered)
-                if token is _TokenType.name:
+                if token is tok_name:
                     if value in (b'true', b'false'):
                         value = value == b'true'
                     else:
                         value = convert(value.lstrip(b'/'))
-                elif token is _TokenType.string:
+                elif token is tok_string:
                     value = convert(value.lstrip(b'(').rstrip(b')'))
-                elif token is _TokenType.number:
+                elif token is tok_number:
                     if b'.' in value:
                         value = float(value)
                     else:
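The "preload enum members" pattern that opens both _tokens and _parse swaps a class-attribute lookup on _TokenType in every loop iteration for a plain local-variable load, which CPython resolves much faster. A rough timeit sketch of the difference (the enum here is illustrative, not the module's own, and the numbers vary by interpreter and machine):

    import enum
    import timeit

    class TokenType(enum.Enum):
        whitespace = 1
        string = 2

    def attr_lookup():
        for _ in range(1000):
            tok = TokenType.whitespace         # attribute lookup every pass

    def local_lookup():
        tok_whitespace = TokenType.whitespace  # hoisted to a local once
        for _ in range(1000):
            tok = tok_whitespace               # plain local load

    print(timeit.timeit(attr_lookup, number=1000))   # slower
    print(timeit.timeit(local_lookup, number=1000))  # faster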
@@ -242,6 +251,9 @@ def convert(x): return x.decode('ascii', 'replace')
 
     @classmethod
     def _transformer(cls, tokens, slant, extend):
+        tok_whitespace = _TokenType.whitespace
+        tok_name = _TokenType.name
+
         def fontname(name):
             result = name
             if slant:
@@ -277,7 +289,7 @@ def replacer(tokens):
             token, value = next(tokens)  # name, e.g., /FontMatrix
             yield value
             token, value = next(tokens)  # possible whitespace
-            while token is _TokenType.whitespace:
+            while token is tok_whitespace:
                 yield value
                 token, value = next(tokens)
             if value != b'[':  # name/number/etc.
@@ -302,7 +314,7 @@ def suppress(tokens):
                 b'/UniqueID': suppress}
 
         for token, value in tokens:
-            if token is _TokenType.name and value in table:
+            if token is tok_name and value in table:
                 yield from table[value](
                     itertools.chain([(token, value)], tokens))
             else:
