bpo-33338: [tokenize] Minor code cleanup #6573

Merged: 1 commit, Apr 23, 2018
19 changes: 8 additions & 11 deletions Lib/tokenize.py
@@ -28,7 +28,6 @@
 from codecs import lookup, BOM_UTF8
 import collections
 from io import TextIOWrapper
-from itertools import chain
 import itertools as _itertools
 import re
 import sys
@@ -278,7 +277,7 @@ def compat(self, token, iterable):
         startline = token[0] in (NEWLINE, NL)
         prevstring = False

-        for tok in chain([token], iterable):
+        for tok in _itertools.chain([token], iterable):
             toknum, tokval = tok[:2]
             if toknum == ENCODING:
                 self.encoding = tokval
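
For readers skimming the hunk above: `_itertools.chain([token], iterable)` simply puts the single, already-consumed token back in front of the remaining token stream so `compat()` can loop over everything uniformly. A minimal sketch, not part of the patch, with made-up token tuples:

import itertools as _itertools

first = (1, 'x')                       # the token compat() was handed directly
rest = iter([(54, '='), (2, '1')])     # the rest of the token iterable

# chain() glues the consumed token back onto the remaining stream.
for tok in _itertools.chain([first], rest):
    print(tok[:2])                     # (1, 'x'), (54, '='), (2, '1')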
@@ -475,13 +474,10 @@ def tokenize(readline):
     The first token sequence will always be an ENCODING token
     which tells you which encoding was used to decode the bytes stream.
     """
-    # This import is here to avoid problems when the itertools module is not
-    # built yet and tokenize is imported.
-    from itertools import chain, repeat
     encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
+    empty = _itertools.repeat(b"")
+    rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)
+    return _tokenize(rl_gen.__next__, encoding)


 def _tokenize(readline, encoding):
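
To see what the rewired pipeline in `tokenize()` produces, here is a minimal, self-contained sketch, not from the patch, assuming an `io.BytesIO` source and an empty `consumed` list from `detect_encoding()`. The trailing `repeat(b"")` matters: it lets `_tokenize()` keep pulling lines past EOF without ever hitting `StopIteration`, which both the old and the new spelling rely on.

import io
import itertools as _itertools

source = io.BytesIO(b"x = 1\nprint(x)\n")
readline = source.readline
consumed = []                          # lines detect_encoding() may have buffered

empty = _itertools.repeat(b"")
rl_gen = _itertools.chain(consumed, iter(readline, b""), empty)

print(next(rl_gen))                    # b'x = 1\n'
print(next(rl_gen))                    # b'print(x)\n'
print(next(rl_gen))                    # b'', and b'' forever after, thanks to repeat()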
@@ -496,7 +492,7 @@ def _tokenize(readline, encoding):
             # BOM will already have been stripped.
             encoding = "utf-8"
         yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
-    while True: # loop over lines in stream
+    while True: # loop over lines in stream
         try:
             line = readline()
         except StopIteration:
@@ -581,7 +577,7 @@ def _tokenize(readline, encoding):
                     continue
                 token, initial = line[start:end], line[start]

-                if (initial in numchars or # ordinary number
+                if (initial in numchars or # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
                     yield TokenInfo(NUMBER, token, spos, epos, line)
                 elif initial in '\r\n':
@@ -667,7 +663,8 @@ def main():

     # Helper error handling routines
     def perror(message):
-        print(message, file=sys.stderr)
+        sys.stderr.write(message)
+        sys.stderr.write('\n')

     def error(message, filename=None, location=None):
         if location:
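
The `perror()` change is behavior-preserving for a plain string message; a quick sketch, not from the patch, showing the two spellings side by side:

import sys

message = "tokenize: something went wrong"

# Old spelling:
print(message, file=sys.stderr)

# New spelling: two explicit writes, the second supplying the newline.
sys.stderr.write(message)
sys.stderr.write('\n')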