gh-128031: Preserve tab characters in untokenize() #128032

Status: Closed · wants to merge 2 commits
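For context, the bug being fixed: add_whitespace() reconstructs every inter-token gap as a run of spaces, so a tab in the original source does not survive a round trip. A minimal repro sketch against an unpatched tokenize module (not taken from the PR):

import io
import tokenize

src = "a +\tb"
tokens = tokenize.generate_tokens(io.StringIO(src).readline)
# Unpatched, untokenize() pads each gap with " " * col_offset, so the
# tab between '+' and 'b' comes back as a single space:
print(repr(tokenize.untokenize(tokens)))  # 'a + b', not 'a +\tb'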
Lib/test/test_tokenize.py — 13 changes: 9 additions & 4 deletions
@@ -1786,22 +1786,22 @@ def test_bad_input_order(self):
         u.prev_row = 2
         u.prev_col = 2
         with self.assertRaises(ValueError) as cm:
-            u.add_whitespace((1,3))
+            u.add_whitespace((1,3), line=' ')
         self.assertEqual(cm.exception.args[0],
                 'start (1,3) precedes previous end (2,2)')
         # raise if previous column in row
-        self.assertRaises(ValueError, u.add_whitespace, (2,1))
+        self.assertRaises(ValueError, u.add_whitespace, (2,1), ' ')

     def test_backslash_continuation(self):
         # The problem is that <whitespace>\<newline> leaves no token
         u = tokenize.Untokenizer()
         u.prev_row = 1
         u.prev_col = 1
         u.tokens = []
-        u.add_whitespace((2, 0))
+        u.add_whitespace((2, 0), line=' \n')
         self.assertEqual(u.tokens, ['\\\n'])
         u.prev_row = 2
-        u.add_whitespace((4, 4))
+        u.add_whitespace((4, 4), line='    ')
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
         TestRoundtrip.check_roundtrip(self, 'a\n  b\n  c\n  \\\n  c\n')

@@ -1985,6 +1985,11 @@ def test_string_concatenation(self):
         # Two string literals on the same line
         self.check_roundtrip("'' ''")

+    def test_tabs(self):
+        # Tabs should be preserved
+        self.check_roundtrip("a +\tb")
+        self.check_roundtrip("a + b\t# comment")
+
     def test_random_files(self):
         # Test roundtrip on random python modules.
         # pass the '-ucpu' option to process the full directory.
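The new test_tabs case goes through check_roundtrip, which tokenizes a string and verifies that untokenize() reproduces it. A standalone sketch of the same check (the roundtrip helper here is illustrative, not the suite's actual helper):

import io
import tokenize

def roundtrip(source):
    # Tokenize to 5-tuples, then rebuild the source from them.
    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    return tokenize.untokenize(tokens)

# With this patch applied, both inputs should survive exactly:
assert roundtrip("a +\tb") == "a +\tb"
assert roundtrip("a + b\t# comment") == "a + b\t# comment"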
Lib/tokenize.py — 6 changes: 3 additions & 3 deletions
@@ -171,7 +171,7 @@ def __init__(self):
         self.prev_type = None
         self.encoding = None

-    def add_whitespace(self, start):
+    def add_whitespace(self, start, line):
         row, col = start
         if row < self.prev_row or row == self.prev_row and col < self.prev_col:
             raise ValueError("start ({},{}) precedes previous end ({},{})"
@@ -182,7 +182,7 @@ def add_whitespace(self, start):
             self.prev_col = 0
         col_offset = col - self.prev_col
         if col_offset:
-            self.tokens.append(" " * col_offset)
+            self.tokens.append(line[self.prev_col:col])

     def escape_brackets(self, token):
         characters = []
@@ -246,7 +246,7 @@ def untokenize(self, iterable):
             elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
                 self.tokens.append(" ")

-            self.add_whitespace(start)
+            self.add_whitespace(start, line)
             self.tokens.append(token)
            self.prev_row, self.prev_col = end
             if tok_type in (NEWLINE, NL):
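The last hunk above is the heart of the change: instead of synthesizing col_offset spaces, add_whitespace() now slices the gap verbatim out of the source line carried in each 5-tuple, so tabs (and any other original whitespace) pass through untouched. The idea in isolation (gap_text is a hypothetical name, not part of the patch):

def gap_text(line, prev_col, col):
    # Old behavior: " " * (col - prev_col) -- a tab became a space.
    # New behavior: copy the original characters out of the line.
    return line[prev_col:col]

line = "a +\tb"
# '+' ends at column 3 and 'b' starts at column 4; the gap is the tab:
assert gap_text(line, 3, 4) == "\t"
assert gap_text(line, 1, 2) == " "

This works because tokenize reports columns as character offsets into the line, so the slice lines up even when a tab visually spans several columns.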
NEWS entry (new file, 1 addition)
@@ -0,0 +1 @@
+Preserve tab characters in :func:`tokenize.untokenize`.