gh-128031: Preserve tab characters in untokenize() #128032

Status: Closed · wants to merge 2 commits
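For context, the bug being fixed: add_whitespace() reconstructs every inter-token gap as a run of spaces, so a tab in the original source does not survive a round trip. A minimal repro sketch against an unpatched tokenize module (not taken from the PR):

import io
import tokenize

src = "a +\tb"
tokens = tokenize.generate_tokens(io.StringIO(src).readline)
# Unpatched, untokenize() pads each gap with " " * col_offset, so the
# tab between '+' and 'b' comes back as a single space:
print(repr(tokenize.untokenize(tokens)))  # 'a + b', not 'a +\tb'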
Lib/test/test_tokenize.py — 13 changes: 9 additions & 4 deletions
@@ -1786,22 +1786,22 @@ def test_bad_input_order(self):
         u.prev_row = 2
         u.prev_col = 2
         with self.assertRaises(ValueError) as cm:
-            u.add_whitespace((1,3))
+            u.add_whitespace((1,3), line=' ')
         self.assertEqual(cm.exception.args[0],
                 'start (1,3) precedes previous end (2,2)')
         # raise if previous column in row
-        self.assertRaises(ValueError, u.add_whitespace, (2,1))
+        self.assertRaises(ValueError, u.add_whitespace, (2,1), ' ')

     def test_backslash_continuation(self):
         # The problem is that <whitespace>\<newline> leaves no token
         u = tokenize.Untokenizer()
         u.prev_row = 1
         u.prev_col = 1
         u.tokens = []
-        u.add_whitespace((2, 0))
+        u.add_whitespace((2, 0), line=' \n')
         self.assertEqual(u.tokens, ['\\\n'])
         u.prev_row = 2
-        u.add_whitespace((4, 4))
+        u.add_whitespace((4, 4), line='    ')
         self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
         TestRoundtrip.check_roundtrip(self, 'a\n  b\n  c\n  \\\n  c\n')

@@ -1985,6 +1985,11 @@ def test_string_concatenation(self):
         # Two string literals on the same line
         self.check_roundtrip("'' ''")

+    def test_tabs(self):
+        # Tabs should be preserved
+        self.check_roundtrip("a +\tb")
+        self.check_roundtrip("a + b\t# comment")
+
     def test_random_files(self):
         # Test roundtrip on random python modules.
         # pass the '-ucpu' option to process the full directory.
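The new test_tabs case goes through check_roundtrip, which tokenizes a string and verifies that untokenize() reproduces it. A standalone sketch of the same check (the roundtrip helper here is illustrative, not the suite's actual helper):

import io
import tokenize

def roundtrip(source):
    # Tokenize to 5-tuples, then rebuild the source from them.
    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    return tokenize.untokenize(tokens)

# With this patch applied, both inputs should survive exactly:
assert roundtrip("a +\tb") == "a +\tb"
assert roundtrip("a + b\t# comment") == "a + b\t# comment"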
Lib/tokenize.py — 6 changes: 3 additions & 3 deletions
@@ -171,7 +171,7 @@ def __init__(self):
         self.prev_type = None
         self.encoding = None

-    def add_whitespace(self, start):
+    def add_whitespace(self, start, line):
         row, col = start
         if row < self.prev_row or row == self.prev_row and col < self.prev_col:
             raise ValueError("start ({},{}) precedes previous end ({},{})"
@@ -182,7 +182,7 @@ def add_whitespace(self, start):
             self.prev_col = 0
         col_offset = col - self.prev_col
         if col_offset:
-            self.tokens.append(" " * col_offset)
+            self.tokens.append(line[self.prev_col:col])

     def escape_brackets(self, token):
         characters = []
@@ -246,7 +246,7 @@ def untokenize(self, iterable):
             elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
                 self.tokens.append(" ")

-            self.add_whitespace(start)
+            self.add_whitespace(start, line)
             self.tokens.append(token)
            self.prev_row, self.prev_col = end
             if tok_type in (NEWLINE, NL):
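The last hunk above is the heart of the change: instead of synthesizing col_offset spaces, add_whitespace() now slices the gap verbatim out of the source line carried in each 5-tuple, so tabs (and any other original whitespace) pass through untouched. The idea in isolation (gap_text is a hypothetical name, not part of the patch):

def gap_text(line, prev_col, col):
    # Old behavior: " " * (col - prev_col) -- a tab became a space.
    # New behavior: copy the original characters out of the line.
    return line[prev_col:col]

line = "a +\tb"
# '+' ends at column 3 and 'b' starts at column 4; the gap is the tab:
assert gap_text(line, 3, 4) == "\t"
assert gap_text(line, 1, 2) == " "

This works because tokenize reports columns as character offsets into the line, so the slice lines up even when a tab visually spans several columns.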
NEWS entry (new file, 1 addition)
@@ -0,0 +1 @@
+Preserve tab characters in :func:`tokenize.untokenize`.