Skip to content

Commit c8031c3

Browse files
[LIT] remove to_unicode, to_string, and to_bytes helpers (#165950)
These helpers, which handled the string-type differences between Python 2.x and Python 3.x, are no longer required.

Co-authored-by: Alexander Richardson <mail@alexrichardson.me>
1 parent 33bcde0 commit c8031c3

File tree

7 files changed

+45
-148
lines changed

7 files changed

+45
-148
lines changed

libcxx/test/selftest/dsl/lit.local.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
# within the test.
1111
import base64, lit.util, pickle
1212

13-
base64Encode = lambda s: lit.util.to_string(base64.b64encode(lit.util.to_bytes(s)))
13+
base64Encode = lambda s: base64.b64encode(s).decode("utf-8")
1414
escapedSubstitutions = base64Encode(pickle.dumps(config.substitutions))
1515
config.substitutions.append(("%{substitutions}", escapedSubstitutions))

llvm/utils/lit/lit/TestRunner.py

Lines changed: 33 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import lit.ShUtil as ShUtil
2222
import lit.Test as Test
2323
import lit.util
24-
from lit.util import to_bytes, to_string, to_unicode
2524
from lit.BooleanExpression import BooleanExpression
2625

2726

@@ -391,18 +390,14 @@ def executeBuiltinEcho(cmd, shenv):
391390
# Some tests have un-redirected echo commands to help debug test failures.
392391
# Buffer our output and return it to the caller.
393392
is_redirected = True
394-
encode = lambda x: x
395393
if stdout == subprocess.PIPE:
396394
is_redirected = False
397395
stdout = StringIO()
398396
elif kIsWindows:
399-
# Reopen stdout in binary mode to avoid CRLF translation. The versions
400-
# of echo we are replacing on Windows all emit plain LF, and the LLVM
401-
# tests now depend on this.
402-
# When we open as binary, however, this also means that we have to write
403-
# 'bytes' objects to stdout instead of 'str' objects.
404-
encode = lit.util.to_bytes
405-
stdout = open(stdout.name, stdout.mode + "b")
397+
# Reopen stdout with `newline=""` to avoid CRLF translation.
398+
# The versions of echo we are replacing on Windows all emit plain LF,
399+
# and the LLVM tests now depend on this.
400+
stdout = open(stdout.name, stdout.mode, encoding="utf-8", newline="")
406401
opened_files.append((None, None, stdout, None))
407402

408403
# Implement echo flags. We only support -e and -n, and not yet in
@@ -423,16 +418,15 @@ def maybeUnescape(arg):
423418
if not interpret_escapes:
424419
return arg
425420

426-
arg = lit.util.to_bytes(arg)
427-
return arg.decode("unicode_escape")
421+
return arg.encode("utf-8").decode("unicode_escape")
428422

429423
if args:
430424
for arg in args[:-1]:
431-
stdout.write(encode(maybeUnescape(arg)))
432-
stdout.write(encode(" "))
433-
stdout.write(encode(maybeUnescape(args[-1])))
425+
stdout.write(maybeUnescape(arg))
426+
stdout.write(" ")
427+
stdout.write(maybeUnescape(args[-1]))
434428
if write_newline:
435-
stdout.write(encode("\n"))
429+
stdout.write("\n")
436430

437431
for (name, mode, f, path) in opened_files:
438432
f.close()
@@ -463,7 +457,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
463457
exitCode = 0
464458
for dir in args:
465459
dir = pathlib.Path(dir)
466-
cwd = pathlib.Path(to_unicode(cmd_shenv.cwd))
460+
cwd = pathlib.Path(cmd_shenv.cwd)
467461
if not dir.is_absolute():
468462
dir = lit.util.abs_path_preserve_drive(cwd / dir)
469463
if parent:
@@ -508,8 +502,6 @@ def on_rm_error(func, path, exc_info):
508502
exitCode = 0
509503
for path in args:
510504
cwd = cmd_shenv.cwd
511-
path = to_unicode(path) if kIsWindows else to_bytes(path)
512-
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
513505
if not os.path.isabs(path):
514506
path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path))
515507
if force and not os.path.exists(path):
@@ -718,10 +710,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
718710
else:
719711
# Make sure relative paths are relative to the cwd.
720712
redir_filename = os.path.join(cmd_shenv.cwd, name)
721-
redir_filename = (
722-
to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename)
723-
)
724-
fd = open(redir_filename, mode)
713+
fd = open(redir_filename, mode, encoding="utf-8")
725714
# Workaround a Win32 and/or subprocess bug when appending.
726715
#
727716
# FIXME: Actually, this is probably an instance of PR6753.
@@ -1083,14 +1072,14 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
10831072
if out is None:
10841073
out = ""
10851074
else:
1086-
out = to_string(out.decode("utf-8", errors="replace"))
1075+
out = out.decode("utf-8", errors="replace")
10871076
except:
10881077
out = str(out)
10891078
try:
10901079
if err is None:
10911080
err = ""
10921081
else:
1093-
err = to_string(err.decode("utf-8", errors="replace"))
1082+
err = err.decode("utf-8", errors="replace")
10941083
except:
10951084
err = str(err)
10961085

@@ -1284,7 +1273,7 @@ def executeScriptInternal(
12841273

12851274
# Add the command output, if redirected.
12861275
for (name, path, data) in result.outputFiles:
1287-
data = to_string(data.decode("utf-8", errors="replace"))
1276+
data = data.decode("utf-8", errors="replace")
12881277
out += formatOutput(f"redirected output from '{name}'", data, limit=1024)
12891278
if result.stdout.strip():
12901279
out += formatOutput("command stdout", result.stdout)
@@ -1340,13 +1329,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13401329
script += ".bat"
13411330

13421331
# Write script file
1343-
mode = "w"
1344-
open_kwargs = {}
1345-
if litConfig.isWindows and not isWin32CMDEXE:
1346-
mode += "b" # Avoid CRLFs when writing bash scripts.
1347-
else:
1348-
open_kwargs["encoding"] = "utf-8"
1349-
f = open(script, mode, **open_kwargs)
13501332
if isWin32CMDEXE:
13511333
for i, ln in enumerate(commands):
13521334
match = re.fullmatch(kPdbgRegex, ln)
@@ -1355,8 +1337,9 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13551337
commands[i] = match.expand(
13561338
"echo '\\1' > nul && " if command else "echo '\\1' > nul"
13571339
)
1358-
f.write("@echo on\n")
1359-
f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
1340+
with open(script, "w", encoding="utf-8") as f:
1341+
f.write("@echo on\n")
1342+
f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
13601343
else:
13611344
for i, ln in enumerate(commands):
13621345
match = re.fullmatch(kPdbgRegex, ln)
@@ -1395,8 +1378,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13951378
# seen the latter manage to terminate the shell running lit.
13961379
if command:
13971380
commands[i] += f" && {{ {command}; }}"
1398-
if test.config.pipefail:
1399-
f.write(b"set -o pipefail;" if mode == "wb" else "set -o pipefail;")
14001381

14011382
# Manually export any DYLD_* variables used by dyld on macOS because
14021383
# otherwise they are lost when the shell executable is run, before the
@@ -1406,14 +1387,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
14061387
for k, v in test.config.environment.items()
14071388
if k.startswith("DYLD_")
14081389
)
1409-
f.write(bytes(env_str, "utf-8") if mode == "wb" else env_str)
1410-
f.write(b"set -x;" if mode == "wb" else "set -x;")
1411-
if mode == "wb":
1412-
f.write(bytes("{ " + "; } &&\n{ ".join(commands) + "; }", "utf-8"))
1413-
else:
1390+
1391+
with open(script, "w", encoding="utf-8", newline="") as f:
1392+
if test.config.pipefail:
1393+
f.write("set -o pipefail;")
1394+
f.write(env_str)
1395+
f.write("set -x;")
14141396
f.write("{ " + "; } &&\n{ ".join(commands) + "; }")
1415-
f.write(b"\n" if mode == "wb" else "\n")
1416-
f.close()
1397+
f.write("\n")
14171398

14181399
if isWin32CMDEXE:
14191400
command = ["cmd", "/c", script]
@@ -1449,19 +1430,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14491430
(line_number, command_type, line).
14501431
"""
14511432

1452-
# This code is carefully written to be dual compatible with Python 2.5+ and
1453-
# Python 3 without requiring input files to always have valid codings. The
1454-
# trick we use is to open the file in binary mode and use the regular
1455-
# expression library to find the commands, with it scanning strings in
1456-
# Python2 and bytes in Python3.
1457-
#
1458-
# Once we find a match, we do require each script line to be decodable to
1459-
# UTF-8, so we convert the outputs to UTF-8 before returning. This way the
1460-
# remaining code can work with "strings" agnostic of the executing Python
1461-
# version.
1433+
# We use `bytes` for scanning input files to avoid requiring them to always
1434+
# have valid codings.
14621435

14631436
keywords_re = re.compile(
1464-
to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),))
1437+
b"(%s)(.*)\n" % (b"|".join(re.escape(k.encode("utf-8")) for k in keywords),)
14651438
)
14661439

14671440
f = open(source_path, "rb")
@@ -1470,8 +1443,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14701443
data = f.read()
14711444

14721445
# Ensure the data ends with a newline.
1473-
if not data.endswith(to_bytes("\n")):
1474-
data = data + to_bytes("\n")
1446+
if not data.endswith(b"\n"):
1447+
data = data + b"\n"
14751448

14761449
# Iterate over the matches.
14771450
line_number = 1
@@ -1480,24 +1453,20 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14801453
# Compute the updated line number by counting the intervening
14811454
# newlines.
14821455
match_position = match.start()
1483-
line_number += data.count(
1484-
to_bytes("\n"), last_match_position, match_position
1485-
)
1456+
line_number += data.count(b"\n", last_match_position, match_position)
14861457
last_match_position = match_position
14871458

14881459
# Convert the keyword and line to UTF-8 strings and yield the
1489-
# command. Note that we take care to return regular strings in
1490-
# Python 2, to avoid other code having to differentiate between the
1491-
# str and unicode types.
1460+
# command.
14921461
#
14931462
# Opening the file in binary mode prevented Windows \r newline
14941463
# characters from being converted to Unix \n newlines, so manually
14951464
# strip those from the yielded lines.
14961465
keyword, ln = match.groups()
14971466
yield (
14981467
line_number,
1499-
to_string(keyword.decode("utf-8")),
1500-
to_string(ln.decode("utf-8").rstrip("\r")),
1468+
keyword.decode("utf-8"),
1469+
ln.decode("utf-8").rstrip("\r"),
15011470
)
15021471
finally:
15031472
f.close()

llvm/utils/lit/lit/builtin_commands/diff.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import sys
99

1010
import util
11-
from util import to_string
1211

1312

1413
class DiffFlags:
@@ -67,10 +66,9 @@ def compareTwoBinaryFiles(flags, filepaths, filelines):
6766
filepaths[1].encode(),
6867
n=flags.num_context_lines,
6968
)
70-
diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
7169

7270
for diff in diffs:
73-
sys.stdout.write(to_string(diff))
71+
sys.stdout.write(diff.decode(errors="backslashreplace"))
7472
exitCode = 1
7573
return exitCode
7674

@@ -117,7 +115,7 @@ def compose2(f, g):
117115
filepaths[1],
118116
n=flags.num_context_lines,
119117
):
120-
sys.stdout.write(to_string(diff))
118+
sys.stdout.write(diff)
121119
exitCode = 1
122120
return exitCode
123121

llvm/utils/lit/lit/formats/googletest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def get_num_tests(self, path, litConfig, localConfig):
4343
return None
4444
return sum(
4545
map(
46-
lambda line: lit.util.to_string(line).startswith(" "),
46+
lambda line: line.startswith(b" "),
4747
out.splitlines(False),
4848
)
4949
)

llvm/utils/lit/lit/llvm/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def _find_git_windows_unix_tools(self, tools_needed):
226226
continue
227227

228228
# We found it, stop enumerating.
229-
return lit.util.to_string(candidate_path)
229+
return candidate_path
230230
except:
231231
continue
232232

@@ -287,8 +287,8 @@ def get_process_output(self, command):
287287
env=self.config.environment,
288288
)
289289
stdout, stderr = cmd.communicate()
290-
stdout = lit.util.to_string(stdout)
291-
stderr = lit.util.to_string(stderr)
290+
stdout = stdout.decode("utf-8", errors="replace")
291+
stderr = stderr.decode("utf-8", errors="replace")
292292
return (stdout, stderr)
293293
except OSError:
294294
self.lit_config.fatal("Could not run process %s" % command)

llvm/utils/lit/lit/reports.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ def write_results(self, tests, elapsed):
2929
fd, _ = tempfile.mkstemp(
3030
suffix=ext, prefix=f"{filename}.", dir=os.path.dirname(self.output_file)
3131
)
32-
report_file = os.fdopen(fd, "w")
32+
report_file = os.fdopen(fd, "w", encoding="utf-8")
3333
else:
3434
# Overwrite if the results already exist.
35-
report_file = open(self.output_file, "w")
35+
report_file = open(self.output_file, "w", encoding="utf-8")
3636

3737
with report_file:
3838
self._write_results_to_file(tests, elapsed, report_file)

0 commit comments

Comments
 (0)