Skip to content

Commit 5db73a6

Browse files
authored
Add support for specifying response file encoding (#15426)
* Add support for specifying response file encoding using the suffix of the response file name, and autodetect the response file encoding using the suffix of the response file name if one is specified there. * Update Changelog * Adjust test to verify locale.getpreferredencoding() * Update ChangeLog * Improve comments * Update comment * Relocate comment * Update Changelog * Add test to windows config
1 parent 4b9a0d0 commit 5db73a6

File tree

4 files changed

+56
-15
lines changed

4 files changed

+56
-15
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ jobs:
430430
# note we do *not* build all libraries and freeze the cache; as we run
431431
# only limited tests here, it's more efficient to build on demand
432432
- run-tests:
433-
test_targets: "other.test_emcc_cflags other.test_stdin other.test_bad_triple wasm2.test_sse1 wasm2.test_ccall other.test_closure_externs other.test_binaryen_debug other.test_js_optimizer_parse_error other.test_output_to_nowhere other.test_emcc_dev_null other.test_cmake* other.test_system_include_paths other.test_emar_response_file wasm2.test_utf16 other.test_special_chars_in_arguments other.test_toolchain_profiler other.test_realpath_nodefs"
433+
test_targets: "other.test_emcc_cflags other.test_stdin other.test_bad_triple wasm2.test_sse1 wasm2.test_ccall other.test_closure_externs other.test_binaryen_debug other.test_js_optimizer_parse_error other.test_output_to_nowhere other.test_emcc_dev_null other.test_cmake* other.test_system_include_paths other.test_emar_response_file wasm2.test_utf16 other.test_special_chars_in_arguments other.test_toolchain_profiler other.test_realpath_nodefs other.test_response_file_encoding"
434434
test-mac:
435435
executor: mac
436436
environment:

ChangeLog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ See docs/process.md for more on how version tagging works.
3636
2.0.33 - 11/01/2021
3737
-------------------
3838
- Bug fixes
39+
- Added support for specifying the text encoding of response files
40+
by passing the encoding as a file suffix (e.g. "a.rsp.utf-8" or "a.rsp.cp1252").
41+
If not specified, the encoding is autodetected as either UTF-8 or Python
42+
default "locale.getpreferredencoding()". (#15406, #15292, #15426)
3943

4044
2.0.32 - 10/19/2021
4145
-------------------

tests/test_other.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10728,6 +10728,22 @@ def create_o(name, i):
1072810728
self.run_process(building.get_command_with_possible_response_file([EMCC, 'main.c'] + files))
1072910729
self.assertContained(str(count * (count - 1) // 2), self.run_js('a.out.js'))
1073010730

10731+
# Tests that the filename suffix of a response file can be used to specify the encoding of that file.
10732+
def test_response_file_encoding(self):
10733+
open('äö.c', 'w').write('int main(){}')
10734+
10735+
open('a.rsp', 'w', encoding='utf-8').write('äö.c') # Write a response file with unicode contents ...
10736+
self.run_process([EMCC, '@a.rsp']) # ... and test that in the absence of a file suffix, it is autodetected to utf-8.
10737+
10738+
open('a.rsp.cp437', 'w', encoding='cp437').write('äö.c') # Write a response file with Windows CP-437 encoding ...
10739+
self.run_process([EMCC, '@a.rsp.cp437']) # ... and test that with the explicit suffix present, it is properly decoded
10740+
10741+
import locale
10742+
preferred_encoding = locale.getpreferredencoding(do_setlocale=False)
10743+
print('Python locale preferredencoding: ' + preferred_encoding)
10744+
open('a.rsp', 'w', encoding=preferred_encoding).write('äö.c') # Write a response file using Python preferred encoding
10745+
self.run_process([EMCC, '@a.rsp']) # ... and test that it is properly autodetected.
10746+
1073110747
def test_output_name_collision(self):
1073210748
# Ensure that the secondary filenames never collide with the primary output filename
1073310749
# In this case we explicitly ask for JS to be created in a file with the `.wasm` suffix.

tools/response_file.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@
1313
DEBUG = int(os.environ.get('EMCC_DEBUG', '0'))
1414

1515

16-
def create_response_file(args, directory):
16+
def create_response_file(args, directory, suffix='.rsp.utf-8'):
1717
"""Routes the given cmdline param list in args into a new response file and
1818
returns the filename to it.
1919
20-
The returned filename has a suffix '.rsp'.
20+
By default the returned filename has a suffix '.rsp.utf-8'. Pass a suffix parameter to override.
2121
"""
22-
response_fd, response_filename = tempfile.mkstemp(prefix='emscripten_', suffix='.rsp', dir=directory, text=True)
22+
23+
assert suffix.startswith('.')
24+
25+
response_fd, response_filename = tempfile.mkstemp(prefix='emscripten_', suffix=suffix, dir=directory, text=True)
2326

2427
# Backslashes and other special chars need to be escaped in the response file.
2528
escape_chars = ['\\', '\"']
@@ -41,16 +44,12 @@ def escape(arg):
4144
arg = '"%s"' % arg
4245
contents += arg + '\n'
4346

44-
# When writing windows repsonse files force the encoding to UTF8 which we know
45-
# that llvm tools understand. Without this, we get whatever the default codepage
46-
# might be.
47-
# See: https://github.com/llvm/llvm-project/blob/3f3d1c901d7abcc5b91468335679b1b27d8a02dd/llvm/include/llvm/Support/Program.h#L168-L170
48-
# And: https://github.com/llvm/llvm-project/blob/63d16d06f5b8f71382033b5ea4aa668f8150817a/clang/include/clang/Driver/Job.h#L58-L69
49-
# TODO(sbc): Should we also force utf-8 on non-windows?
50-
if WINDOWS:
51-
encoding = 'utf-8'
47+
# Decide the encoding of the generated file based on the requested file suffix
48+
if suffix.count('.') == 2:
49+
# Use the encoding specified in the suffix of the response file
50+
encoding = suffix.split('.')[2]
5251
else:
53-
encoding = None
52+
encoding = 'utf-8'
5453

5554
with os.fdopen(response_fd, 'w', encoding=encoding) as f:
5655
f.write(contents)
@@ -70,15 +69,37 @@ def read_response_file(response_filename):
7069
"""Reads a response file, and returns the list of cmdline params found in the
7170
file.
7271
72+
The encoding that the response file should be read with can be specified
73+
as a suffix to the file, e.g. "foo.rsp.utf-8" or "foo.rsp.cp1252". If not
74+
specified, first UTF-8 and then Python locale.getpreferredencoding() are
75+
attempted.
76+
7377
The parameter response_filename may start with '@'."""
7478
if response_filename.startswith('@'):
7579
response_filename = response_filename[1:]
7680

7781
if not os.path.exists(response_filename):
7882
raise IOError("response file not found: %s" % response_filename)
7983

80-
with open(response_filename) as f:
81-
args = f.read()
84+
# Guess encoding based on the file suffix
85+
components = os.path.basename(response_filename).split('.')
86+
encoding_suffix = components[-1].lower()
87+
if len(components) > 1 and (encoding_suffix.startswith('utf') or encoding_suffix.startswith('cp') or encoding_suffix.startswith('iso') or encoding_suffix in ['ascii', 'latin-1']):
88+
guessed_encoding = encoding_suffix
89+
else:
90+
guessed_encoding = 'utf-8'
91+
92+
try:
93+
# First try with the guessed encoding
94+
with open(response_filename, encoding=guessed_encoding) as f:
95+
args = f.read()
96+
except (ValueError, LookupError): # UnicodeDecodeError is a subclass of ValueError, and Python raises either a ValueError or a UnicodeDecodeError on decode errors. LookupError is raised if guessed encoding is not an encoding.
97+
if DEBUG:
98+
logging.warning(f'Failed to parse response file {response_filename} with guessed encoding "{guessed_encoding}". Trying default system encoding...')
99+
# If that fails, try with the Python default locale.getpreferredencoding()
100+
with open(response_filename) as f:
101+
args = f.read()
102+
82103
args = shlex.split(args)
83104

84105
if DEBUG:

0 commit comments

Comments
 (0)