Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-34523: Add _PyCoreConfig.filesystem_encoding #8963

Merged
merged 2 commits into from
Aug 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Include/coreconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ typedef struct {
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */

/* Python filesystem encoding and error handler: see
sys.getfilesystemencoding() and sys.getfilesystemencodeerrors().

Updated later by initfsencoding(). On Windows, can be updated by
sys._enablelegacywindowsfsencoding() at runtime.

See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
*/
char *filesystem_encoding;
char *filesystem_errors;

/* Enable UTF-8 mode?
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
If set to -1 (default), inherit Py_UTF8Mode value. */
Expand Down Expand Up @@ -325,6 +336,14 @@ PyAPI_FUNC(int) _PyCoreConfig_GetEnvDup(
#endif


#ifdef Py_BUILD_CORE
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
const char *encoding,
const char *errors);
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
#endif


#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion Include/pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);

/* Legacy locale support */
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(int warn);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
Expand Down
41 changes: 39 additions & 2 deletions Lib/test/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ def test_initialize_pymain(self):

class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
maxDiff = 4096
UTF8_MODE_ERRORS = ('surrogatepass' if sys.platform == 'win32'
else 'surrogateescape')
DEFAULT_CONFIG = {
'install_signal_handlers': 1,
'use_environment': 1,
Expand All @@ -265,8 +267,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'show_alloc_count': 0,
'dump_refs': 0,
'malloc_stats': 0,
'utf8_mode': 0,

# None means that the default encoding is read at runtime:
# see get_locale_encoding().
'filesystem_encoding': None,
'filesystem_errors': sys.getfilesystemencodeerrors(),
'utf8_mode': 0,
'coerce_c_locale': 0,
'coerce_c_locale_warn': 0,

Expand Down Expand Up @@ -297,6 +303,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'_frozen': 0,
}


def get_stdio_encoding(self, env):
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
args = (sys.executable, '-c', code)
Expand All @@ -308,6 +315,29 @@ def get_stdio_encoding(self, env):
out = proc.stdout.rstrip()
return out.split()

# Return the name of the encoding that the tests expect as the default
# filesystem encoding.
#
# On Windows, macOS and Android the value is hard-coded to "utf-8".  On
# other platforms a child interpreter is spawned: it sets LC_CTYPE from the
# environment, reads locale.nl_langinfo(locale.CODESET) and prints the codec
# name normalized through codecs.lookup(enc).name.
#
# Raises Exception if the child interpreter exits with a non-zero status.
def get_locale_encoding(self, isolated):
if sys.platform in ('win32', 'darwin') or support.is_android:
# Windows, macOS and Android use UTF-8
return "utf-8"

# Statements of the child program; they are joined with '; ' below so the
# whole program can be passed to a single -c option.
code = ('import codecs, locale, sys',
'locale.setlocale(locale.LC_CTYPE, "")',
'enc = locale.nl_langinfo(locale.CODESET)',
'enc = codecs.lookup(enc).name',
'print(enc)')
args = (sys.executable, '-c', '; '.join(code))
# Work on a copy so the environment of the test process is left untouched.
env = dict(os.environ)
# NOTE(review): indentation is lost in this view; both assignments below
# presumably belong to the `if not isolated` branch -- confirm against the
# real file.  They presumably turn off C locale coercion and UTF-8 mode in
# the child so that it reports the plain locale encoding.
if not isolated:
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
proc = subprocess.run(args, text=True, env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# Include the child's captured output in the error to ease debugging.
if proc.returncode:
raise Exception(f"failed to get the locale encoding: "
f"stdout={proc.stdout!r} stderr={proc.stderr!r}")
# Strip trailing whitespace (the newline written by print()).
return proc.stdout.rstrip()

def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected)

Expand All @@ -326,6 +356,8 @@ def check_config(self, testname, expected):
expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1]
if expected['filesystem_encoding'] is None:
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
for key, value in expected.items():
expected[key] = str(value)

Expand Down Expand Up @@ -357,7 +389,8 @@ def test_init_global_config(self):
'utf8_mode': 1,
'stdio_encoding': 'utf-8',
'stdio_errors': 'surrogateescape',

'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,
'user_site_directory': 0,
'_frozen': 1,
}
Expand All @@ -378,6 +411,8 @@ def test_init_from_config(self):
'utf8_mode': 1,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',
'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,

'pycache_prefix': 'conf_pycache_prefix',
'program_name': './conf_program_name',
Expand Down Expand Up @@ -409,6 +444,8 @@ def test_init_env(self):
'import_time': 1,
'malloc_stats': 1,
'utf8_mode': 1,
'filesystem_encoding': 'utf-8',
'filesystem_errors': self.UTF8_MODE_ERRORS,
'inspect': 1,
'optimization_level': 2,
'pycache_prefix': 'env_pycache_prefix',
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,16 @@ def check(tracebacklimit, expected):
def test_no_duplicates_in_meta_path(self):
self.assertEqual(len(sys.meta_path), len(set(sys.meta_path)))

@unittest.skipUnless(hasattr(sys, "_enablelegacywindowsfsencoding"),
                     'needs sys._enablelegacywindowsfsencoding()')
def test__enablelegacywindowsfsencoding(self):
    # Run a child interpreter that switches to the legacy Windows filesystem
    # encoding and reports the resulting encoding and error handler.
    script = '; '.join((
        'import sys',
        'sys._enablelegacywindowsfsencoding()',
        'print(sys.getfilesystemencoding(), sys.getfilesystemencodeerrors())',
    ))
    # Only the captured stdout is of interest; the return code is already
    # checked by assert_python_ok().
    _, stdout, _ = assert_python_ok('-c', script)
    reported = stdout.decode('ascii', 'replace').rstrip()
    # After the call, the child must report the "mbcs" encoding with the
    # "replace" error handler.
    self.assertEqual(reported, 'mbcs replace')


@test.support.cpython_only
class SizeofTest(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The Python filesystem encoding is now read earlier during the Python
initialization.
2 changes: 1 addition & 1 deletion Modules/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,7 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
*/
if (config->coerce_c_locale && !locale_coerced) {
locale_coerced = 1;
_Py_CoerceLegacyLocale(config);
_Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
encoding_changed = 1;
}

Expand Down
42 changes: 18 additions & 24 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3410,27 +3410,24 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__)
return _PyUnicode_AsUTF8String(unicode, Py_FileSystemDefaultEncodeErrors);
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
#else
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
/* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Loading
the Python codec requires encoding at least its own filename. Use the C
version of the locale codec until the codec registry is initialized and
the Python codec is loaded.

Py_FileSystemDefaultEncoding is shared between all interpreters, we
cannot only rely on it: check also interp->fscodec_initialized for
subinterpreters. */
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
implementation of the locale codec until the codec registry is
initialized and the Python codec is loaded. See initfsencoding(). */
if (interp->fscodec_initialized) {
return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding,
Py_FileSystemDefaultEncodeErrors);
config->filesystem_encoding,
config->filesystem_errors);
}
else {
return unicode_encode_locale(unicode,
Py_FileSystemDefaultEncodeErrors, 0);
config->filesystem_errors, 0);
}
#endif
}
Expand Down Expand Up @@ -3636,27 +3633,24 @@ PyUnicode_DecodeFSDefault(const char *s) {
PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__)
return PyUnicode_DecodeUTF8Stateful(s, size, Py_FileSystemDefaultEncodeErrors, NULL);
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
#else
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
/* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Loading
the Python codec requires encoding at least its own filename. Use the C
version of the locale codec until the codec registry is initialized and
the Python codec is loaded.

Py_FileSystemDefaultEncoding is shared between all interpreters, we
cannot only rely on it: check also interp->fscodec_initialized for
subinterpreters. */
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
implementation of the locale codec until the codec registry is
initialized and the Python codec is loaded. See initfsencoding(). */
if (interp->fscodec_initialized) {
return PyUnicode_Decode(s, size,
Py_FileSystemDefaultEncoding,
Py_FileSystemDefaultEncodeErrors);
config->filesystem_encoding,
config->filesystem_errors);
}
else {
return unicode_decode_locale(s, size,
Py_FileSystemDefaultEncodeErrors, 0);
config->filesystem_errors, 0);
}
#endif
}
Expand Down
9 changes: 8 additions & 1 deletion Programs/_freeze_importlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,15 @@ main(int argc, char *argv[])
config.program_name = L"./_freeze_importlib";
/* Don't install importlib, since it could execute outdated bytecode. */
config._install_importlib = 0;
config.install_signal_handlers = 1;
config._frozen = 1;
#ifdef MS_WINDOWS
/* bpo-34523: initfsencoding() is not called if _install_importlib=0,
so interp->fscodec_initialized value remains 0.
PyUnicode_EncodeFSDefault() doesn't support the "surrogatepass" error
handler in that case, whereas it's the default error handler on Windows.
Force the "strict" error handler to work around this bootstrap issue. */
config.filesystem_errors = "strict";
#endif

_PyInitError err = _Py_InitializeFromConfig(&config);
/* No need to call _PyCoreConfig_Clear() since we didn't allocate any
Expand Down
2 changes: 2 additions & 0 deletions Programs/_testembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ dump_config(void)
printf("dump_refs = %i\n", config->dump_refs);
printf("malloc_stats = %i\n", config->malloc_stats);

printf("filesystem_encoding = %s\n", config->filesystem_encoding);
printf("filesystem_errors = %s\n", config->filesystem_errors);
printf("coerce_c_locale = %i\n", config->coerce_c_locale);
printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn);
printf("utf8_mode = %i\n", config->utf8_mode);
Expand Down
Loading