Skip to content

bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes #4899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Doc/using/cmdline.rst
Original file line number Diff line number Diff line change
Expand Up @@ -779,9 +779,7 @@ conflict.

If set to the value ``0``, causes the main Python command line application
to skip coercing the legacy ASCII-based C locale to a more capable UTF-8
based alternative. Note that this setting is checked even when the
:option:`-E` or :option:`-I` options are used, as it is handled prior to
the processing of command line options.
based alternative.

If this variable is *not* set, or is set to a value other than ``0``, and
the current locale reported for the ``LC_CTYPE`` category is the default
Expand Down
5 changes: 4 additions & 1 deletion Include/pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *);

/* Bootstrap __main__ (defined in Modules/main.c) */
PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv);
#ifdef Py_BUILD_CORE
PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
#endif

/* In getpath.c */
PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void);
Expand Down Expand Up @@ -194,7 +197,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);

/* Legacy locale support */
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
Expand Down
8 changes: 6 additions & 2 deletions Include/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,19 @@ typedef struct {
int show_alloc_count; /* -X showalloccount */
int dump_refs; /* PYTHONDUMPREFS */
int malloc_stats; /* PYTHONMALLOCSTATS */
int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable */
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable,
-1 means unknown */

wchar_t *module_search_path_env; /* PYTHONPATH environment variable */
wchar_t *home; /* PYTHONHOME environment variable,
see also Py_SetPythonHome(). */
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
} _PyCoreConfig;

#define _PyCoreConfig_INIT (_PyCoreConfig){.use_hash_seed = -1}
#define _PyCoreConfig_INIT \
(_PyCoreConfig){.use_hash_seed = -1, .coerce_c_locale = -1, .utf8_mode = -1}
/* Note: _PyCoreConfig_INIT sets other fields to 0/NULL */

/* Placeholders while working on the new configuration API
Expand Down
5 changes: 3 additions & 2 deletions Lib/test/test_c_locale_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _set_locale_in_subprocess(locale_name):
# If there's no valid CODESET, we expect coercion to be skipped
cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
cmd = cmd_fmt.format(locale_name)
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='')
return result.rc == 0


Expand Down Expand Up @@ -131,7 +131,6 @@ def get_child_details(cls, env_vars):
"""
result, py_cmd = run_python_until_end(
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
__isolated=True,
**env_vars
)
if not result.rc == 0:
Expand Down Expand Up @@ -236,6 +235,7 @@ def test_external_target_locale_configuration(self):
"LANG": "",
"LC_CTYPE": "",
"LC_ALL": "",
"PYTHONCOERCECLOCALE": "",
}
for env_var in ("LANG", "LC_CTYPE"):
for locale_to_set in AVAILABLE_TARGETS:
Expand Down Expand Up @@ -294,6 +294,7 @@ def _check_c_locale_coercion(self,
"LANG": "",
"LC_CTYPE": "",
"LC_ALL": "",
"PYTHONCOERCECLOCALE": "",
}
base_var_dict.update(extra_vars)
for env_var in ("LANG", "LC_CTYPE"):
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_cmd_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def test_xdev(self):
self.assertEqual(out, "True")

# Warnings
code = ("import sys, warnings; "
code = ("import warnings; "
"print(' '.join('%s::%s' % (f[0], f[2].__name__) "
"for f in warnings.filters))")
if Py_DEBUG:
Expand Down
30 changes: 26 additions & 4 deletions Lib/test/test_utf8_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
import sys
import textwrap
import unittest
from test import support
from test.support.script_helper import assert_python_ok, assert_python_failure


MS_WINDOWS = (sys.platform == 'win32')


class UTF8ModeTests(unittest.TestCase):
# Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
# variables by default
DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
DEFAULT_ENV = {
'PYTHONUTF8': '',
'PYTHONLEGACYWINDOWSFSENCODING': '',
'PYTHONCOERCECLOCALE': '0',
}

def posix_locale(self):
loc = locale.setlocale(locale.LC_CTYPE, None)
Expand Down Expand Up @@ -53,7 +56,7 @@ def test_xoption(self):
self.assertEqual(out, '0')

if MS_WINDOWS:
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
# and has the priority over -X utf8
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONLEGACYWINDOWSFSENCODING='1')
Expand Down Expand Up @@ -201,6 +204,25 @@ def test_locale_getpreferredencoding(self):
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
self.assertEqual(out, 'UTF-8 UTF-8')

# Unix-only test: check how a non-ASCII command line argument shows up in
# sys.argv with the UTF-8 Mode enabled (-X utf8) and disabled (-X utf8=0).
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
def test_cmd_line(self):
# The argument is passed as raw bytes: 'h\xe9\u20ac' encoded to UTF-8.
arg = 'h\xe9\u20ac'.encode('utf-8')
# Expected sys.argv item when the bytes are decoded from UTF-8.
arg_utf8 = arg.decode('utf-8')
# Expected sys.argv item when the bytes are decoded from ASCII with the
# surrogateescape error handler (undecodable bytes become lone surrogates).
arg_ascii = arg.decode('ascii', 'surrogateescape')
# The child script prints "<preferred encoding>:<ascii(sys.argv[1:])>".
code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

# Helper: pass '-X <utf8_opt>', the script and the bytes argument to
# self.get_output() and compare the text after the first ':' with
# ascii(expected). Extra keyword arguments are forwarded unchanged.
# NOTE(review): get_output() is defined outside this view; presumably it
# runs a child interpreter with **kw as environment variables and returns
# its stdout -- confirm.
def check(utf8_opt, expected, **kw):
out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
# Keep only what follows the "<encoding>:" prefix.
args = out.partition(':')[2].rstrip()
# The full output is used as the assertion failure message.
self.assertEqual(args, ascii(expected), out)

# UTF-8 Mode enabled: the argument is expected to be decoded from UTF-8.
check('utf8', [arg_utf8])
# UTF-8 Mode disabled with LC_ALL=C: macOS and Android still expect the
# UTF-8 decoded value; other platforms expect the surrogateescape form.
# NOTE(review): presumably because those platforms always decode the
# command line from UTF-8 -- confirm.
if sys.platform == 'darwin' or support.is_android:
c_arg = arg_utf8
else:
c_arg = arg_ascii
check('utf8=0', [c_arg], LC_ALL='C')


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion Modules/getpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ extern "C" {

#define DECODE_LOCALE_ERR(NAME, LEN) \
((LEN) == (size_t)-2) \
? _Py_INIT_USER_ERR("cannot decode " #NAME) \
? _Py_INIT_USER_ERR("cannot decode " NAME) \
: _Py_INIT_NO_MEMORY()

typedef struct {
Expand Down
Loading