From 93b9e6bd7d48150d8a5d16cea39246a980e073cb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Oct 2024 11:27:49 +0300 Subject: [PATCH] gh-69998: Fix decoding error in locale.nl_langinfo() (GH-124963) The function now sets temporarily the LC_CTYPE locale to the locale of the category that determines the requested value if the locales are different and the resulting string is non-ASCII. This temporary change affects other threads. --- Doc/library/locale.rst | 9 + Doc/whatsnew/3.14.rst | 6 + Lib/test/test__locale.py | 12 +- ...4-10-04-12-43-03.gh-issue-69998.DVqOXX.rst | 3 + Modules/_localemodule.c | 193 ++++++++++++------ 5 files changed, 153 insertions(+), 70 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 0246f99157024a..04035b33d0ed48 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -314,6 +314,15 @@ The :mod:`locale` module defines the following exception and functions: Get a representation of up to 100 values used to represent the values 0 to 99. + The function temporarily sets the ``LC_CTYPE`` locale to the locale + of the category that determines the requested value (``LC_TIME``, + ``LC_NUMERIC``, ``LC_MONETARY`` or ``LC_MESSAGES``) if locales are + different and the resulting string is non-ASCII. + This temporary change affects other threads. + + .. versionchanged:: 3.14 + The function now temporarily sets the ``LC_CTYPE`` locale in some cases. + .. function:: getdefaultlocale([envvars]) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f1f78ed843f313..4d71a24e9cc9ca 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -587,6 +587,12 @@ Changes in the Python API Wrap it in :func:`staticmethod` if you want to preserve the old behavior. (Contributed by Serhiy Storchaka and Dominykas Grigonis in :gh:`121027`.) +* The :func:`locale.nl_langinfo` function now sets temporarily the ``LC_CTYPE`` + locale in some cases. + This temporary change affects other threads. + (Contributed by Serhiy Storchaka in :gh:`69998`.) + + Build Changes ============= diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 0947464bb8c04e..ba2d31f9c1ee9d 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -115,16 +115,17 @@ def numeric_tester(self, calc_type, calc_value, data_type, used_locale): def test_lc_numeric_nl_langinfo(self): # Test nl_langinfo against known values tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue for li, lc in ((RADIXCHAR, "decimal_point"), (THOUSEP, "thousands_sep")): if self.numeric_tester('nl_langinfo', nl_langinfo(li), lc, loc): tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -135,10 +136,10 @@ def test_lc_numeric_nl_langinfo(self): def test_lc_numeric_localeconv(self): # Test localeconv against known values tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue formatting = localeconv() @@ -146,6 +147,7 @@ def test_lc_numeric_localeconv(self): "thousands_sep"): if self.numeric_tester('localeconv', formatting[lc], lc, loc): tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -153,10 +155,10 @@ def test_lc_numeric_localeconv(self): def test_lc_numeric_basic(self): # Test nl_langinfo against localeconv tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue for li, lc in ((RADIXCHAR, "decimal_point"), @@ -173,6 +175,7 @@ def test_lc_numeric_basic(self): nl_radixchar, li_radixchar, loc, set_locale)) tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -180,10 +183,10 @@ def test_float_parsing(self): # Bug #1391872: Test whether float parsing is okay on European # locales. tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue @@ -199,6 +202,7 @@ def test_float_parsing(self): self.assertRaises(ValueError, float, localeconv()['decimal_point'].join(['1', '23'])) tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst b/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst new file mode 100644 index 00000000000000..65388e0b4e7ee4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst @@ -0,0 +1,3 @@ +Fix :func:`locale.nl_langinfo` in case when different categories have +different locales. The function now sets temporarily the ``LC_CTYPE`` locale +in some cases. This temporary change affects other threads. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 9452df492bb23b..ce77c4072a6aea 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -144,6 +144,17 @@ locale_is_ascii(const char *str) return (strlen(str) == 1 && ((unsigned char)str[0]) <= 127); } +static int +is_all_ascii(const char *str) +{ + for (; *str; str++) { + if ((unsigned char)*str > 127) { + return 0; + } + } + return 1; +} + static int locale_decode_monetary(PyObject *dict, struct lconv *lc) { @@ -478,113 +489,153 @@ _locale__getdefaultlocale_impl(PyObject *module) #endif #ifdef HAVE_LANGINFO_H -#define LANGINFO(X) {#X, X} +#define LANGINFO(X, Y) {#X, X, Y} static struct langinfo_constant{ - char* name; + const char *name; int value; + int category; } langinfo_constants[] = { /* These constants should exist on any langinfo implementation */ - LANGINFO(DAY_1), - LANGINFO(DAY_2), - LANGINFO(DAY_3), - LANGINFO(DAY_4), - LANGINFO(DAY_5), - LANGINFO(DAY_6), - LANGINFO(DAY_7), - - LANGINFO(ABDAY_1), - LANGINFO(ABDAY_2), - LANGINFO(ABDAY_3), - LANGINFO(ABDAY_4), - LANGINFO(ABDAY_5), - LANGINFO(ABDAY_6), - LANGINFO(ABDAY_7), - - LANGINFO(MON_1), - LANGINFO(MON_2), - LANGINFO(MON_3), - LANGINFO(MON_4), - LANGINFO(MON_5), - LANGINFO(MON_6), - LANGINFO(MON_7), - LANGINFO(MON_8), - LANGINFO(MON_9), - LANGINFO(MON_10), - LANGINFO(MON_11), - LANGINFO(MON_12), - - LANGINFO(ABMON_1), - LANGINFO(ABMON_2), - LANGINFO(ABMON_3), - LANGINFO(ABMON_4), - LANGINFO(ABMON_5), - LANGINFO(ABMON_6), - LANGINFO(ABMON_7), - LANGINFO(ABMON_8), - LANGINFO(ABMON_9), - LANGINFO(ABMON_10), - LANGINFO(ABMON_11), - LANGINFO(ABMON_12), + LANGINFO(DAY_1, LC_TIME), + LANGINFO(DAY_2, LC_TIME), + LANGINFO(DAY_3, LC_TIME), + LANGINFO(DAY_4, LC_TIME), + LANGINFO(DAY_5, LC_TIME), + LANGINFO(DAY_6, LC_TIME), + LANGINFO(DAY_7, LC_TIME), + + LANGINFO(ABDAY_1, LC_TIME), + LANGINFO(ABDAY_2, LC_TIME), + LANGINFO(ABDAY_3, LC_TIME), + LANGINFO(ABDAY_4, LC_TIME), + LANGINFO(ABDAY_5, LC_TIME), + LANGINFO(ABDAY_6, LC_TIME), + LANGINFO(ABDAY_7, LC_TIME), + + LANGINFO(MON_1, LC_TIME), + LANGINFO(MON_2, LC_TIME), + LANGINFO(MON_3, LC_TIME), + LANGINFO(MON_4, LC_TIME), + LANGINFO(MON_5, LC_TIME), + LANGINFO(MON_6, LC_TIME), + LANGINFO(MON_7, LC_TIME), + LANGINFO(MON_8, LC_TIME), + LANGINFO(MON_9, LC_TIME), + LANGINFO(MON_10, LC_TIME), + LANGINFO(MON_11, LC_TIME), + LANGINFO(MON_12, LC_TIME), + + LANGINFO(ABMON_1, LC_TIME), + LANGINFO(ABMON_2, LC_TIME), + LANGINFO(ABMON_3, LC_TIME), + LANGINFO(ABMON_4, LC_TIME), + LANGINFO(ABMON_5, LC_TIME), + LANGINFO(ABMON_6, LC_TIME), + LANGINFO(ABMON_7, LC_TIME), + LANGINFO(ABMON_8, LC_TIME), + LANGINFO(ABMON_9, LC_TIME), + LANGINFO(ABMON_10, LC_TIME), + LANGINFO(ABMON_11, LC_TIME), + LANGINFO(ABMON_12, LC_TIME), #ifdef RADIXCHAR /* The following are not available with glibc 2.0 */ - LANGINFO(RADIXCHAR), - LANGINFO(THOUSEP), + LANGINFO(RADIXCHAR, LC_NUMERIC), + LANGINFO(THOUSEP, LC_NUMERIC), /* YESSTR and NOSTR are deprecated in glibc, since they are a special case of message translation, which should be rather done using gettext. So we don't expose it to Python in the first place. - LANGINFO(YESSTR), - LANGINFO(NOSTR), + LANGINFO(YESSTR, LC_MESSAGES), + LANGINFO(NOSTR, LC_MESSAGES), */ - LANGINFO(CRNCYSTR), + LANGINFO(CRNCYSTR, LC_MONETARY), #endif - LANGINFO(D_T_FMT), - LANGINFO(D_FMT), - LANGINFO(T_FMT), - LANGINFO(AM_STR), - LANGINFO(PM_STR), + LANGINFO(D_T_FMT, LC_TIME), + LANGINFO(D_FMT, LC_TIME), + LANGINFO(T_FMT, LC_TIME), + LANGINFO(AM_STR, LC_TIME), + LANGINFO(PM_STR, LC_TIME), /* The following constants are available only with XPG4, but... OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have a few of the others. Solution: ifdef-test them all. */ #ifdef CODESET - LANGINFO(CODESET), + LANGINFO(CODESET, LC_CTYPE), #endif #ifdef T_FMT_AMPM - LANGINFO(T_FMT_AMPM), + LANGINFO(T_FMT_AMPM, LC_TIME), #endif #ifdef ERA - LANGINFO(ERA), + LANGINFO(ERA, LC_TIME), #endif #ifdef ERA_D_FMT - LANGINFO(ERA_D_FMT), + LANGINFO(ERA_D_FMT, LC_TIME), #endif #ifdef ERA_D_T_FMT - LANGINFO(ERA_D_T_FMT), + LANGINFO(ERA_D_T_FMT, LC_TIME), #endif #ifdef ERA_T_FMT - LANGINFO(ERA_T_FMT), + LANGINFO(ERA_T_FMT, LC_TIME), #endif #ifdef ALT_DIGITS - LANGINFO(ALT_DIGITS), + LANGINFO(ALT_DIGITS, LC_TIME), #endif #ifdef YESEXPR - LANGINFO(YESEXPR), + LANGINFO(YESEXPR, LC_MESSAGES), #endif #ifdef NOEXPR - LANGINFO(NOEXPR), + LANGINFO(NOEXPR, LC_MESSAGES), #endif #ifdef _DATE_FMT /* This is not available in all glibc versions that have CODESET. */ - LANGINFO(_DATE_FMT), + LANGINFO(_DATE_FMT, LC_TIME), #endif - {0, 0} + {0, 0, 0} }; +/* Temporary make the LC_CTYPE locale to be the same as + * the locale of the specified category. */ +static int +change_locale(int category, char **oldloc) +{ + /* Keep a copy of the LC_CTYPE locale */ + *oldloc = setlocale(LC_CTYPE, NULL); + if (!*oldloc) { + PyErr_SetString(PyExc_RuntimeError, "faild to get LC_CTYPE locale"); + return -1; + } + *oldloc = _PyMem_Strdup(*oldloc); + if (!*oldloc) { + PyErr_NoMemory(); + return -1; + } + + /* Set a new locale if it is different. */ + char *loc = setlocale(category, NULL); + if (loc == NULL || strcmp(loc, *oldloc) == 0) { + PyMem_Free(*oldloc); + *oldloc = NULL; + return 0; + } + + setlocale(LC_CTYPE, loc); + return 1; +} + +/* Restore the old LC_CTYPE locale. */ +static void +restore_locale(char *oldloc) +{ + if (oldloc != NULL) { + setlocale(LC_CTYPE, oldloc); + PyMem_Free(oldloc); + } +} + /*[clinic input] _locale.nl_langinfo @@ -602,14 +653,24 @@ _locale_nl_langinfo_impl(PyObject *module, int item) /* Check whether this is a supported constant. GNU libc sometimes returns numeric values in the char* return value, which would crash PyUnicode_FromString. */ - for (i = 0; langinfo_constants[i].name; i++) + for (i = 0; langinfo_constants[i].name; i++) { if (langinfo_constants[i].value == item) { /* Check NULL as a workaround for GNU libc's returning NULL instead of an empty string for nl_langinfo(ERA). */ const char *result = nl_langinfo(item); result = result != NULL ? result : ""; - return PyUnicode_DecodeLocale(result, NULL); + char *oldloc = NULL; + if (langinfo_constants[i].category != LC_CTYPE + && !is_all_ascii(result) + && change_locale(langinfo_constants[i].category, &oldloc) < 0) + { + return NULL; + } + PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); + restore_locale(oldloc); + return unicode; } + } PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant"); return NULL; }