Skip to content

Commit

Permalink
bpo-31900: Fix localeconv() encoding for LC_NUMERIC (#4174)
Browse files Browse the repository at this point in the history
* Add _Py_GetLocaleconvNumeric() function: decode decimal_point and
  thousands_sep fields of localeconv() from the LC_NUMERIC encoding,
  rather than decoding from the LC_CTYPE encoding.
* Modify locale.localeconv() and the "n" formatter of str.format() (for
  int, float and complex) to use _Py_GetLocaleconvNumeric()
  internally.
  • Loading branch information
vstinner authored Jan 15, 2018
1 parent 7ed7aea commit cb064fc
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 19 deletions.
10 changes: 10 additions & 0 deletions Doc/library/locale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ The :mod:`locale` module defines the following exception and functions:
| ``CHAR_MAX`` | Nothing is specified in this locale. |
+--------------+-----------------------------------------+

The function temporarily sets the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if
they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is
different from the ``LC_CTYPE`` locale. This temporary change affects other
threads.

.. versionchanged:: 3.7
The function now sets temporarily the ``LC_CTYPE`` locale to the
``LC_NUMERIC`` locale in some cases.


.. function:: nl_langinfo(option)

Expand Down
14 changes: 14 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,20 @@ expression support in the :mod:`re` module).
See :ref:`formatstrings` for a description of the various formatting options
that can be specified in format strings.

.. note::
When formatting a number (:class:`int`, :class:`float`, :class:`complex`
and subclasses) with the ``n`` type (ex: ``'{:n}'.format(1234)``), the
function temporarily sets the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
locale to decode ``decimal_point`` and ``thousands_sep`` fields of
:c:func:`localeconv` if they are non-ASCII or longer than 1 byte, and the
``LC_NUMERIC`` locale is different from the ``LC_CTYPE`` locale. This
temporary change affects other threads.

.. versionchanged:: 3.7
When formatting a number with the ``n`` type, the function sets
temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some
cases.


.. method:: str.format_map(mapping)

Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.7.rst
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,9 @@ Changes in Python behavior
Changes in the Python API
-------------------------

* The :func:`locale.localeconv` function now temporarily sets the ``LC_CTYPE``
locale to the ``LC_NUMERIC`` locale in some cases.

* The ``asyncio.windows_utils.socketpair()`` function has been
removed: use directly :func:`socket.socketpair` which is available on all
platforms since Python 3.5 (before, it wasn't available on Windows).
Expand Down
5 changes: 5 additions & 0 deletions Include/fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd);
PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking);
#endif /* !MS_WINDOWS */

/* Decode the numeric fields of localeconv().

   Each output argument may be NULL, in which case that field is skipped.
   On success, return 0; *decimal_point and *thousands_sep receive strings
   decoded with PyUnicode_DecodeLocale(), and *grouping receives the
   grouping pointer of the structure returned by localeconv() (not a copy).
   On failure, return -1.

   The LC_CTYPE locale may be temporarily switched to the LC_NUMERIC locale
   while decoding. */
PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
PyObject **decimal_point,
PyObject **thousands_sep,
const char **grouping);

#endif /* Py_LIMITED_API */

#ifdef __cplusplus
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
The :func:`locale.localeconv` function now temporarily sets the ``LC_CTYPE``
locale to the ``LC_NUMERIC`` locale to decode ``decimal_point`` and
``thousands_sep`` byte strings if they are non-ASCII or longer than 1 byte, and
the ``LC_NUMERIC`` locale is different from the ``LC_CTYPE`` locale. This
temporary change affects other threads.

Same change for the :meth:`str.format` method when formatting a number
(:class:`int`, :class:`float`, :class:`complex` and subclasses) with the ``n``
type (ex: ``'{:n}'.format(1234)``).
37 changes: 29 additions & 8 deletions Modules/_localemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,9 @@ PyLocale_localeconv(PyObject* self)
PyObject *x;

result = PyDict_New();
if (!result)
if (!result) {
return NULL;
}

/* if LC_NUMERIC is different in the C library, use saved value */
l = localeconv();
Expand Down Expand Up @@ -171,12 +172,6 @@ PyLocale_localeconv(PyObject* self)
RESULT(#i, x); \
} while (0)

/* Numeric information */
RESULT_STRING(decimal_point);
RESULT_STRING(thousands_sep);
x = copy_grouping(l->grouping);
RESULT("grouping", x);

/* Monetary information */
RESULT_STRING(int_curr_symbol);
RESULT_STRING(currency_symbol);
Expand All @@ -195,10 +190,36 @@ PyLocale_localeconv(PyObject* self)
RESULT_INT(n_sep_by_space);
RESULT_INT(p_sign_posn);
RESULT_INT(n_sign_posn);

/* Numeric information */
PyObject *decimal_point, *thousands_sep;
const char *grouping;
if (_Py_GetLocaleconvNumeric(&decimal_point,
&thousands_sep,
&grouping) < 0) {
goto failed;
}

if (PyDict_SetItemString(result, "decimal_point", decimal_point) < 0) {
Py_DECREF(decimal_point);
Py_DECREF(thousands_sep);
goto failed;
}
Py_DECREF(decimal_point);

if (PyDict_SetItemString(result, "thousands_sep", thousands_sep) < 0) {
Py_DECREF(thousands_sep);
goto failed;
}
Py_DECREF(thousands_sep);

x = copy_grouping(grouping);
RESULT("grouping", x);

return result;

failed:
Py_XDECREF(result);
Py_DECREF(result);
return NULL;
}

Expand Down
77 changes: 77 additions & 0 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -1746,3 +1746,80 @@ _Py_set_blocking(int fd, int blocking)
return -1;
}
#endif


/* Decode the numeric fields of localeconv() into Python objects.

   Every output argument is optional: pass NULL to skip that field.

   On success, return 0 and:
   - *decimal_point / *thousands_sep receive the strings decoded with
     PyUnicode_DecodeLocale();
   - *grouping receives lc->grouping, i.e. a pointer into the structure
     returned by localeconv() (it is not copied).

   If a requested string field is non-ASCII or longer than 1 byte, and the
   LC_NUMERIC locale differs from the LC_CTYPE locale, LC_CTYPE is set to the
   LC_NUMERIC locale for the duration of the decoding and restored before
   returning.

   On failure, return -1. No reference is left behind in the output
   arguments: if thousands_sep cannot be decoded after decimal_point was,
   *decimal_point is released and reset to NULL. */
int
_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
                         const char **grouping)
{
    int res = -1;

    struct lconv *lc = localeconv();

    /* A single ASCII byte decodes the same way in every encoding, so the
       locale only has to be switched for multi-byte or non-ASCII values. */
    int change_locale = 0;
    if (decimal_point != NULL &&
        (strlen(lc->decimal_point) > 1 ||
         ((unsigned char)lc->decimal_point[0]) > 127))
    {
        change_locale = 1;
    }
    if (thousands_sep != NULL &&
        (strlen(lc->thousands_sep) > 1 ||
         ((unsigned char)lc->thousands_sep[0]) > 127))
    {
        change_locale = 1;
    }

    /* Keep a copy of the LC_CTYPE locale */
    char *oldloc = NULL, *loc = NULL;
    if (change_locale) {
        oldloc = setlocale(LC_CTYPE, NULL);
        if (!oldloc) {
            PyErr_SetString(PyExc_RuntimeWarning,
                            "failed to get LC_CTYPE locale");
            return -1;
        }

        /* The string returned by setlocale() is duplicated because it is
           needed again after the setlocale() calls below. */
        oldloc = _PyMem_Strdup(oldloc);
        if (!oldloc) {
            PyErr_NoMemory();
            return -1;
        }

        loc = setlocale(LC_NUMERIC, NULL);
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
            /* Same locale: nothing to switch, nothing to restore. */
            loc = NULL;
        }

        if (loc != NULL) {
            /* Only set temporarily the LC_CTYPE locale
               if LC_NUMERIC locale is different than LC_CTYPE locale and
               decimal_point and/or thousands_sep are non-ASCII or longer than
               1 byte */
            setlocale(LC_CTYPE, loc);
        }
    }

    if (decimal_point != NULL) {
        *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
        if (*decimal_point == NULL) {
            goto done;
        }
    }
    if (thousands_sep != NULL) {
        *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
        if (*thousands_sep == NULL) {
            /* Do not leak the already decoded decimal_point: the caller
               only sees -1 and cannot know that it was created. */
            if (decimal_point != NULL) {
                Py_CLEAR(*decimal_point);
            }
            goto done;
        }
    }

    if (grouping != NULL) {
        *grouping = lc->grouping;
    }

    res = 0;

done:
    /* loc is non-NULL only if LC_CTYPE was actually switched above. */
    if (loc != NULL) {
        setlocale(LC_CTYPE, oldloc);
    }
    PyMem_Free(oldloc);
    return res;
}
15 changes: 4 additions & 11 deletions Python/formatter_unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,18 +704,11 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
{
switch (type) {
case LT_CURRENT_LOCALE: {
struct lconv *locale_data = localeconv();
locale_info->decimal_point = PyUnicode_DecodeLocale(
locale_data->decimal_point,
NULL);
if (locale_info->decimal_point == NULL)
if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
&locale_info->thousands_sep,
&locale_info->grouping) < 0) {
return -1;
locale_info->thousands_sep = PyUnicode_DecodeLocale(
locale_data->thousands_sep,
NULL);
if (locale_info->thousands_sep == NULL)
return -1;
locale_info->grouping = locale_data->grouping;
}
break;
}
case LT_DEFAULT_LOCALE:
Expand Down

0 comments on commit cb064fc

Please sign in to comment.