Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-29240: readline now ignores the UTF-8 Mode #5145

Merged
merged 1 commit into from
Jan 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Include/fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
const char *s,
Py_ssize_t size,
size_t *p_wlen);

/* Similar to Py_DecodeLocale(), but ignore the UTF-8 Mode.
   See Py_DecodeLocale() for the meaning of `size` and of a NULL result. */
PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
const char *arg,
size_t *size);

/* Similar to _Py_EncodeLocaleRaw(), but ignore the UTF-8 Mode.
   The caller added in Objects/unicodeobject.c releases the returned string
   with PyMem_RawFree() and treats a NULL result with
   *error_pos == (size_t)-1 as a memory error. */
PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
const wchar_t *text,
size_t *error_pos);
#endif

#ifndef Py_LIMITED_API
Expand Down
10 changes: 10 additions & 0 deletions Include/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -1810,6 +1810,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
PyObject *unicode,
const char *errors
);

/* Private variant of PyUnicode_DecodeLocaleAndSize(): it takes the same
   arguments but asks the shared implementation to use the current locale,
   i.e. to ignore the UTF-8 Mode (used by the readline module). */
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
const char *str,
Py_ssize_t len,
const char *errors);

/* Private variant of PyUnicode_EncodeLocale(): it takes the same arguments
   but asks the shared implementation to use the current locale, i.e. to
   ignore the UTF-8 Mode (used by the readline module). */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
PyObject *unicode,
const char *errors
);
#endif

/* --- File system encoding ---------------------------------------------- */
Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_readline.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ def test_auto_history_disabled(self):
output = run_pty(self.auto_history_script.format(False))
self.assertIn(b"History length: 0\r\n", output)

@unittest.skipIf(True,
"FIXME: test broken by bpo-29240")
def test_nonascii(self):
try:
readline.add_history("\xEB\xEF")
Expand Down
5 changes: 3 additions & 2 deletions Modules/readline.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,14 @@ static PyModuleDef readlinemodule;
/* Encode the Unicode object `b` with the "surrogateescape" error handler
   for use by readline.
   NOTE(review): this diff hunk lists the removed PyUnicode_EncodeLocale()
   call followed by its replacement, _PyUnicode_EncodeCurrentLocale();
   presumably only the second return remains in the merged file -- confirm. */
static PyObject *
encode(PyObject *b)
{
return PyUnicode_EncodeLocale(b, "surrogateescape");
return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
}

/* Decode the NUL-terminated byte string `s` with the "surrogateescape"
   error handler; `s` must not be NULL because strlen() is applied to it.
   NOTE(review): this diff hunk lists the removed PyUnicode_DecodeLocale()
   call followed by its replacement,
   _PyUnicode_DecodeCurrentLocaleAndSize(); presumably only the second
   return remains in the merged file -- confirm. */
static PyObject *
decode(const char *s)
{
return PyUnicode_DecodeLocale(s, "surrogateescape");
return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s),
"surrogateescape");
}


Expand Down
62 changes: 52 additions & 10 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3395,8 +3395,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
}
}

PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
static PyObject *
unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
{
Py_ssize_t wlen, wlen2;
wchar_t *wstr;
Expand All @@ -3423,7 +3423,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
/* "surrogateescape" error handler */
char *str;

str = Py_EncodeLocale(wstr, &error_pos);
if (current_locale) {
str = _Py_EncodeCurrentLocale(wstr, &error_pos);
}
else {
str = Py_EncodeLocale(wstr, &error_pos);
}
if (str == NULL) {
if (error_pos == (size_t)-1) {
PyErr_NoMemory();
Expand All @@ -3437,7 +3442,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyMem_Free(wstr);

bytes = PyBytes_FromString(str);
PyMem_Free(str);
if (current_locale) {
PyMem_RawFree(str);
}
else {
PyMem_Free(str);
}
}
else {
/* strict mode */
Expand Down Expand Up @@ -3502,6 +3512,18 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
return NULL;
}

/* Public entry point: encode `unicode` through unicode_encode_locale()
   with current_locale=0, so the "surrogateescape" path goes through
   Py_EncodeLocale(). */
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
return unicode_encode_locale(unicode, errors, 0);
}

/* Private entry point: encode `unicode` through unicode_encode_locale()
   with current_locale=1, so the "surrogateescape" path goes through
   _Py_EncodeCurrentLocale(), which ignores the UTF-8 Mode.
   NOTE(review): the strict-mode path is not visible in this hunk; confirm
   whether current_locale affects it. */
PyObject *
_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
{
return unicode_encode_locale(unicode, errors, 1);
}

PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
Expand All @@ -3524,7 +3546,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
Py_FileSystemDefaultEncodeErrors);
}
else {
return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
return unicode_encode_locale(unicode,
Py_FileSystemDefaultEncodeErrors, 0);
}
#endif
}
Expand Down Expand Up @@ -3695,9 +3718,9 @@ mbstowcs_errorpos(const char *str, size_t len)
return 0;
}

PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
static PyObject*
unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
int current_locale)
{
wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
Expand All @@ -3719,7 +3742,12 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,

if (surrogateescape) {
/* "surrogateescape" error handler */
wstr = Py_DecodeLocale(str, &wlen);
if (current_locale) {
wstr = _Py_DecodeCurrentLocale(str, &wlen);
}
else {
wstr = Py_DecodeLocale(str, &wlen);
}
if (wstr == NULL) {
if (wlen == (size_t)-1)
PyErr_NoMemory();
Expand Down Expand Up @@ -3794,11 +3822,25 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
return NULL;
}

/* Public entry point: decode `len` bytes of `str` through
   unicode_decode_locale() with current_locale=0, so the "surrogateescape"
   path goes through Py_DecodeLocale(). */
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{
return unicode_decode_locale(str, len, errors, 0);
}

/* Private entry point: decode `len` bytes of `str` through
   unicode_decode_locale() with current_locale=1, so the "surrogateescape"
   path goes through _Py_DecodeCurrentLocale(), which ignores the UTF-8 Mode.
   NOTE(review): the strict-mode path is not visible in this hunk; confirm
   whether current_locale affects it. */
PyObject*
_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{
return unicode_decode_locale(str, len, errors, 1);
}

/* Decode the NUL-terminated byte string `str`; its length is computed with
   strlen(), so `str` must not be NULL.
   NOTE(review): this diff hunk lists the removed call to
   PyUnicode_DecodeLocaleAndSize() followed by its replacement, a direct call
   to unicode_decode_locale() with current_locale=0; presumably only the
   second return remains in the merged file -- confirm. */
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
return unicode_decode_locale(str, size, errors, 0);
}


Expand Down
80 changes: 52 additions & 28 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)

#if !defined(__APPLE__) && !defined(__ANDROID__)
static wchar_t*
decode_locale(const char* arg, size_t *size)
decode_current_locale(const char* arg, size_t *size)
{
wchar_t *res;
size_t argsize;
Expand Down Expand Up @@ -380,32 +380,13 @@ decode_locale(const char* arg, size_t *size)
#endif


/* Decode a byte string from the locale encoding with the
surrogateescape error handler: undecodable bytes are decoded as characters
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
character, escape the bytes using the surrogateescape error handler instead
of decoding them.

Return a pointer to a newly allocated wide character string; use
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
wide characters excluding the null character into *size.

Return NULL on decoding error or memory allocation error. If size is not
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
decoding error.

Decoding errors should never happen, unless there is a bug in the C
library.

Use the Py_EncodeLocale() function to encode the character string back to a
byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *size)
static wchar_t*
decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
#else
if (Py_UTF8Mode == 1) {
if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
}

Expand All @@ -426,11 +407,45 @@ Py_DecodeLocale(const char* arg, size_t *size)
}
#endif

return decode_locale(arg, size);
return decode_current_locale(arg, size);
#endif /* __APPLE__ or __ANDROID__ */
}


/* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   This entry point calls decode_locale() with ignore_utf8_mode=0, so the
   UTF-8 Mode (Py_UTF8Mode == 1) is honoured: in that mode the string is
   decoded from UTF-8 instead of the locale encoding.

   Return a pointer to a newly allocated wide character string; use
   PyMem_RawFree() to free the memory. If size is not NULL, write the number
   of wide characters excluding the null character into *size.

   Return NULL on decoding error or memory allocation error. If size is not
   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *size)
{
return decode_locale(arg, size, 0);
}


/* Similar to Py_DecodeLocale() but ignore the UTF-8 Mode: decode_locale()
   is called with ignore_utf8_mode=1, which skips the Py_UTF8Mode == 1
   shortcut to the UTF-8 decoder. On __APPLE__ and __ANDROID__ builds
   decode_locale() still decodes from UTF-8 unconditionally. */
wchar_t*
_Py_DecodeCurrentLocale(const char* arg, size_t *size)
{
return decode_locale(arg, size, 1);
}


#if !defined(__APPLE__) && !defined(__ANDROID__)
static char*
encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
Expand Down Expand Up @@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
#endif

static char*
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
encode_locale(const wchar_t *text, size_t *error_pos,
int raw_malloc, int ignore_utf8_mode)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
}

Expand Down Expand Up @@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
/* Public wrapper around encode_locale().
   NOTE(review): this diff hunk lists the removed three-argument call
   followed by its replacement, which adds ignore_utf8_mode=0 (the UTF-8 Mode
   is honoured) and keeps raw_malloc=0; presumably only the second return
   remains in the merged file -- confirm. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
return encode_locale(text, error_pos, 0);
return encode_locale(text, error_pos, 0, 0);
}


Expand All @@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
/* Wrapper around encode_locale() with raw_malloc=1.
   NOTE(review): this diff hunk lists the removed three-argument call
   followed by its replacement, which adds ignore_utf8_mode=0 (the UTF-8 Mode
   is honoured); presumably only the second return remains in the merged
   file -- confirm. */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
return encode_locale(text, error_pos, 1);
return encode_locale(text, error_pos, 1, 0);
}


/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode:
   encode_locale() is called with raw_malloc=1 and ignore_utf8_mode=1, which
   skips the Py_UTF8Mode == 1 shortcut to the UTF-8 encoder. On __APPLE__ and
   __ANDROID__ builds encode_locale() still encodes to UTF-8 unconditionally.
   The caller in Objects/unicodeobject.c releases the result with
   PyMem_RawFree(). */
char*
_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
{
return encode_locale(text, error_pos, 1, 1);
}


Expand Down