Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cut disused recode_encoding logic in _PyBytes_DecodeEscape. #16013

Merged
merged 2 commits into from
Sep 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Include/bytesobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
#ifndef Py_LIMITED_API
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
const char *,
const char **);
const char *, const char **);
#endif

/* Macro, trading safety for speed */
Expand Down
2 changes: 1 addition & 1 deletion Include/longobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);
#endif

/* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
_PyBytes_DecodeEscapeRecode(), etc. */
_PyBytes_DecodeEscape(), etc. */
#ifndef Py_LIMITED_API
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
#endif
Expand Down
63 changes: 5 additions & 58 deletions Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,52 +1077,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
return NULL;
}

/* Unescape a backslash-escaped string. If unicode is non-zero,
the string is a u-literal. If recode_encoding is non-zero,
the string is UTF-8 encoded and should be re-encoded in the
specified encoding. */

/* Recode one run of non-ASCII bytes on behalf of _PyBytes_DecodeEscape().

   Starting at *s, every consecutive byte that has its high bit set is
   consumed (never reading at or past `end`).  That run is decoded as
   UTF-8, the resulting string is encoded with `recode_encoding`, and the
   encoded bytes are appended to `writer` at write position `p`.  `errors`
   is passed to both the decode and the encode call.

   On success *s is advanced past the consumed run and the new write
   position is returned.  If decoding, encoding or the write fails, NULL
   is returned and *s is left untouched. */
static char *
_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
                            const char *errors, const char *recode_encoding,
                            _PyBytesWriter *writer, char *p)
{
    const char *run_start = *s;
    const char *run_end = run_start;
    PyObject *decoded;
    PyObject *encoded;

    /* Locate the end of the run of bytes with the high bit set. */
    for (; run_end < end; run_end++) {
        if (!(*run_end & 0x80)) {
            break;
        }
    }

    /* Interpret the run as UTF-8 ... */
    decoded = PyUnicode_DecodeUTF8(run_start, run_end - run_start, errors);
    if (decoded == NULL) {
        return NULL;
    }

    /* ... and convert it to the requested target encoding. */
    encoded = PyUnicode_AsEncodedString(decoded, recode_encoding, errors);
    Py_DECREF(decoded);
    if (encoded == NULL) {
        return NULL;
    }
    assert(PyBytes_Check(encoded));

    /* Copy the recoded bytes into the output buffer. */
    writer->min_size--; /* subtract 1 preallocated byte */
    p = _PyBytesWriter_WriteBytes(writer, p,
                                  PyBytes_AS_STRING(encoded),
                                  PyBytes_GET_SIZE(encoded));
    Py_DECREF(encoded);
    if (p == NULL) {
        return NULL;
    }

    /* Only commit the new read position once everything succeeded. */
    *s = run_end;
    return p;
}

/* Unescape a backslash-escaped string. */
PyObject *_PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t unicode,
const char *recode_encoding,
const char **first_invalid_escape)
{
int c;
Expand All @@ -1142,17 +1100,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
end = s + len;
while (s < end) {
if (*s != '\\') {
if (!(recode_encoding && (*s & 0x80))) {
*p++ = *s++;
}
else {
/* non-ASCII character and need to recode */
p = _PyBytes_DecodeEscapeRecode(&s, end,
errors, recode_encoding,
&writer, p);
if (p == NULL)
goto failed;
}
*p++ = *s++;
continue;
}

Expand Down Expand Up @@ -1241,12 +1189,11 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t unicode,
const char *recode_encoding)
Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding))
{
const char* first_invalid_escape;
PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
recode_encoding,
PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
&first_invalid_escape);
if (result == NULL)
return NULL;
Expand Down
2 changes: 1 addition & 1 deletion Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -4765,7 +4765,7 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
size_t len)
{
const char *first_invalid_escape;
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
&first_invalid_escape);
if (result == NULL)
return NULL;
Expand Down