Skip to content

bpo-29104: Fixed parsing backslashes in f-strings. #490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,20 @@ def test_backslashes_in_string_part(self):
self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3')

with self.assertWarns(DeprecationWarning): # invalid escape sequence
value = eval(r"f'\{6*7}'")
self.assertEqual(value, '\\42')
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')

AMPERSAND = 'spam'
# Get the right unicode character (&), or pick up local variable
# depending on the number of backslashes.
self.assertEqual(f'\N{AMPERSAND}', '&')
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')

def test_misformed_unicode_character_name(self):
# These tests are needed because unicode names are parsed
# differently inside f-strings.
Expand Down
2 changes: 2 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
Core and Builtins
-----------------

- bpo-29104: Fixed parsing of backslashes in f-strings.

- bpo-27945: Fixed various segfaults with dict when input collections are
mutated during searching, inserting or comparing. Based on patches by
Duane Griffin and Tim Mitchell.
Expand Down
53 changes: 32 additions & 21 deletions Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
while (s < end) {
if (*s == '\\') {
*p++ = *s++;
if (*s & 0x80) {
if (s >= end || *s & 0x80) {
strcpy(p, "u005c");
p += 5;
if (s >= end)
break;
}
}
if (*s & 0x80) { /* XXX inefficient */
Expand Down Expand Up @@ -4352,59 +4354,68 @@ fstring_find_literal(const char **str, const char *end, int raw,
brace (which isn't part of a unicode name escape such as
"\N{EULER CONSTANT}"), or the end of the string. */

const char *literal_start = *str;
const char *literal_end;
int in_named_escape = 0;
const char *s = *str;
const char *literal_start = s;
int result = 0;

assert(*literal == NULL);
for (; *str < end; (*str)++) {
char ch = **str;
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
*(*str-2) == '\\' && *(*str-1) == 'N') {
in_named_escape = 1;
} else if (in_named_escape && ch == '}') {
in_named_escape = 0;
} else if (ch == '{' || ch == '}') {
while (s < end) {
char ch = *s++;
if (!raw && ch == '\\' && s < end) {
ch = *s++;
if (ch == 'N') {
if (s < end && *s++ == '{') {
while (s < end && *s++ != '}') {
}
continue;
}
break;
}
if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
return -1;
}
}
if (ch == '{' || ch == '}') {
/* Check for doubled braces, but only at the top level. If
we checked at every level, then f'{0:{3}}' would fail
with the two closing braces. */
if (recurse_lvl == 0) {
if (*str+1 < end && *(*str+1) == ch) {
if (s < end && *s == ch) {
/* We're going to tell the caller that the literal ends
here, but that they should continue scanning. But also
skip over the second brace when we resume scanning. */
literal_end = *str+1;
*str += 2;
*str = s + 1;
result = 1;
goto done;
}

/* Where a single '{' is the start of a new expression, a
single '}' is not allowed. */
if (ch == '}') {
*str = s - 1;
ast_error(c, n, "f-string: single '}' is not allowed");
return -1;
}
}
/* We're either at a '{', which means we're starting another
expression; or a '}', which means we're at the end of this
f-string (for a nested format_spec). */
s--;
break;
}
}
literal_end = *str;
assert(*str <= end);
assert(*str == end || **str == '{' || **str == '}');
*str = s;
assert(s <= end);
assert(s == end || *s == '{' || *s == '}');
done:
if (literal_start != literal_end) {
if (literal_start != s) {
if (raw)
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
literal_end-literal_start,
s - literal_start,
NULL, NULL);
else
*literal = decode_unicode_with_escapes(c, n, literal_start,
literal_end-literal_start);
s - literal_start);
if (!*literal)
return -1;
}
Expand Down