Skip to content

Commit af320d3

Browse files
committed
gh-103656: Transfer f-string buffers to parser to avoid use-after-free
1 parent 842daa5 commit af320d3

File tree

8 files changed

+199
-123
lines changed

8 files changed

+199
-123
lines changed

Grammar/python.gram

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -881,14 +881,13 @@ fstring_middle[expr_ty]:
881881
| fstring_replacement_field
882882
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
883883
fstring_replacement_field[expr_ty]:
884-
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
885-
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
886-
}
884+
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
885+
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
887886
| invalid_replacement_field
888-
fstring_conversion[expr_ty]:
887+
fstring_conversion[ResultTokenWithMetadata*]:
889888
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
890-
fstring_full_format_spec[expr_ty]:
891-
| ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
889+
fstring_full_format_spec[ResultTokenWithMetadata*]:
890+
| colon=':' spec=fstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
892891
fstring_format_spec[expr_ty]:
893892
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
894893
| fstring_replacement_field

Lib/test/test_fstring.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1535,5 +1535,19 @@ def test_not_closing_quotes(self):
15351535
self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
15361536
['f"""', "f'''"])
15371537

# Test added with the gh-103656 change: every f-string below starts with a
# '=' debug field ({1=}) and is followed by a malformed replacement field.
# The first group expects the "expecting a valid expression after '{'"
# message, the second group the "expecting '=', or '!', or ':', or '}'"
# message.
# NOTE(review): assertAllRaise is defined outside this hunk; presumably it
# checks each string raises the given exception with the given message.
1538+
def test_syntax_error_after_debug(self):
1539+
self.assertAllRaise(SyntaxError, "f-string: expecting a valid expression after '{'",
1540+
[
1541+
"f'{1=}{;'",
1542+
"f'{1=}{+;'",
1543+
"f'{1=}{2}{;'",
1544+
"f'{1=}{3}{;'",
1545+
])
1546+
self.assertAllRaise(SyntaxError, "f-string: expecting '=', or '!', or ':', or '}'",
1547+
[
1548+
"f'{1=}{1;'",
1549+
"f'{1=}{1;}'",
1550+
])
1551+
15381552
if __name__ == '__main__':
15391553
unittest.main()

Parser/action_helpers.c

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -965,17 +965,43 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
965965
return 0;
966966
}
967967

968-
expr_ty
969-
_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
970-
if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
/* Allocate a ResultTokenWithMetadata from the parser arena (p->arena) and
 * fill it with the given result and metadata pointers.
 *
 * Returns the new struct, or NULL if _PyArena_Malloc() returns NULL.
 * The metadata pointer is stored as-is; this helper does not take a new
 * reference to it.
 */
968+
static ResultTokenWithMetadata *
969+
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
970+
{
971+
ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
972+
if (res == NULL) {
973+
return NULL;
974+
}
975+
res->metadata = metadata;
976+
res->result = result;
977+
return res;
978+
}
979+
980+
ResultTokenWithMetadata *
981+
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
982+
{
983+
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
971984
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
972-
symbol, conv,
985+
conv_token, conv,
973986
"f-string: conversion type must come right after the exclamanation mark"
974987
);
975988
}
976-
return conv;
989+
return result_token_with_metadata(p, conv, conv_token->metadata);
977990
}
978991

/* Build the result for a full format spec: the ':' token and the
 * format-spec items that follow it in an f-string replacement field.
 *
 * Returns NULL when spec is NULL or when _PyAST_JoinedStr() fails.
 * Otherwise returns a ResultTokenWithMetadata whose result is the
 * JoinedStr node built from spec at the given source location and whose
 * metadata is the metadata of the colon token.
 *
 * NOTE(review): the node is allocated from p->arena; the arena parameter
 * is not used in this body.
 */
992+
ResultTokenWithMetadata *
993+
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
994+
int end_lineno, int end_col_offset, PyArena *arena)
995+
{
996+
if (!spec) {
997+
return NULL;
998+
}
999+
expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);
1000+
if (!res) {
1001+
return NULL;
1002+
}
1003+
return result_token_with_metadata(p, res, colon->metadata);
1004+
}
9791005

9801006
const char *
9811007
_PyPegen_get_expr_name(expr_ty e)
@@ -1386,19 +1412,20 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
13861412
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
13871413
}
13881414

1389-
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
1390-
expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
1391-
PyArena *arena) {
1415+
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
1416+
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
1417+
int end_lineno, int end_col_offset, PyArena *arena) {
13921418
int conversion_val = -1;
13931419
if (conversion != NULL) {
1394-
assert(conversion->kind == Name_kind);
1395-
Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
1420+
expr_ty conversion_expr = (expr_ty) conversion->result;
1421+
assert(conversion_expr->kind == Name_kind);
1422+
Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
13961423

1397-
if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
1424+
if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
13981425
!(first == 's' || first == 'r' || first == 'a')) {
1399-
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
1426+
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
14001427
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
1401-
conversion->v.Name.id);
1428+
conversion_expr->v.Name.id);
14021429
return NULL;
14031430
}
14041431

@@ -1410,30 +1437,34 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
14101437
}
14111438

14121439
expr_ty formatted_value = _PyAST_FormattedValue(
1413-
expression, conversion_val, format,
1440+
expression, conversion_val, format ? (expr_ty) format->result : NULL,
14141441
lineno, col_offset, end_lineno,
14151442
end_col_offset, arena
14161443
);
14171444

14181445
if (debug) {
14191446
/* Find the non whitespace token after the "=" */
14201447
int debug_end_line, debug_end_offset;
1448+
PyObject *debug_metadata;
14211449

14221450
if (conversion) {
1423-
debug_end_line = conversion->lineno;
1424-
debug_end_offset = conversion->col_offset;
1451+
debug_end_line = ((expr_ty) conversion->result)->lineno;
1452+
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
1453+
debug_metadata = conversion->metadata;
14251454
}
14261455
else if (format) {
1427-
debug_end_line = format->lineno;
1428-
debug_end_offset = format->col_offset + 1; // HACK: ??
1456+
debug_end_line = ((expr_ty) format->result)->lineno;
1457+
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
1458+
debug_metadata = format->metadata;
14291459
}
14301460
else {
14311461
debug_end_line = end_lineno;
14321462
debug_end_offset = end_col_offset;
1463+
debug_metadata = closing_brace->metadata;
14331464
}
14341465

1435-
expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
1436-
debug_end_line, debug_end_offset - 1);
1466+
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
1467+
debug_end_offset - 1, p->arena);
14371468
if (!debug_text) {
14381469
return NULL;
14391470
}

Parser/parser.c

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Parser/pegen.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,17 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
155155
return -1;
156156
}
157157

158+
if (new_token->metadata != NULL) {
159+
parser_token->metadata = new_token->metadata;
160+
if (_PyArena_AddPyObject(p->arena, parser_token->metadata) < 0) {
161+
Py_DECREF(parser_token->metadata);
162+
return -1;
163+
}
164+
}
165+
else {
166+
parser_token->metadata = NULL;
167+
}
168+
158169
parser_token->level = new_token->level;
159170
parser_token->lineno = new_token->lineno;
160171
parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
@@ -198,6 +209,7 @@ int
198209
_PyPegen_fill_token(Parser *p)
199210
{
200211
struct token new_token;
212+
new_token.metadata = NULL;
201213
int type = _PyTokenizer_Get(p->tok, &new_token);
202214

203215
// Record and skip '# type: ignore' comments

Parser/pegen.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ typedef struct {
3939
int level;
4040
int lineno, col_offset, end_lineno, end_col_offset;
4141
Memo *memo;
42+
PyObject *metadata;
4243
} Token;
4344

4445
typedef struct {
@@ -118,6 +119,11 @@ typedef struct {
118119
int is_keyword;
119120
} KeywordOrStarred;
120121

/* Pairs a parse result with a metadata object taken from a token.
 *
 * result   - untyped pointer to the parse result; the f-string helpers in
 *            this change store an expr_ty here and cast it back on use.
 * metadata - the PyObject copied from a Token's metadata field (may be
 *            NULL when the token carried no metadata).
 */
122+
typedef struct {
123+
void *result;
124+
PyObject *metadata;
125+
} ResultTokenWithMetadata;
126+
121127
// Internal parser functions
122128
#if defined(Py_DEBUG)
123129
void _PyPegen_clear_memo_statistics(void);
@@ -310,7 +316,8 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
310316
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
311317
asdl_arg_seq *, asdl_seq *, StarEtc *);
312318
arguments_ty _PyPegen_empty_arguments(Parser *);
313-
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
319+
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
320+
int, int, int, int, PyArena *);
314321
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
315322
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
316323
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
@@ -329,7 +336,9 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
329336
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
330337
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
331338
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
332-
expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
339+
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
340+
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
341+
int, int, PyArena *);
333342
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
334343
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
335344
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);

0 commit comments

Comments
 (0)