Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions Grammar/python.gram
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
# Simplified grammar for Python

@bytecode True
@modulename 'peg_parser' # Not needed for now, but might be needed later
@trailer '''
mod_ty
parse_start(Parser *p)
void *
parse(Parser *p)
{
return start_rule(p);
// Initialize keywords
p->keywords = reserved_keywords;
p->n_keyword_lists = n_keyword_lists;

// Run parser
void *result = NULL;
if (p->start_rule_func == START) {
result = start_rule(p);
} else if (p->start_rule_func == EXPRESSIONS) {
result = expressions_rule(p);
}

return result;
}

// The end
Expand Down
22 changes: 17 additions & 5 deletions Parser/pegen/parse.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// @generated by pegen.py from ./Grammar/python.gram
#include "pegen.h"
const int n_keyword_lists = 9;
KeywordToken *reserved_keywords[] = {
static const int n_keyword_lists = 9;
static KeywordToken *reserved_keywords[] = {
NULL,
NULL,
(KeywordToken[]) {
Expand Down Expand Up @@ -13559,10 +13559,22 @@ _tmp_124_rule(Parser *p)
return res;
}

mod_ty
parse_start(Parser *p)
void *
parse(Parser *p)
{
return start_rule(p);
// Initialize keywords
p->keywords = reserved_keywords;
p->n_keyword_lists = n_keyword_lists;

// Run parser
void *result = NULL;
if (p->start_rule_func == START) {
result = start_rule(p);
} else if (p->start_rule_func == EXPRESSIONS) {
result = expressions_rule(p);
}

return result;
}

// The end
58 changes: 8 additions & 50 deletions Parser/pegen/parse_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ static expr_ty
fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
Token *t)
{
mod_ty mod = NULL;
expr_ty expr = NULL;
char *str;
Py_ssize_t len;
const char *s;
Expand Down Expand Up @@ -585,68 +585,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
PyTokenizer_Free(tok);
return NULL;
}
mod_ty (*the_start_rule)(Parser*) = p->start_rule_func;

Parser *p2 = PyMem_Malloc(sizeof(Parser));
if (p2 == NULL) {
PyErr_Format(PyExc_MemoryError, "Out of memory for Parser");
goto exit;
}
p2->tok = tok;
p2->input_mode = STRING_INPUT;
p2->keywords = p->keywords;
p2->n_keyword_lists = p->n_keyword_lists;
p2->tokens = PyMem_Malloc(sizeof(Token *));
if (!p2->tokens) {
PyErr_Format(PyExc_MemoryError, "Out of memory for tokens");
goto exit;
}
p2->tokens[0] = PyMem_Malloc(sizeof(Token));
memset(p2->tokens[0], '\0', sizeof(Token));
p2->mark = 0;
p2->fill = 0;
p2->size = 1;
p2->arena = p->arena;
p2->start_rule_func = the_start_rule;
if (fill_token(p2) < 0) {
goto exit;
}
PyErr_Clear();
mod = the_start_rule(p2);
Parser *p2 = Parser_New(tok, EXPRESSIONS, STRING_INPUT, p->arena);

if (mod == NULL){
raise_syntax_error(p2, "invalid syntax");
goto exit;
}
expr = parse(p2);

if (asdl_seq_LEN(mod->v.Module.body) == 0) {
raise_syntax_error(p, "f-string: empty expression not allowed");
goto exit;
}

stmt_ty expr = asdl_seq_GET(mod->v.Module.body, 0);
if (asdl_seq_LEN(mod->v.Module.body) != 1 || expr->kind != Expr_kind) {
raise_syntax_error(p, "f-string: invalid expression");
if (expr == NULL){
raise_syntax_error(p2, "invalid syntax");
goto exit;
}

/* Reuse str to find the correct column offset. */
str[0] = '{';
str[len+1] = '}';
fstring_fix_expr_location(t, expr->v.Expr.value, str);
fstring_fix_expr_location(t, expr, str);

result = expr->v.Expr.value;
result = expr;

exit:
Parser_Free(p2);
PyTokenizer_Free(tok);
for (int i = 0; i < p2->size; i++) {
PyMem_Free(p2->tokens[i]);
}
PyMem_Free(p2->tokens);
PyMem_Free(p2);
if (mod == NULL) {
return NULL;
}
return result;
}

Expand Down
4 changes: 2 additions & 2 deletions Parser/pegen/peg_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ PyPegen_ASTFromString(const char *str, PyArena *arena)
return NULL;
}

mod_ty result = run_parser_from_string(str, parse_start, filename_ob, arena);
mod_ty result = run_parser_from_string(str, START, filename_ob, arena);
Py_XDECREF(filename_ob);
return result;
}
Expand All @@ -24,7 +24,7 @@ PyPegen_ASTFromFile(const char *filename, PyArena *arena)
return NULL;
}

mod_ty result = run_parser_from_file(filename, parse_start, filename_ob, arena);
mod_ty result = run_parser_from_file(filename, START, filename_ob, arena);
Py_XDECREF(filename_ob);
return result;
}
Expand Down
24 changes: 9 additions & 15 deletions Parser/pegen/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ Parser_Free(Parser *p)
}

Parser *
Parser_New(struct tok_state *tok, mod_ty (*parse_func)(Parser *), int input_mode,
Parser_New(struct tok_state *tok, START_RULE start_rule_func, int input_mode,
PyArena *arena)
{
Parser *p = PyMem_Malloc(sizeof(Parser));
Expand All @@ -512,8 +512,8 @@ Parser_New(struct tok_state *tok, mod_ty (*parse_func)(Parser *), int input_mode
assert(tok != NULL);
p->tok = tok;
p->input_mode = input_mode;
p->keywords = reserved_keywords;
p->n_keyword_lists = n_keyword_lists;
p->keywords = NULL;
p->n_keyword_lists = -1;
p->tokens = PyMem_Malloc(sizeof(Token *));
if (!p->tokens) {
PyErr_Format(PyExc_MemoryError, "Out of memory for tokens");
Expand All @@ -526,12 +526,7 @@ Parser_New(struct tok_state *tok, mod_ty (*parse_func)(Parser *), int input_mode
p->size = 1;

p->arena = arena;

if (fill_token(p) < 0) {
return NULL;
}

p->start_rule_func = parse_func;
p->start_rule_func = start_rule_func;

return p;
}
Expand All @@ -544,8 +539,7 @@ run_parser(Parser *p)
return NULL;
}

mod_ty (*parse_func)(Parser *) = p->start_rule_func;
mod_ty res = (*parse_func)(p);
mod_ty res = parse(p);
if (res == NULL) {
if (PyErr_Occurred()) {
return NULL;
Expand All @@ -563,7 +557,7 @@ run_parser(Parser *p)
}

mod_ty
run_parser_from_file(const char *filename, mod_ty (*parse_func)(Parser *),
run_parser_from_file(const char *filename, START_RULE start_rule_func,
PyObject *filename_ob, PyArena *arena)
{
FILE *fp = fopen(filename, "rb");
Expand All @@ -583,7 +577,7 @@ run_parser_from_file(const char *filename, mod_ty (*parse_func)(Parser *),
tok->filename = filename_ob;
Py_INCREF(filename_ob);

Parser *p = Parser_New(tok, parse_func, FILE_INPUT, arena);
Parser *p = Parser_New(tok, start_rule_func, FILE_INPUT, arena);
if (p == NULL) {
goto after_tok_error;
}
Expand All @@ -599,7 +593,7 @@ run_parser_from_file(const char *filename, mod_ty (*parse_func)(Parser *),
}

mod_ty
run_parser_from_string(const char *str, mod_ty (*parse_func)(Parser *), PyObject *filename_ob,
run_parser_from_string(const char *str, START_RULE start_rule_func, PyObject *filename_ob,
PyArena *arena)
{
struct tok_state *tok = PyTokenizer_FromString(str, 1);
Expand All @@ -613,7 +607,7 @@ run_parser_from_string(const char *str, mod_ty (*parse_func)(Parser *), PyObject
// We need to clear up from here on
mod_ty result = NULL;

Parser *p = Parser_New(tok, parse_func, STRING_INPUT, arena);
Parser *p = Parser_New(tok, start_rule_func, STRING_INPUT, arena);
if (p == NULL) {
goto error;
}
Expand Down
19 changes: 12 additions & 7 deletions Parser/pegen/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ enum INPUT_MODE {
};
typedef enum INPUT_MODE INPUT_MODE;

enum START_RULE {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this enum of only one field?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll probably add more, when we need to choose between start rules. I'm currently working on maybe changing the start rule for f-string to expressions_rule and for this I added another field EXPRESSIONS to this enum.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Probably for a separate PR.)

Maybe we could pass a string literal instead? By convention we could have NULL -> start and a few other names could be recognized based on metadata given in the .gram file, e.g. [1]

@alternate_starts "single_input file_input eval_input"

which would generate a little table at the end

static struct { char *name; mod_ty (*rule)(Parser *); } alternate_starts[] = {
    {"single_input", single_input_rule},
    ...
    {NULL, NULL},
};

and then the dispatch function could just look up the argument in the table. (This is a negligible cost given everything else that goes into parsing even one line.)


[1] We'd need to add a few new rules named single_input, file_input, eval_input that correspond to those targets in Grammar/Grammar.

START,
EXPRESSIONS
};
typedef enum START_RULE START_RULE;

typedef struct _memo {
int type;
void *node;
Expand Down Expand Up @@ -41,7 +47,7 @@ typedef struct {
PyArena *arena;
KeywordToken **keywords;
int n_keyword_lists;
void *start_rule_func;
START_RULE start_rule_func;
INPUT_MODE input_mode;
jmp_buf error_env;
} Parser;
Expand Down Expand Up @@ -81,9 +87,6 @@ typedef struct {
int is_keyword;
} KeywordOrStarred;

extern const int n_keyword_lists;
extern KeywordToken *reserved_keywords[];

int insert_memo(Parser *p, int mark, int type, void *node);
int update_memo(Parser *p, int mark, int type, void *node);
int is_memoized(Parser *p, int type, void *pres);
Expand Down Expand Up @@ -136,8 +139,10 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
#define CHECK_NULL_ALLOWED(result) CHECK_CALL_NULL_ALLOWED(p, result)

PyObject *new_identifier(Parser *, char *);
mod_ty run_parser_from_file(const char *, mod_ty(*)(Parser *), PyObject *, PyArena *);
mod_ty run_parser_from_string(const char *, mod_ty(*)(Parser *), PyObject *, PyArena *);
Parser *Parser_New(struct tok_state *, START_RULE, int, PyArena *);
void Parser_Free(Parser *);
mod_ty run_parser_from_file(const char *, START_RULE, PyObject *, PyArena *);
mod_ty run_parser_from_string(const char *, START_RULE, PyObject *, PyArena *);
asdl_seq *singleton_seq(Parser *, void *);
asdl_seq *seq_insert_in_front(Parser *, void *, asdl_seq *);
asdl_seq *seq_flatten(Parser *, asdl_seq *);
Expand Down Expand Up @@ -166,6 +171,6 @@ asdl_seq *seq_extract_starred_exprs(Parser *, asdl_seq *);
asdl_seq *seq_delete_starred_exprs(Parser *, asdl_seq *);
expr_ty concatenate_strings(Parser *p, asdl_seq *);

mod_ty parse_start(Parser *);
void *parse(Parser *);

#endif
4 changes: 2 additions & 2 deletions Tools/peg_generator/pegen/c_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,9 @@ def _setup_keywords(self) -> None:
n_keyword_lists = (
len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
)
self.print(f"const int n_keyword_lists = {n_keyword_lists};")
self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
groups = self._group_keywords_by_length()
self.print("KeywordToken *reserved_keywords[] = {")
self.print("static KeywordToken *reserved_keywords[] = {")
with self.indent():
num_groups = max(groups) + 1 if groups else 1
for keywords_length in range(num_groups):
Expand Down