xref: /third_party/python/Parser/pegen.c (revision 7db96d56)
1#include <Python.h>
2#include "pycore_ast.h"           // _PyAST_Validate(),
3#include "pycore_pystate.h"       // _PyThreadState_GET()
4#include <errcode.h>
5
6#include "tokenizer.h"
7#include "pegen.h"
8
9// Internal parser functions
10
11asdl_stmt_seq*
12_PyPegen_interactive_exit(Parser *p)
13{
14    if (p->errcode) {
15        *(p->errcode) = E_EOF;
16    }
17    return NULL;
18}
19
20Py_ssize_t
21_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
22{
23    const char *str = PyUnicode_AsUTF8(line);
24    if (!str) {
25        return -1;
26    }
27    Py_ssize_t len = strlen(str);
28    if (col_offset > len + 1) {
29        col_offset = len + 1;
30    }
31    assert(col_offset >= 0);
32    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
33    if (!text) {
34        return -1;
35    }
36    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
37    Py_DECREF(text);
38    return size;
39}
40
41// Here, mark is the start of the node, while p->mark is the end.
42// If node==NULL, they should be the same.
43int
44_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
45{
46    // Insert in front
47    Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
48    if (m == NULL) {
49        return -1;
50    }
51    m->type = type;
52    m->node = node;
53    m->mark = p->mark;
54    m->next = p->tokens[mark]->memo;
55    p->tokens[mark]->memo = m;
56    return 0;
57}
58
59// Like _PyPegen_insert_memo(), but updates an existing node if found.
60int
61_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
62{
63    for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
64        if (m->type == type) {
65            // Update existing node.
66            m->node = node;
67            m->mark = p->mark;
68            return 0;
69        }
70    }
71    // Insert new node.
72    return _PyPegen_insert_memo(p, mark, type, node);
73}
74
75static int
76init_normalization(Parser *p)
77{
78    if (p->normalize) {
79        return 1;
80    }
81    PyObject *m = PyImport_ImportModule("unicodedata");
82    if (!m)
83    {
84        return 0;
85    }
86    p->normalize = PyObject_GetAttrString(m, "normalize");
87    Py_DECREF(m);
88    if (!p->normalize)
89    {
90        return 0;
91    }
92    return 1;
93}
94
95static int
96growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
97    assert(initial_size > 0);
98    arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
99    arr->size = initial_size;
100    arr->num_items = 0;
101
102    return arr->items != NULL;
103}
104
105static int
106growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
107    if (arr->num_items >= arr->size) {
108        size_t new_size = arr->size * 2;
109        void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
110        if (!new_items_array) {
111            return 0;
112        }
113        arr->items = new_items_array;
114        arr->size = new_size;
115    }
116
117    arr->items[arr->num_items].lineno = lineno;
118    arr->items[arr->num_items].comment = comment;  // Take ownership
119    arr->num_items++;
120    return 1;
121}
122
123static void
124growable_comment_array_deallocate(growable_comment_array *arr) {
125    for (unsigned i = 0; i < arr->num_items; i++) {
126        PyMem_Free(arr->items[i].comment);
127    }
128    PyMem_Free(arr->items);
129}
130
131static int
132_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
133{
134    assert(name_len > 0);
135    if (name_len >= p->n_keyword_lists ||
136        p->keywords[name_len] == NULL ||
137        p->keywords[name_len]->type == -1) {
138        return NAME;
139    }
140    for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
141        if (strncmp(k->str, name, name_len) == 0) {
142            return k->type;
143        }
144    }
145    return NAME;
146}
147
// Populate `token` from the half-open byte range [start, end) just produced
// by the tokenizer: resolve NAME tokens to keyword types, copy the bytes
// into an arena-owned object, and compute line/column coordinates.
// Returns 0 on success, -1 with an exception set on error.
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
    assert(token != NULL);

    // A NAME may really be a keyword; every other type is kept as-is.
    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
    token->bytes = PyBytes_FromStringAndSize(start, end - start);
    if (token->bytes == NULL) {
        return -1;
    }

    // The arena owns token->bytes from here on; drop our reference on failure.
    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
        Py_DECREF(token->bytes);
        return -1;
    }

    token->level = p->tok->level;

    // A (multi-line) STRING starts on an earlier line than the tokenizer's
    // current one, so use the recorded start-of-string position for it.
    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
    int end_lineno = p->tok->lineno;

    // -1 means "column unknown" (pointer not within the reference line).
    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;

    token->lineno = lineno;
    // On the very first parsed line, shift columns by the caller-supplied
    // starting offset (used e.g. when parsing embedded source fragments).
    token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
    token->end_lineno = end_lineno;
    token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;

    p->fill += 1;

    // Decode failures get a dedicated error; any other tokenizer failure is
    // translated into an exception by _Pypegen_tokenizer_error().
    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
        return _Pypegen_raise_decode_error(p);
    }

    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
}
185
186static int
187_resize_tokens_array(Parser *p) {
188    int newsize = p->size * 2;
189    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
190    if (new_tokens == NULL) {
191        PyErr_NoMemory();
192        return -1;
193    }
194    p->tokens = new_tokens;
195
196    for (int i = p->size; i < newsize; i++) {
197        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
198        if (p->tokens[i] == NULL) {
199            p->size = i; // Needed, in order to cleanup correctly after parser fails
200            PyErr_NoMemory();
201            return -1;
202        }
203    }
204    p->size = newsize;
205    return 0;
206}
207
208int
209_PyPegen_fill_token(Parser *p)
210{
211    const char *start;
212    const char *end;
213    int type = _PyTokenizer_Get(p->tok, &start, &end);
214
215    // Record and skip '# type: ignore' comments
216    while (type == TYPE_IGNORE) {
217        Py_ssize_t len = end - start;
218        char *tag = PyMem_Malloc(len + 1);
219        if (tag == NULL) {
220            PyErr_NoMemory();
221            return -1;
222        }
223        strncpy(tag, start, len);
224        tag[len] = '\0';
225        // Ownership of tag passes to the growable array
226        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
227            PyErr_NoMemory();
228            return -1;
229        }
230        type = _PyTokenizer_Get(p->tok, &start, &end);
231    }
232
233    // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
234    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
235        type = NEWLINE; /* Add an extra newline */
236        p->parsing_started = 0;
237
238        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
239            p->tok->pendin = -p->tok->indent;
240            p->tok->indent = 0;
241        }
242    }
243    else {
244        p->parsing_started = 1;
245    }
246
247    // Check if we are at the limit of the token array capacity and resize if needed
248    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
249        return -1;
250    }
251
252    Token *t = p->tokens[p->fill];
253    return initialize_token(p, t, start, end, type);
254}
255
256#if defined(Py_DEBUG)
257// Instrumentation to count the effectiveness of memoization.
258// The array counts the number of tokens skipped by memoization,
259// indexed by type.
260
261#define NSTATISTICS 2000
262static long memo_statistics[NSTATISTICS];
263
264void
265_PyPegen_clear_memo_statistics(void)
266{
267    for (int i = 0; i < NSTATISTICS; i++) {
268        memo_statistics[i] = 0;
269    }
270}
271
272PyObject *
273_PyPegen_get_memo_statistics(void)
274{
275    PyObject *ret = PyList_New(NSTATISTICS);
276    if (ret == NULL) {
277        return NULL;
278    }
279    for (int i = 0; i < NSTATISTICS; i++) {
280        PyObject *value = PyLong_FromLong(memo_statistics[i]);
281        if (value == NULL) {
282            Py_DECREF(ret);
283            return NULL;
284        }
285        // PyList_SetItem borrows a reference to value.
286        if (PyList_SetItem(ret, i, value) < 0) {
287            Py_DECREF(ret);
288            return NULL;
289        }
290    }
291    return ret;
292}
293#endif
294
295int  // bool
296_PyPegen_is_memoized(Parser *p, int type, void *pres)
297{
298    if (p->mark == p->fill) {
299        if (_PyPegen_fill_token(p) < 0) {
300            p->error_indicator = 1;
301            return -1;
302        }
303    }
304
305    Token *t = p->tokens[p->mark];
306
307    for (Memo *m = t->memo; m != NULL; m = m->next) {
308        if (m->type == type) {
309#if defined(PY_DEBUG)
310            if (0 <= type && type < NSTATISTICS) {
311                long count = m->mark - p->mark;
312                // A memoized negative result counts for one.
313                if (count <= 0) {
314                    count = 1;
315                }
316                memo_statistics[type] += count;
317            }
318#endif
319            p->mark = m->mark;
320            *(void **)(pres) = m->node;
321            return 1;
322        }
323    }
324    return 0;
325}
326
327int
328_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
329{
330    int mark = p->mark;
331    void *res = func(p);
332    p->mark = mark;
333    return (res != NULL) == positive;
334}
335
336int
337_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
338{
339    int mark = p->mark;
340    void *res = func(p, arg);
341    p->mark = mark;
342    return (res != NULL) == positive;
343}
344
345int
346_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
347{
348    int mark = p->mark;
349    void *res = func(p, arg);
350    p->mark = mark;
351    return (res != NULL) == positive;
352}
353
354int
355_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
356{
357    int mark = p->mark;
358    void *res = (void*)func(p);
359    p->mark = mark;
360    return (res != NULL) == positive;
361}
362
363Token *
364_PyPegen_expect_token(Parser *p, int type)
365{
366    if (p->mark == p->fill) {
367        if (_PyPegen_fill_token(p) < 0) {
368            p->error_indicator = 1;
369            return NULL;
370        }
371    }
372    Token *t = p->tokens[p->mark];
373    if (t->type != type) {
374        return NULL;
375    }
376    p->mark += 1;
377    return t;
378}
379
380void*
381_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
382
383    if (p->error_indicator == 1) {
384        return NULL;
385    }
386    if (result == NULL) {
387        RAISE_SYNTAX_ERROR("expected (%s)", expected);
388        return NULL;
389    }
390    return result;
391}
392
393Token *
394_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
395
396    if (p->error_indicator == 1) {
397        return NULL;
398    }
399
400    if (p->mark == p->fill) {
401        if (_PyPegen_fill_token(p) < 0) {
402            p->error_indicator = 1;
403            return NULL;
404        }
405    }
406    Token *t = p->tokens[p->mark];
407    if (t->type != type) {
408        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
409        return NULL;
410    }
411    p->mark += 1;
412    return t;
413}
414
415expr_ty
416_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
417{
418    if (p->mark == p->fill) {
419        if (_PyPegen_fill_token(p) < 0) {
420            p->error_indicator = 1;
421            return NULL;
422        }
423    }
424    Token *t = p->tokens[p->mark];
425    if (t->type != NAME) {
426        return NULL;
427    }
428    const char *s = PyBytes_AsString(t->bytes);
429    if (!s) {
430        p->error_indicator = 1;
431        return NULL;
432    }
433    if (strcmp(s, keyword) != 0) {
434        return NULL;
435    }
436    return _PyPegen_name_token(p);
437}
438
439Token *
440_PyPegen_get_last_nonnwhitespace_token(Parser *p)
441{
442    assert(p->mark >= 0);
443    Token *token = NULL;
444    for (int m = p->mark - 1; m >= 0; m--) {
445        token = p->tokens[m];
446        if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
447            break;
448        }
449    }
450    return token;
451}
452
// Decode the NUL-terminated UTF-8 identifier `n` into an interned str owned
// by the parser's arena, applying NFKC normalization to non-ASCII names
// (PEP 3131). Returns a borrowed-from-arena reference, or NULL with
// p->error_indicator set on failure.
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* PyUnicode_DecodeUTF8 should always return a ready string. */
    assert(PyUnicode_IS_READY(id));
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        PyObject *id2;
        // Lazily sets up p->normalize (unicodedata.normalize).
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        // Equivalent to calling unicodedata.normalize("NFKC", id).
        PyObject *args[2] = {form, id};
        id2 = _PyObject_FastCall(p->normalize, args, 2);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }
        // A user-substituted unicodedata module could return anything;
        // reject non-str results explicitly.
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    PyUnicode_InternInPlace(&id);
    // Ownership passes to the arena; it decrefs `id` when the arena is freed.
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    p->error_indicator = 1;
    return NULL;
}
508
509static expr_ty
510_PyPegen_name_from_token(Parser *p, Token* t)
511{
512    if (t == NULL) {
513        return NULL;
514    }
515    const char *s = PyBytes_AsString(t->bytes);
516    if (!s) {
517        p->error_indicator = 1;
518        return NULL;
519    }
520    PyObject *id = _PyPegen_new_identifier(p, s);
521    if (id == NULL) {
522        p->error_indicator = 1;
523        return NULL;
524    }
525    return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
526                       t->end_col_offset, p->arena);
527}
528
529expr_ty
530_PyPegen_name_token(Parser *p)
531{
532    Token *t = _PyPegen_expect_token(p, NAME);
533    return _PyPegen_name_from_token(p, t);
534}
535
536void *
537_PyPegen_string_token(Parser *p)
538{
539    return _PyPegen_expect_token(p, STRING);
540}
541
542expr_ty _PyPegen_soft_keyword_token(Parser *p) {
543    Token *t = _PyPegen_expect_token(p, NAME);
544    if (t == NULL) {
545        return NULL;
546    }
547    char *the_token;
548    Py_ssize_t size;
549    PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
550    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
551        if (strncmp(*keyword, the_token, size) == 0) {
552            return _PyPegen_name_from_token(p, t);
553        }
554    }
555    return NULL;
556}
557
// Convert a NUL-terminated numeric literal (already stripped of
// underscores) into a Python int, float, or complex object. Returns a new
// reference, or NULL with an exception set.
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    // A trailing 'j'/'J' marks an imaginary (complex) literal.
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        // Leading zero: hex/octal/binary prefix (or plain zero); parse as
        // unsigned so the full range of bit patterns is accepted.
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            // Wrapped negative without a range error: value doesn't fit in a
            // signed long, fall back to arbitrary precision.
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    if (*end == '\0') {
        // The whole string parsed as an integer.
        if (errno != 0) {
            // Out of range for long: use arbitrary precision instead.
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}
601
602static PyObject *
603parsenumber(const char *s)
604{
605    char *dup;
606    char *end;
607    PyObject *res = NULL;
608
609    assert(s != NULL);
610
611    if (strchr(s, '_') == NULL) {
612        return parsenumber_raw(s);
613    }
614    /* Create a duplicate without underscores. */
615    dup = PyMem_Malloc(strlen(s) + 1);
616    if (dup == NULL) {
617        return PyErr_NoMemory();
618    }
619    end = dup;
620    for (; *s; s++) {
621        if (*s != '_') {
622            *end++ = *s;
623        }
624    }
625    *end = '\0';
626    res = parsenumber_raw(dup);
627    PyMem_Free(dup);
628    return res;
629}
630
// Parse a NUMBER token into a Constant AST node. Enforces the feature gate
// for underscore separators and rewrites the ValueError raised by the
// int-conversion length limit into a friendlier SyntaxError.
expr_ty
_PyPegen_number_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    // Underscores in numeric literals are a Python 3.6+ feature (PEP 515).
    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        PyThreadState *tstate = _PyThreadState_GET();
        // The only way a ValueError should happen in _this_ code is via
        // PyLong_FromString hitting a length limit.
        if (tstate->curexc_type == PyExc_ValueError &&
            tstate->curexc_value != NULL) {
            PyObject *type, *value, *tb;
            // This acts as PyErr_Clear() as we're replacing curexc.
            PyErr_Fetch(&type, &value, &tb);
            Py_XDECREF(tb);
            Py_DECREF(type);
            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
             * on the error message. Nobody is going to overlook their huge
             * numeric literal once given the line. */
            RAISE_ERROR_KNOWN_LOCATION(
                p, PyExc_SyntaxError,
                t->lineno, -1 /* col_offset */,
                t->end_lineno, -1 /* end_col_offset */,
                "%S - Consider hexadecimal for huge integer literals "
                "to avoid decimal conversion limits.",
                value);
            Py_DECREF(value);
        }
        return NULL;
    }

    // The arena takes ownership of the parsed constant.
    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
689
690/* Check that the source for a single input statement really is a single
691   statement by looking at what is left in the buffer after parsing.
692   Trailing whitespace and comments are OK. */
693static int // bool
694bad_single_statement(Parser *p)
695{
696    char *cur = p->tok->cur;
697    char c = *cur;
698
699    for (;;) {
700        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
701            c = *++cur;
702        }
703
704        if (!c) {
705            return 0;
706        }
707
708        if (c != '#') {
709            return 1;
710        }
711
712        /* Suck up comment. */
713        while (c && c != '\n') {
714            c = *++cur;
715        }
716    }
717}
718
719static int
720compute_parser_flags(PyCompilerFlags *flags)
721{
722    int parser_flags = 0;
723    if (!flags) {
724        return 0;
725    }
726    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
727        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
728    }
729    if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
730        parser_flags |= PyPARSE_IGNORE_COOKIE;
731    }
732    if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
733        parser_flags |= PyPARSE_BARRY_AS_BDFL;
734    }
735    if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
736        parser_flags |= PyPARSE_TYPE_COMMENTS;
737    }
738    if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
739        parser_flags |= PyPARSE_ASYNC_HACKS;
740    }
741    if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
742        parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
743    }
744    return parser_flags;
745}
746
747// Parser API
748
// Allocate and initialize a Parser over an existing tokenizer state.
// The caller retains ownership of `tok`, `errcode` and `arena`; the parser
// must be released with _PyPegen_Parser_Free(). Returns NULL with
// MemoryError set on allocation failure.
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
                    int feature_version, int *errcode, PyArena *arena)
{
    Parser *p = PyMem_Malloc(sizeof(Parser));
    if (p == NULL) {
        return (Parser *) PyErr_NoMemory();
    }
    assert(tok != NULL);
    // Propagate the relevant parse flags down into the tokenizer.
    tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
    tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
    p->tok = tok;
    p->keywords = NULL;
    p->n_keyword_lists = -1;
    p->soft_keywords = NULL;
    // Token storage starts with a single zeroed slot and grows on demand
    // (see _resize_tokens_array). Each failure path below unwinds exactly
    // what has been allocated so far.
    p->tokens = PyMem_Malloc(sizeof(Token *));
    if (!p->tokens) {
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
    if (!p->tokens[0]) {
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
        PyMem_Free(p->tokens[0]);
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }

    p->mark = 0;
    p->fill = 0;
    p->size = 1;

    p->errcode = errcode;
    p->arena = arena;
    p->start_rule = start_rule;
    p->parsing_started = 0;
    p->normalize = NULL;  // Filled lazily by init_normalization().
    p->error_indicator = 0;

    p->starting_lineno = 0;
    p->starting_col_offset = 0;
    p->flags = flags;
    p->feature_version = feature_version;
    p->known_err_token = NULL;
    p->level = 0;
    p->call_invalid_rules = 0;  // Enabled only for the second (error) pass.
    return p;
}
802
803void
804_PyPegen_Parser_Free(Parser *p)
805{
806    Py_XDECREF(p->normalize);
807    for (int i = 0; i < p->size; i++) {
808        PyMem_Free(p->tokens[i]);
809    }
810    PyMem_Free(p->tokens);
811    growable_comment_array_deallocate(&p->type_ignore_comments);
812    PyMem_Free(p);
813}
814
815static void
816reset_parser_state_for_error_pass(Parser *p)
817{
818    for (int i = 0; i < p->fill; i++) {
819        p->tokens[i]->memo = NULL;
820    }
821    p->mark = 0;
822    p->call_invalid_rules = 1;
823    // Don't try to get extra tokens in interactive mode when trying to
824    // raise specialized errors in the second pass.
825    p->tok->interactive_underflow = IUNDERFLOW_STOP;
826}
827
828static inline int
829_is_end_of_source(Parser *p) {
830    int err = p->tok->done;
831    return err == E_EOF || err == E_EOFS || err == E_EOLS;
832}
833
// Drive a full parse. On failure, run a slower second pass with the
// invalid_* rules enabled to produce a precise SyntaxError. On success in
// single-input mode, reject trailing statements. Returns the parse result
// (mod_ty) or NULL with an exception set.
void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        // Incomplete-input mode (e.g. codeop): EOF mid-construct is reported
        // as "incomplete input" rather than a specific syntax error.
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) &&  _is_end_of_source(p)) {
            PyErr_Clear();
            return RAISE_SYNTAX_ERROR("incomplete input");
        }
        // Non-syntax errors (e.g. MemoryError) propagate unchanged.
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
        // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state_for_error_pass(p);
        _PyPegen_parse(p);

        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
        // point.
        _Pypegen_set_syntax_error(p, last_token);
       return NULL;
    }

    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}
878
879mod_ty
880_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
881                             const char *enc, const char *ps1, const char *ps2,
882                             PyCompilerFlags *flags, int *errcode, PyArena *arena)
883{
884    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
885    if (tok == NULL) {
886        if (PyErr_Occurred()) {
887            _PyPegen_raise_tokenizer_init_error(filename_ob);
888            return NULL;
889        }
890        return NULL;
891    }
892    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
893        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
894        tok->fp_interactive = 1;
895    }
896    // This transfers the ownership to the tokenizer
897    tok->filename = filename_ob;
898    Py_INCREF(filename_ob);
899
900    // From here on we need to clean up even if there's an error
901    mod_ty result = NULL;
902
903    int parser_flags = compute_parser_flags(flags);
904    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
905                                    errcode, arena);
906    if (p == NULL) {
907        goto error;
908    }
909
910    result = _PyPegen_run_parser(p);
911    _PyPegen_Parser_Free(p);
912
913error:
914    _PyTokenizer_Free(tok);
915    return result;
916}
917
918mod_ty
919_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
920                       PyCompilerFlags *flags, PyArena *arena)
921{
922    int exec_input = start_rule == Py_file_input;
923
924    struct tok_state *tok;
925    if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
926        tok = _PyTokenizer_FromUTF8(str, exec_input);
927    } else {
928        tok = _PyTokenizer_FromString(str, exec_input);
929    }
930    if (tok == NULL) {
931        if (PyErr_Occurred()) {
932            _PyPegen_raise_tokenizer_init_error(filename_ob);
933        }
934        return NULL;
935    }
936    // This transfers the ownership to the tokenizer
937    tok->filename = filename_ob;
938    Py_INCREF(filename_ob);
939
940    // We need to clear up from here on
941    mod_ty result = NULL;
942
943    int parser_flags = compute_parser_flags(flags);
944    int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
945        flags->cf_feature_version : PY_MINOR_VERSION;
946    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
947                                    NULL, arena);
948    if (p == NULL) {
949        goto error;
950    }
951
952    result = _PyPegen_run_parser(p);
953    _PyPegen_Parser_Free(p);
954
955error:
956    _PyTokenizer_Free(tok);
957    return result;
958}
959