17db96d56Sopenharmony_ci#include <Python.h>
27db96d56Sopenharmony_ci#include "pycore_ast.h"           // _PyAST_Validate(),
37db96d56Sopenharmony_ci#include "pycore_pystate.h"       // _PyThreadState_GET()
47db96d56Sopenharmony_ci#include <errcode.h>
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ci#include "tokenizer.h"
77db96d56Sopenharmony_ci#include "pegen.h"
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ci// Internal parser functions
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_ciasdl_stmt_seq*
127db96d56Sopenharmony_ci_PyPegen_interactive_exit(Parser *p)
137db96d56Sopenharmony_ci{
147db96d56Sopenharmony_ci    if (p->errcode) {
157db96d56Sopenharmony_ci        *(p->errcode) = E_EOF;
167db96d56Sopenharmony_ci    }
177db96d56Sopenharmony_ci    return NULL;
187db96d56Sopenharmony_ci}
197db96d56Sopenharmony_ci
207db96d56Sopenharmony_ciPy_ssize_t
217db96d56Sopenharmony_ci_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
227db96d56Sopenharmony_ci{
237db96d56Sopenharmony_ci    const char *str = PyUnicode_AsUTF8(line);
247db96d56Sopenharmony_ci    if (!str) {
257db96d56Sopenharmony_ci        return -1;
267db96d56Sopenharmony_ci    }
277db96d56Sopenharmony_ci    Py_ssize_t len = strlen(str);
287db96d56Sopenharmony_ci    if (col_offset > len + 1) {
297db96d56Sopenharmony_ci        col_offset = len + 1;
307db96d56Sopenharmony_ci    }
317db96d56Sopenharmony_ci    assert(col_offset >= 0);
327db96d56Sopenharmony_ci    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
337db96d56Sopenharmony_ci    if (!text) {
347db96d56Sopenharmony_ci        return -1;
357db96d56Sopenharmony_ci    }
367db96d56Sopenharmony_ci    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
377db96d56Sopenharmony_ci    Py_DECREF(text);
387db96d56Sopenharmony_ci    return size;
397db96d56Sopenharmony_ci}
407db96d56Sopenharmony_ci
417db96d56Sopenharmony_ci// Here, mark is the start of the node, while p->mark is the end.
427db96d56Sopenharmony_ci// If node==NULL, they should be the same.
437db96d56Sopenharmony_ciint
447db96d56Sopenharmony_ci_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
457db96d56Sopenharmony_ci{
467db96d56Sopenharmony_ci    // Insert in front
477db96d56Sopenharmony_ci    Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
487db96d56Sopenharmony_ci    if (m == NULL) {
497db96d56Sopenharmony_ci        return -1;
507db96d56Sopenharmony_ci    }
517db96d56Sopenharmony_ci    m->type = type;
527db96d56Sopenharmony_ci    m->node = node;
537db96d56Sopenharmony_ci    m->mark = p->mark;
547db96d56Sopenharmony_ci    m->next = p->tokens[mark]->memo;
557db96d56Sopenharmony_ci    p->tokens[mark]->memo = m;
567db96d56Sopenharmony_ci    return 0;
577db96d56Sopenharmony_ci}
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ci// Like _PyPegen_insert_memo(), but updates an existing node if found.
607db96d56Sopenharmony_ciint
617db96d56Sopenharmony_ci_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
627db96d56Sopenharmony_ci{
637db96d56Sopenharmony_ci    for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
647db96d56Sopenharmony_ci        if (m->type == type) {
657db96d56Sopenharmony_ci            // Update existing node.
667db96d56Sopenharmony_ci            m->node = node;
677db96d56Sopenharmony_ci            m->mark = p->mark;
687db96d56Sopenharmony_ci            return 0;
697db96d56Sopenharmony_ci        }
707db96d56Sopenharmony_ci    }
717db96d56Sopenharmony_ci    // Insert new node.
727db96d56Sopenharmony_ci    return _PyPegen_insert_memo(p, mark, type, node);
737db96d56Sopenharmony_ci}
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_cistatic int
767db96d56Sopenharmony_ciinit_normalization(Parser *p)
777db96d56Sopenharmony_ci{
787db96d56Sopenharmony_ci    if (p->normalize) {
797db96d56Sopenharmony_ci        return 1;
807db96d56Sopenharmony_ci    }
817db96d56Sopenharmony_ci    PyObject *m = PyImport_ImportModule("unicodedata");
827db96d56Sopenharmony_ci    if (!m)
837db96d56Sopenharmony_ci    {
847db96d56Sopenharmony_ci        return 0;
857db96d56Sopenharmony_ci    }
867db96d56Sopenharmony_ci    p->normalize = PyObject_GetAttrString(m, "normalize");
877db96d56Sopenharmony_ci    Py_DECREF(m);
887db96d56Sopenharmony_ci    if (!p->normalize)
897db96d56Sopenharmony_ci    {
907db96d56Sopenharmony_ci        return 0;
917db96d56Sopenharmony_ci    }
927db96d56Sopenharmony_ci    return 1;
937db96d56Sopenharmony_ci}
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_cistatic int
967db96d56Sopenharmony_cigrowable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
977db96d56Sopenharmony_ci    assert(initial_size > 0);
987db96d56Sopenharmony_ci    arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
997db96d56Sopenharmony_ci    arr->size = initial_size;
1007db96d56Sopenharmony_ci    arr->num_items = 0;
1017db96d56Sopenharmony_ci
1027db96d56Sopenharmony_ci    return arr->items != NULL;
1037db96d56Sopenharmony_ci}
1047db96d56Sopenharmony_ci
1057db96d56Sopenharmony_cistatic int
1067db96d56Sopenharmony_cigrowable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
1077db96d56Sopenharmony_ci    if (arr->num_items >= arr->size) {
1087db96d56Sopenharmony_ci        size_t new_size = arr->size * 2;
1097db96d56Sopenharmony_ci        void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
1107db96d56Sopenharmony_ci        if (!new_items_array) {
1117db96d56Sopenharmony_ci            return 0;
1127db96d56Sopenharmony_ci        }
1137db96d56Sopenharmony_ci        arr->items = new_items_array;
1147db96d56Sopenharmony_ci        arr->size = new_size;
1157db96d56Sopenharmony_ci    }
1167db96d56Sopenharmony_ci
1177db96d56Sopenharmony_ci    arr->items[arr->num_items].lineno = lineno;
1187db96d56Sopenharmony_ci    arr->items[arr->num_items].comment = comment;  // Take ownership
1197db96d56Sopenharmony_ci    arr->num_items++;
1207db96d56Sopenharmony_ci    return 1;
1217db96d56Sopenharmony_ci}
1227db96d56Sopenharmony_ci
1237db96d56Sopenharmony_cistatic void
1247db96d56Sopenharmony_cigrowable_comment_array_deallocate(growable_comment_array *arr) {
1257db96d56Sopenharmony_ci    for (unsigned i = 0; i < arr->num_items; i++) {
1267db96d56Sopenharmony_ci        PyMem_Free(arr->items[i].comment);
1277db96d56Sopenharmony_ci    }
1287db96d56Sopenharmony_ci    PyMem_Free(arr->items);
1297db96d56Sopenharmony_ci}
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_cistatic int
1327db96d56Sopenharmony_ci_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
1337db96d56Sopenharmony_ci{
1347db96d56Sopenharmony_ci    assert(name_len > 0);
1357db96d56Sopenharmony_ci    if (name_len >= p->n_keyword_lists ||
1367db96d56Sopenharmony_ci        p->keywords[name_len] == NULL ||
1377db96d56Sopenharmony_ci        p->keywords[name_len]->type == -1) {
1387db96d56Sopenharmony_ci        return NAME;
1397db96d56Sopenharmony_ci    }
1407db96d56Sopenharmony_ci    for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
1417db96d56Sopenharmony_ci        if (strncmp(k->str, name, name_len) == 0) {
1427db96d56Sopenharmony_ci            return k->type;
1437db96d56Sopenharmony_ci        }
1447db96d56Sopenharmony_ci    }
1457db96d56Sopenharmony_ci    return NAME;
1467db96d56Sopenharmony_ci}
1477db96d56Sopenharmony_ci
1487db96d56Sopenharmony_cistatic int
1497db96d56Sopenharmony_ciinitialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
1507db96d56Sopenharmony_ci    assert(token != NULL);
1517db96d56Sopenharmony_ci
1527db96d56Sopenharmony_ci    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
1537db96d56Sopenharmony_ci    token->bytes = PyBytes_FromStringAndSize(start, end - start);
1547db96d56Sopenharmony_ci    if (token->bytes == NULL) {
1557db96d56Sopenharmony_ci        return -1;
1567db96d56Sopenharmony_ci    }
1577db96d56Sopenharmony_ci
1587db96d56Sopenharmony_ci    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
1597db96d56Sopenharmony_ci        Py_DECREF(token->bytes);
1607db96d56Sopenharmony_ci        return -1;
1617db96d56Sopenharmony_ci    }
1627db96d56Sopenharmony_ci
1637db96d56Sopenharmony_ci    token->level = p->tok->level;
1647db96d56Sopenharmony_ci
1657db96d56Sopenharmony_ci    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
1667db96d56Sopenharmony_ci    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
1677db96d56Sopenharmony_ci    int end_lineno = p->tok->lineno;
1687db96d56Sopenharmony_ci
1697db96d56Sopenharmony_ci    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
1707db96d56Sopenharmony_ci    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
1717db96d56Sopenharmony_ci
1727db96d56Sopenharmony_ci    token->lineno = lineno;
1737db96d56Sopenharmony_ci    token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
1747db96d56Sopenharmony_ci    token->end_lineno = end_lineno;
1757db96d56Sopenharmony_ci    token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;
1767db96d56Sopenharmony_ci
1777db96d56Sopenharmony_ci    p->fill += 1;
1787db96d56Sopenharmony_ci
1797db96d56Sopenharmony_ci    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
1807db96d56Sopenharmony_ci        return _Pypegen_raise_decode_error(p);
1817db96d56Sopenharmony_ci    }
1827db96d56Sopenharmony_ci
1837db96d56Sopenharmony_ci    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
1847db96d56Sopenharmony_ci}
1857db96d56Sopenharmony_ci
1867db96d56Sopenharmony_cistatic int
1877db96d56Sopenharmony_ci_resize_tokens_array(Parser *p) {
1887db96d56Sopenharmony_ci    int newsize = p->size * 2;
1897db96d56Sopenharmony_ci    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
1907db96d56Sopenharmony_ci    if (new_tokens == NULL) {
1917db96d56Sopenharmony_ci        PyErr_NoMemory();
1927db96d56Sopenharmony_ci        return -1;
1937db96d56Sopenharmony_ci    }
1947db96d56Sopenharmony_ci    p->tokens = new_tokens;
1957db96d56Sopenharmony_ci
1967db96d56Sopenharmony_ci    for (int i = p->size; i < newsize; i++) {
1977db96d56Sopenharmony_ci        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
1987db96d56Sopenharmony_ci        if (p->tokens[i] == NULL) {
1997db96d56Sopenharmony_ci            p->size = i; // Needed, in order to cleanup correctly after parser fails
2007db96d56Sopenharmony_ci            PyErr_NoMemory();
2017db96d56Sopenharmony_ci            return -1;
2027db96d56Sopenharmony_ci        }
2037db96d56Sopenharmony_ci    }
2047db96d56Sopenharmony_ci    p->size = newsize;
2057db96d56Sopenharmony_ci    return 0;
2067db96d56Sopenharmony_ci}
2077db96d56Sopenharmony_ci
2087db96d56Sopenharmony_ciint
2097db96d56Sopenharmony_ci_PyPegen_fill_token(Parser *p)
2107db96d56Sopenharmony_ci{
2117db96d56Sopenharmony_ci    const char *start;
2127db96d56Sopenharmony_ci    const char *end;
2137db96d56Sopenharmony_ci    int type = _PyTokenizer_Get(p->tok, &start, &end);
2147db96d56Sopenharmony_ci
2157db96d56Sopenharmony_ci    // Record and skip '# type: ignore' comments
2167db96d56Sopenharmony_ci    while (type == TYPE_IGNORE) {
2177db96d56Sopenharmony_ci        Py_ssize_t len = end - start;
2187db96d56Sopenharmony_ci        char *tag = PyMem_Malloc(len + 1);
2197db96d56Sopenharmony_ci        if (tag == NULL) {
2207db96d56Sopenharmony_ci            PyErr_NoMemory();
2217db96d56Sopenharmony_ci            return -1;
2227db96d56Sopenharmony_ci        }
2237db96d56Sopenharmony_ci        strncpy(tag, start, len);
2247db96d56Sopenharmony_ci        tag[len] = '\0';
2257db96d56Sopenharmony_ci        // Ownership of tag passes to the growable array
2267db96d56Sopenharmony_ci        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
2277db96d56Sopenharmony_ci            PyErr_NoMemory();
2287db96d56Sopenharmony_ci            return -1;
2297db96d56Sopenharmony_ci        }
2307db96d56Sopenharmony_ci        type = _PyTokenizer_Get(p->tok, &start, &end);
2317db96d56Sopenharmony_ci    }
2327db96d56Sopenharmony_ci
2337db96d56Sopenharmony_ci    // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing
2347db96d56Sopenharmony_ci    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
2357db96d56Sopenharmony_ci        type = NEWLINE; /* Add an extra newline */
2367db96d56Sopenharmony_ci        p->parsing_started = 0;
2377db96d56Sopenharmony_ci
2387db96d56Sopenharmony_ci        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
2397db96d56Sopenharmony_ci            p->tok->pendin = -p->tok->indent;
2407db96d56Sopenharmony_ci            p->tok->indent = 0;
2417db96d56Sopenharmony_ci        }
2427db96d56Sopenharmony_ci    }
2437db96d56Sopenharmony_ci    else {
2447db96d56Sopenharmony_ci        p->parsing_started = 1;
2457db96d56Sopenharmony_ci    }
2467db96d56Sopenharmony_ci
2477db96d56Sopenharmony_ci    // Check if we are at the limit of the token array capacity and resize if needed
2487db96d56Sopenharmony_ci    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
2497db96d56Sopenharmony_ci        return -1;
2507db96d56Sopenharmony_ci    }
2517db96d56Sopenharmony_ci
2527db96d56Sopenharmony_ci    Token *t = p->tokens[p->fill];
2537db96d56Sopenharmony_ci    return initialize_token(p, t, start, end, type);
2547db96d56Sopenharmony_ci}
2557db96d56Sopenharmony_ci
2567db96d56Sopenharmony_ci#if defined(Py_DEBUG)
2577db96d56Sopenharmony_ci// Instrumentation to count the effectiveness of memoization.
2587db96d56Sopenharmony_ci// The array counts the number of tokens skipped by memoization,
2597db96d56Sopenharmony_ci// indexed by type.
2607db96d56Sopenharmony_ci
2617db96d56Sopenharmony_ci#define NSTATISTICS 2000
2627db96d56Sopenharmony_cistatic long memo_statistics[NSTATISTICS];
2637db96d56Sopenharmony_ci
2647db96d56Sopenharmony_civoid
2657db96d56Sopenharmony_ci_PyPegen_clear_memo_statistics(void)
2667db96d56Sopenharmony_ci{
2677db96d56Sopenharmony_ci    for (int i = 0; i < NSTATISTICS; i++) {
2687db96d56Sopenharmony_ci        memo_statistics[i] = 0;
2697db96d56Sopenharmony_ci    }
2707db96d56Sopenharmony_ci}
2717db96d56Sopenharmony_ci
2727db96d56Sopenharmony_ciPyObject *
2737db96d56Sopenharmony_ci_PyPegen_get_memo_statistics(void)
2747db96d56Sopenharmony_ci{
2757db96d56Sopenharmony_ci    PyObject *ret = PyList_New(NSTATISTICS);
2767db96d56Sopenharmony_ci    if (ret == NULL) {
2777db96d56Sopenharmony_ci        return NULL;
2787db96d56Sopenharmony_ci    }
2797db96d56Sopenharmony_ci    for (int i = 0; i < NSTATISTICS; i++) {
2807db96d56Sopenharmony_ci        PyObject *value = PyLong_FromLong(memo_statistics[i]);
2817db96d56Sopenharmony_ci        if (value == NULL) {
2827db96d56Sopenharmony_ci            Py_DECREF(ret);
2837db96d56Sopenharmony_ci            return NULL;
2847db96d56Sopenharmony_ci        }
2857db96d56Sopenharmony_ci        // PyList_SetItem borrows a reference to value.
2867db96d56Sopenharmony_ci        if (PyList_SetItem(ret, i, value) < 0) {
2877db96d56Sopenharmony_ci            Py_DECREF(ret);
2887db96d56Sopenharmony_ci            return NULL;
2897db96d56Sopenharmony_ci        }
2907db96d56Sopenharmony_ci    }
2917db96d56Sopenharmony_ci    return ret;
2927db96d56Sopenharmony_ci}
2937db96d56Sopenharmony_ci#endif
2947db96d56Sopenharmony_ci
2957db96d56Sopenharmony_ciint  // bool
2967db96d56Sopenharmony_ci_PyPegen_is_memoized(Parser *p, int type, void *pres)
2977db96d56Sopenharmony_ci{
2987db96d56Sopenharmony_ci    if (p->mark == p->fill) {
2997db96d56Sopenharmony_ci        if (_PyPegen_fill_token(p) < 0) {
3007db96d56Sopenharmony_ci            p->error_indicator = 1;
3017db96d56Sopenharmony_ci            return -1;
3027db96d56Sopenharmony_ci        }
3037db96d56Sopenharmony_ci    }
3047db96d56Sopenharmony_ci
3057db96d56Sopenharmony_ci    Token *t = p->tokens[p->mark];
3067db96d56Sopenharmony_ci
3077db96d56Sopenharmony_ci    for (Memo *m = t->memo; m != NULL; m = m->next) {
3087db96d56Sopenharmony_ci        if (m->type == type) {
3097db96d56Sopenharmony_ci#if defined(PY_DEBUG)
3107db96d56Sopenharmony_ci            if (0 <= type && type < NSTATISTICS) {
3117db96d56Sopenharmony_ci                long count = m->mark - p->mark;
3127db96d56Sopenharmony_ci                // A memoized negative result counts for one.
3137db96d56Sopenharmony_ci                if (count <= 0) {
3147db96d56Sopenharmony_ci                    count = 1;
3157db96d56Sopenharmony_ci                }
3167db96d56Sopenharmony_ci                memo_statistics[type] += count;
3177db96d56Sopenharmony_ci            }
3187db96d56Sopenharmony_ci#endif
3197db96d56Sopenharmony_ci            p->mark = m->mark;
3207db96d56Sopenharmony_ci            *(void **)(pres) = m->node;
3217db96d56Sopenharmony_ci            return 1;
3227db96d56Sopenharmony_ci        }
3237db96d56Sopenharmony_ci    }
3247db96d56Sopenharmony_ci    return 0;
3257db96d56Sopenharmony_ci}
3267db96d56Sopenharmony_ci
3277db96d56Sopenharmony_ciint
3287db96d56Sopenharmony_ci_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
3297db96d56Sopenharmony_ci{
3307db96d56Sopenharmony_ci    int mark = p->mark;
3317db96d56Sopenharmony_ci    void *res = func(p);
3327db96d56Sopenharmony_ci    p->mark = mark;
3337db96d56Sopenharmony_ci    return (res != NULL) == positive;
3347db96d56Sopenharmony_ci}
3357db96d56Sopenharmony_ci
3367db96d56Sopenharmony_ciint
3377db96d56Sopenharmony_ci_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
3387db96d56Sopenharmony_ci{
3397db96d56Sopenharmony_ci    int mark = p->mark;
3407db96d56Sopenharmony_ci    void *res = func(p, arg);
3417db96d56Sopenharmony_ci    p->mark = mark;
3427db96d56Sopenharmony_ci    return (res != NULL) == positive;
3437db96d56Sopenharmony_ci}
3447db96d56Sopenharmony_ci
3457db96d56Sopenharmony_ciint
3467db96d56Sopenharmony_ci_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
3477db96d56Sopenharmony_ci{
3487db96d56Sopenharmony_ci    int mark = p->mark;
3497db96d56Sopenharmony_ci    void *res = func(p, arg);
3507db96d56Sopenharmony_ci    p->mark = mark;
3517db96d56Sopenharmony_ci    return (res != NULL) == positive;
3527db96d56Sopenharmony_ci}
3537db96d56Sopenharmony_ci
3547db96d56Sopenharmony_ciint
3557db96d56Sopenharmony_ci_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
3567db96d56Sopenharmony_ci{
3577db96d56Sopenharmony_ci    int mark = p->mark;
3587db96d56Sopenharmony_ci    void *res = (void*)func(p);
3597db96d56Sopenharmony_ci    p->mark = mark;
3607db96d56Sopenharmony_ci    return (res != NULL) == positive;
3617db96d56Sopenharmony_ci}
3627db96d56Sopenharmony_ci
3637db96d56Sopenharmony_ciToken *
3647db96d56Sopenharmony_ci_PyPegen_expect_token(Parser *p, int type)
3657db96d56Sopenharmony_ci{
3667db96d56Sopenharmony_ci    if (p->mark == p->fill) {
3677db96d56Sopenharmony_ci        if (_PyPegen_fill_token(p) < 0) {
3687db96d56Sopenharmony_ci            p->error_indicator = 1;
3697db96d56Sopenharmony_ci            return NULL;
3707db96d56Sopenharmony_ci        }
3717db96d56Sopenharmony_ci    }
3727db96d56Sopenharmony_ci    Token *t = p->tokens[p->mark];
3737db96d56Sopenharmony_ci    if (t->type != type) {
3747db96d56Sopenharmony_ci        return NULL;
3757db96d56Sopenharmony_ci    }
3767db96d56Sopenharmony_ci    p->mark += 1;
3777db96d56Sopenharmony_ci    return t;
3787db96d56Sopenharmony_ci}
3797db96d56Sopenharmony_ci
3807db96d56Sopenharmony_civoid*
3817db96d56Sopenharmony_ci_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
3827db96d56Sopenharmony_ci
3837db96d56Sopenharmony_ci    if (p->error_indicator == 1) {
3847db96d56Sopenharmony_ci        return NULL;
3857db96d56Sopenharmony_ci    }
3867db96d56Sopenharmony_ci    if (result == NULL) {
3877db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("expected (%s)", expected);
3887db96d56Sopenharmony_ci        return NULL;
3897db96d56Sopenharmony_ci    }
3907db96d56Sopenharmony_ci    return result;
3917db96d56Sopenharmony_ci}
3927db96d56Sopenharmony_ci
3937db96d56Sopenharmony_ciToken *
3947db96d56Sopenharmony_ci_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
3957db96d56Sopenharmony_ci
3967db96d56Sopenharmony_ci    if (p->error_indicator == 1) {
3977db96d56Sopenharmony_ci        return NULL;
3987db96d56Sopenharmony_ci    }
3997db96d56Sopenharmony_ci
4007db96d56Sopenharmony_ci    if (p->mark == p->fill) {
4017db96d56Sopenharmony_ci        if (_PyPegen_fill_token(p) < 0) {
4027db96d56Sopenharmony_ci            p->error_indicator = 1;
4037db96d56Sopenharmony_ci            return NULL;
4047db96d56Sopenharmony_ci        }
4057db96d56Sopenharmony_ci    }
4067db96d56Sopenharmony_ci    Token *t = p->tokens[p->mark];
4077db96d56Sopenharmony_ci    if (t->type != type) {
4087db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
4097db96d56Sopenharmony_ci        return NULL;
4107db96d56Sopenharmony_ci    }
4117db96d56Sopenharmony_ci    p->mark += 1;
4127db96d56Sopenharmony_ci    return t;
4137db96d56Sopenharmony_ci}
4147db96d56Sopenharmony_ci
4157db96d56Sopenharmony_ciexpr_ty
4167db96d56Sopenharmony_ci_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
4177db96d56Sopenharmony_ci{
4187db96d56Sopenharmony_ci    if (p->mark == p->fill) {
4197db96d56Sopenharmony_ci        if (_PyPegen_fill_token(p) < 0) {
4207db96d56Sopenharmony_ci            p->error_indicator = 1;
4217db96d56Sopenharmony_ci            return NULL;
4227db96d56Sopenharmony_ci        }
4237db96d56Sopenharmony_ci    }
4247db96d56Sopenharmony_ci    Token *t = p->tokens[p->mark];
4257db96d56Sopenharmony_ci    if (t->type != NAME) {
4267db96d56Sopenharmony_ci        return NULL;
4277db96d56Sopenharmony_ci    }
4287db96d56Sopenharmony_ci    const char *s = PyBytes_AsString(t->bytes);
4297db96d56Sopenharmony_ci    if (!s) {
4307db96d56Sopenharmony_ci        p->error_indicator = 1;
4317db96d56Sopenharmony_ci        return NULL;
4327db96d56Sopenharmony_ci    }
4337db96d56Sopenharmony_ci    if (strcmp(s, keyword) != 0) {
4347db96d56Sopenharmony_ci        return NULL;
4357db96d56Sopenharmony_ci    }
4367db96d56Sopenharmony_ci    return _PyPegen_name_token(p);
4377db96d56Sopenharmony_ci}
4387db96d56Sopenharmony_ci
4397db96d56Sopenharmony_ciToken *
4407db96d56Sopenharmony_ci_PyPegen_get_last_nonnwhitespace_token(Parser *p)
4417db96d56Sopenharmony_ci{
4427db96d56Sopenharmony_ci    assert(p->mark >= 0);
4437db96d56Sopenharmony_ci    Token *token = NULL;
4447db96d56Sopenharmony_ci    for (int m = p->mark - 1; m >= 0; m--) {
4457db96d56Sopenharmony_ci        token = p->tokens[m];
4467db96d56Sopenharmony_ci        if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
4477db96d56Sopenharmony_ci            break;
4487db96d56Sopenharmony_ci        }
4497db96d56Sopenharmony_ci    }
4507db96d56Sopenharmony_ci    return token;
4517db96d56Sopenharmony_ci}
4527db96d56Sopenharmony_ci
4537db96d56Sopenharmony_ciPyObject *
4547db96d56Sopenharmony_ci_PyPegen_new_identifier(Parser *p, const char *n)
4557db96d56Sopenharmony_ci{
4567db96d56Sopenharmony_ci    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
4577db96d56Sopenharmony_ci    if (!id) {
4587db96d56Sopenharmony_ci        goto error;
4597db96d56Sopenharmony_ci    }
4607db96d56Sopenharmony_ci    /* PyUnicode_DecodeUTF8 should always return a ready string. */
4617db96d56Sopenharmony_ci    assert(PyUnicode_IS_READY(id));
4627db96d56Sopenharmony_ci    /* Check whether there are non-ASCII characters in the
4637db96d56Sopenharmony_ci       identifier; if so, normalize to NFKC. */
4647db96d56Sopenharmony_ci    if (!PyUnicode_IS_ASCII(id))
4657db96d56Sopenharmony_ci    {
4667db96d56Sopenharmony_ci        PyObject *id2;
4677db96d56Sopenharmony_ci        if (!init_normalization(p))
4687db96d56Sopenharmony_ci        {
4697db96d56Sopenharmony_ci            Py_DECREF(id);
4707db96d56Sopenharmony_ci            goto error;
4717db96d56Sopenharmony_ci        }
4727db96d56Sopenharmony_ci        PyObject *form = PyUnicode_InternFromString("NFKC");
4737db96d56Sopenharmony_ci        if (form == NULL)
4747db96d56Sopenharmony_ci        {
4757db96d56Sopenharmony_ci            Py_DECREF(id);
4767db96d56Sopenharmony_ci            goto error;
4777db96d56Sopenharmony_ci        }
4787db96d56Sopenharmony_ci        PyObject *args[2] = {form, id};
4797db96d56Sopenharmony_ci        id2 = _PyObject_FastCall(p->normalize, args, 2);
4807db96d56Sopenharmony_ci        Py_DECREF(id);
4817db96d56Sopenharmony_ci        Py_DECREF(form);
4827db96d56Sopenharmony_ci        if (!id2) {
4837db96d56Sopenharmony_ci            goto error;
4847db96d56Sopenharmony_ci        }
4857db96d56Sopenharmony_ci        if (!PyUnicode_Check(id2))
4867db96d56Sopenharmony_ci        {
4877db96d56Sopenharmony_ci            PyErr_Format(PyExc_TypeError,
4887db96d56Sopenharmony_ci                         "unicodedata.normalize() must return a string, not "
4897db96d56Sopenharmony_ci                         "%.200s",
4907db96d56Sopenharmony_ci                         _PyType_Name(Py_TYPE(id2)));
4917db96d56Sopenharmony_ci            Py_DECREF(id2);
4927db96d56Sopenharmony_ci            goto error;
4937db96d56Sopenharmony_ci        }
4947db96d56Sopenharmony_ci        id = id2;
4957db96d56Sopenharmony_ci    }
4967db96d56Sopenharmony_ci    PyUnicode_InternInPlace(&id);
4977db96d56Sopenharmony_ci    if (_PyArena_AddPyObject(p->arena, id) < 0)
4987db96d56Sopenharmony_ci    {
4997db96d56Sopenharmony_ci        Py_DECREF(id);
5007db96d56Sopenharmony_ci        goto error;
5017db96d56Sopenharmony_ci    }
5027db96d56Sopenharmony_ci    return id;
5037db96d56Sopenharmony_ci
5047db96d56Sopenharmony_cierror:
5057db96d56Sopenharmony_ci    p->error_indicator = 1;
5067db96d56Sopenharmony_ci    return NULL;
5077db96d56Sopenharmony_ci}
5087db96d56Sopenharmony_ci
5097db96d56Sopenharmony_cistatic expr_ty
5107db96d56Sopenharmony_ci_PyPegen_name_from_token(Parser *p, Token* t)
5117db96d56Sopenharmony_ci{
5127db96d56Sopenharmony_ci    if (t == NULL) {
5137db96d56Sopenharmony_ci        return NULL;
5147db96d56Sopenharmony_ci    }
5157db96d56Sopenharmony_ci    const char *s = PyBytes_AsString(t->bytes);
5167db96d56Sopenharmony_ci    if (!s) {
5177db96d56Sopenharmony_ci        p->error_indicator = 1;
5187db96d56Sopenharmony_ci        return NULL;
5197db96d56Sopenharmony_ci    }
5207db96d56Sopenharmony_ci    PyObject *id = _PyPegen_new_identifier(p, s);
5217db96d56Sopenharmony_ci    if (id == NULL) {
5227db96d56Sopenharmony_ci        p->error_indicator = 1;
5237db96d56Sopenharmony_ci        return NULL;
5247db96d56Sopenharmony_ci    }
5257db96d56Sopenharmony_ci    return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
5267db96d56Sopenharmony_ci                       t->end_col_offset, p->arena);
5277db96d56Sopenharmony_ci}
5287db96d56Sopenharmony_ci
5297db96d56Sopenharmony_ciexpr_ty
5307db96d56Sopenharmony_ci_PyPegen_name_token(Parser *p)
5317db96d56Sopenharmony_ci{
5327db96d56Sopenharmony_ci    Token *t = _PyPegen_expect_token(p, NAME);
5337db96d56Sopenharmony_ci    return _PyPegen_name_from_token(p, t);
5347db96d56Sopenharmony_ci}
5357db96d56Sopenharmony_ci
5367db96d56Sopenharmony_civoid *
5377db96d56Sopenharmony_ci_PyPegen_string_token(Parser *p)
5387db96d56Sopenharmony_ci{
5397db96d56Sopenharmony_ci    return _PyPegen_expect_token(p, STRING);
5407db96d56Sopenharmony_ci}
5417db96d56Sopenharmony_ci
5427db96d56Sopenharmony_ciexpr_ty _PyPegen_soft_keyword_token(Parser *p) {
5437db96d56Sopenharmony_ci    Token *t = _PyPegen_expect_token(p, NAME);
5447db96d56Sopenharmony_ci    if (t == NULL) {
5457db96d56Sopenharmony_ci        return NULL;
5467db96d56Sopenharmony_ci    }
5477db96d56Sopenharmony_ci    char *the_token;
5487db96d56Sopenharmony_ci    Py_ssize_t size;
5497db96d56Sopenharmony_ci    PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
5507db96d56Sopenharmony_ci    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
5517db96d56Sopenharmony_ci        if (strncmp(*keyword, the_token, size) == 0) {
5527db96d56Sopenharmony_ci            return _PyPegen_name_from_token(p, t);
5537db96d56Sopenharmony_ci        }
5547db96d56Sopenharmony_ci    }
5557db96d56Sopenharmony_ci    return NULL;
5567db96d56Sopenharmony_ci}
5577db96d56Sopenharmony_ci
5587db96d56Sopenharmony_cistatic PyObject *
5597db96d56Sopenharmony_ciparsenumber_raw(const char *s)
5607db96d56Sopenharmony_ci{
5617db96d56Sopenharmony_ci    const char *end;
5627db96d56Sopenharmony_ci    long x;
5637db96d56Sopenharmony_ci    double dx;
5647db96d56Sopenharmony_ci    Py_complex compl;
5657db96d56Sopenharmony_ci    int imflag;
5667db96d56Sopenharmony_ci
5677db96d56Sopenharmony_ci    assert(s != NULL);
5687db96d56Sopenharmony_ci    errno = 0;
5697db96d56Sopenharmony_ci    end = s + strlen(s) - 1;
5707db96d56Sopenharmony_ci    imflag = *end == 'j' || *end == 'J';
5717db96d56Sopenharmony_ci    if (s[0] == '0') {
5727db96d56Sopenharmony_ci        x = (long)PyOS_strtoul(s, (char **)&end, 0);
5737db96d56Sopenharmony_ci        if (x < 0 && errno == 0) {
5747db96d56Sopenharmony_ci            return PyLong_FromString(s, (char **)0, 0);
5757db96d56Sopenharmony_ci        }
5767db96d56Sopenharmony_ci    }
5777db96d56Sopenharmony_ci    else {
5787db96d56Sopenharmony_ci        x = PyOS_strtol(s, (char **)&end, 0);
5797db96d56Sopenharmony_ci    }
5807db96d56Sopenharmony_ci    if (*end == '\0') {
5817db96d56Sopenharmony_ci        if (errno != 0) {
5827db96d56Sopenharmony_ci            return PyLong_FromString(s, (char **)0, 0);
5837db96d56Sopenharmony_ci        }
5847db96d56Sopenharmony_ci        return PyLong_FromLong(x);
5857db96d56Sopenharmony_ci    }
5867db96d56Sopenharmony_ci    /* XXX Huge floats may silently fail */
5877db96d56Sopenharmony_ci    if (imflag) {
5887db96d56Sopenharmony_ci        compl.real = 0.;
5897db96d56Sopenharmony_ci        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
5907db96d56Sopenharmony_ci        if (compl.imag == -1.0 && PyErr_Occurred()) {
5917db96d56Sopenharmony_ci            return NULL;
5927db96d56Sopenharmony_ci        }
5937db96d56Sopenharmony_ci        return PyComplex_FromCComplex(compl);
5947db96d56Sopenharmony_ci    }
5957db96d56Sopenharmony_ci    dx = PyOS_string_to_double(s, NULL, NULL);
5967db96d56Sopenharmony_ci    if (dx == -1.0 && PyErr_Occurred()) {
5977db96d56Sopenharmony_ci        return NULL;
5987db96d56Sopenharmony_ci    }
5997db96d56Sopenharmony_ci    return PyFloat_FromDouble(dx);
6007db96d56Sopenharmony_ci}
6017db96d56Sopenharmony_ci
6027db96d56Sopenharmony_cistatic PyObject *
6037db96d56Sopenharmony_ciparsenumber(const char *s)
6047db96d56Sopenharmony_ci{
6057db96d56Sopenharmony_ci    char *dup;
6067db96d56Sopenharmony_ci    char *end;
6077db96d56Sopenharmony_ci    PyObject *res = NULL;
6087db96d56Sopenharmony_ci
6097db96d56Sopenharmony_ci    assert(s != NULL);
6107db96d56Sopenharmony_ci
6117db96d56Sopenharmony_ci    if (strchr(s, '_') == NULL) {
6127db96d56Sopenharmony_ci        return parsenumber_raw(s);
6137db96d56Sopenharmony_ci    }
6147db96d56Sopenharmony_ci    /* Create a duplicate without underscores. */
6157db96d56Sopenharmony_ci    dup = PyMem_Malloc(strlen(s) + 1);
6167db96d56Sopenharmony_ci    if (dup == NULL) {
6177db96d56Sopenharmony_ci        return PyErr_NoMemory();
6187db96d56Sopenharmony_ci    }
6197db96d56Sopenharmony_ci    end = dup;
6207db96d56Sopenharmony_ci    for (; *s; s++) {
6217db96d56Sopenharmony_ci        if (*s != '_') {
6227db96d56Sopenharmony_ci            *end++ = *s;
6237db96d56Sopenharmony_ci        }
6247db96d56Sopenharmony_ci    }
6257db96d56Sopenharmony_ci    *end = '\0';
6267db96d56Sopenharmony_ci    res = parsenumber_raw(dup);
6277db96d56Sopenharmony_ci    PyMem_Free(dup);
6287db96d56Sopenharmony_ci    return res;
6297db96d56Sopenharmony_ci}
6307db96d56Sopenharmony_ci
6317db96d56Sopenharmony_ciexpr_ty
6327db96d56Sopenharmony_ci_PyPegen_number_token(Parser *p)
6337db96d56Sopenharmony_ci{
6347db96d56Sopenharmony_ci    Token *t = _PyPegen_expect_token(p, NUMBER);
6357db96d56Sopenharmony_ci    if (t == NULL) {
6367db96d56Sopenharmony_ci        return NULL;
6377db96d56Sopenharmony_ci    }
6387db96d56Sopenharmony_ci
6397db96d56Sopenharmony_ci    const char *num_raw = PyBytes_AsString(t->bytes);
6407db96d56Sopenharmony_ci    if (num_raw == NULL) {
6417db96d56Sopenharmony_ci        p->error_indicator = 1;
6427db96d56Sopenharmony_ci        return NULL;
6437db96d56Sopenharmony_ci    }
6447db96d56Sopenharmony_ci
6457db96d56Sopenharmony_ci    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
6467db96d56Sopenharmony_ci        p->error_indicator = 1;
6477db96d56Sopenharmony_ci        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
6487db96d56Sopenharmony_ci                                  "in Python 3.6 and greater");
6497db96d56Sopenharmony_ci    }
6507db96d56Sopenharmony_ci
6517db96d56Sopenharmony_ci    PyObject *c = parsenumber(num_raw);
6527db96d56Sopenharmony_ci
6537db96d56Sopenharmony_ci    if (c == NULL) {
6547db96d56Sopenharmony_ci        p->error_indicator = 1;
6557db96d56Sopenharmony_ci        PyThreadState *tstate = _PyThreadState_GET();
6567db96d56Sopenharmony_ci        // The only way a ValueError should happen in _this_ code is via
6577db96d56Sopenharmony_ci        // PyLong_FromString hitting a length limit.
6587db96d56Sopenharmony_ci        if (tstate->curexc_type == PyExc_ValueError &&
6597db96d56Sopenharmony_ci            tstate->curexc_value != NULL) {
6607db96d56Sopenharmony_ci            PyObject *type, *value, *tb;
6617db96d56Sopenharmony_ci            // This acts as PyErr_Clear() as we're replacing curexc.
6627db96d56Sopenharmony_ci            PyErr_Fetch(&type, &value, &tb);
6637db96d56Sopenharmony_ci            Py_XDECREF(tb);
6647db96d56Sopenharmony_ci            Py_DECREF(type);
6657db96d56Sopenharmony_ci            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
6667db96d56Sopenharmony_ci             * on the error message. Nobody is going to overlook their huge
6677db96d56Sopenharmony_ci             * numeric literal once given the line. */
6687db96d56Sopenharmony_ci            RAISE_ERROR_KNOWN_LOCATION(
6697db96d56Sopenharmony_ci                p, PyExc_SyntaxError,
6707db96d56Sopenharmony_ci                t->lineno, -1 /* col_offset */,
6717db96d56Sopenharmony_ci                t->end_lineno, -1 /* end_col_offset */,
6727db96d56Sopenharmony_ci                "%S - Consider hexadecimal for huge integer literals "
6737db96d56Sopenharmony_ci                "to avoid decimal conversion limits.",
6747db96d56Sopenharmony_ci                value);
6757db96d56Sopenharmony_ci            Py_DECREF(value);
6767db96d56Sopenharmony_ci        }
6777db96d56Sopenharmony_ci        return NULL;
6787db96d56Sopenharmony_ci    }
6797db96d56Sopenharmony_ci
6807db96d56Sopenharmony_ci    if (_PyArena_AddPyObject(p->arena, c) < 0) {
6817db96d56Sopenharmony_ci        Py_DECREF(c);
6827db96d56Sopenharmony_ci        p->error_indicator = 1;
6837db96d56Sopenharmony_ci        return NULL;
6847db96d56Sopenharmony_ci    }
6857db96d56Sopenharmony_ci
6867db96d56Sopenharmony_ci    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
6877db96d56Sopenharmony_ci                           t->end_col_offset, p->arena);
6887db96d56Sopenharmony_ci}
6897db96d56Sopenharmony_ci
6907db96d56Sopenharmony_ci/* Check that the source for a single input statement really is a single
6917db96d56Sopenharmony_ci   statement by looking at what is left in the buffer after parsing.
6927db96d56Sopenharmony_ci   Trailing whitespace and comments are OK. */
6937db96d56Sopenharmony_cistatic int // bool
6947db96d56Sopenharmony_cibad_single_statement(Parser *p)
6957db96d56Sopenharmony_ci{
6967db96d56Sopenharmony_ci    char *cur = p->tok->cur;
6977db96d56Sopenharmony_ci    char c = *cur;
6987db96d56Sopenharmony_ci
6997db96d56Sopenharmony_ci    for (;;) {
7007db96d56Sopenharmony_ci        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
7017db96d56Sopenharmony_ci            c = *++cur;
7027db96d56Sopenharmony_ci        }
7037db96d56Sopenharmony_ci
7047db96d56Sopenharmony_ci        if (!c) {
7057db96d56Sopenharmony_ci            return 0;
7067db96d56Sopenharmony_ci        }
7077db96d56Sopenharmony_ci
7087db96d56Sopenharmony_ci        if (c != '#') {
7097db96d56Sopenharmony_ci            return 1;
7107db96d56Sopenharmony_ci        }
7117db96d56Sopenharmony_ci
7127db96d56Sopenharmony_ci        /* Suck up comment. */
7137db96d56Sopenharmony_ci        while (c && c != '\n') {
7147db96d56Sopenharmony_ci            c = *++cur;
7157db96d56Sopenharmony_ci        }
7167db96d56Sopenharmony_ci    }
7177db96d56Sopenharmony_ci}
7187db96d56Sopenharmony_ci
7197db96d56Sopenharmony_cistatic int
7207db96d56Sopenharmony_cicompute_parser_flags(PyCompilerFlags *flags)
7217db96d56Sopenharmony_ci{
7227db96d56Sopenharmony_ci    int parser_flags = 0;
7237db96d56Sopenharmony_ci    if (!flags) {
7247db96d56Sopenharmony_ci        return 0;
7257db96d56Sopenharmony_ci    }
7267db96d56Sopenharmony_ci    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
7277db96d56Sopenharmony_ci        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
7287db96d56Sopenharmony_ci    }
7297db96d56Sopenharmony_ci    if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
7307db96d56Sopenharmony_ci        parser_flags |= PyPARSE_IGNORE_COOKIE;
7317db96d56Sopenharmony_ci    }
7327db96d56Sopenharmony_ci    if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
7337db96d56Sopenharmony_ci        parser_flags |= PyPARSE_BARRY_AS_BDFL;
7347db96d56Sopenharmony_ci    }
7357db96d56Sopenharmony_ci    if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
7367db96d56Sopenharmony_ci        parser_flags |= PyPARSE_TYPE_COMMENTS;
7377db96d56Sopenharmony_ci    }
7387db96d56Sopenharmony_ci    if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
7397db96d56Sopenharmony_ci        parser_flags |= PyPARSE_ASYNC_HACKS;
7407db96d56Sopenharmony_ci    }
7417db96d56Sopenharmony_ci    if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
7427db96d56Sopenharmony_ci        parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
7437db96d56Sopenharmony_ci    }
7447db96d56Sopenharmony_ci    return parser_flags;
7457db96d56Sopenharmony_ci}
7467db96d56Sopenharmony_ci
7477db96d56Sopenharmony_ci// Parser API
7487db96d56Sopenharmony_ci
7497db96d56Sopenharmony_ciParser *
7507db96d56Sopenharmony_ci_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
7517db96d56Sopenharmony_ci                    int feature_version, int *errcode, PyArena *arena)
7527db96d56Sopenharmony_ci{
7537db96d56Sopenharmony_ci    Parser *p = PyMem_Malloc(sizeof(Parser));
7547db96d56Sopenharmony_ci    if (p == NULL) {
7557db96d56Sopenharmony_ci        return (Parser *) PyErr_NoMemory();
7567db96d56Sopenharmony_ci    }
7577db96d56Sopenharmony_ci    assert(tok != NULL);
7587db96d56Sopenharmony_ci    tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
7597db96d56Sopenharmony_ci    tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
7607db96d56Sopenharmony_ci    p->tok = tok;
7617db96d56Sopenharmony_ci    p->keywords = NULL;
7627db96d56Sopenharmony_ci    p->n_keyword_lists = -1;
7637db96d56Sopenharmony_ci    p->soft_keywords = NULL;
7647db96d56Sopenharmony_ci    p->tokens = PyMem_Malloc(sizeof(Token *));
7657db96d56Sopenharmony_ci    if (!p->tokens) {
7667db96d56Sopenharmony_ci        PyMem_Free(p);
7677db96d56Sopenharmony_ci        return (Parser *) PyErr_NoMemory();
7687db96d56Sopenharmony_ci    }
7697db96d56Sopenharmony_ci    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
7707db96d56Sopenharmony_ci    if (!p->tokens[0]) {
7717db96d56Sopenharmony_ci        PyMem_Free(p->tokens);
7727db96d56Sopenharmony_ci        PyMem_Free(p);
7737db96d56Sopenharmony_ci        return (Parser *) PyErr_NoMemory();
7747db96d56Sopenharmony_ci    }
7757db96d56Sopenharmony_ci    if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
7767db96d56Sopenharmony_ci        PyMem_Free(p->tokens[0]);
7777db96d56Sopenharmony_ci        PyMem_Free(p->tokens);
7787db96d56Sopenharmony_ci        PyMem_Free(p);
7797db96d56Sopenharmony_ci        return (Parser *) PyErr_NoMemory();
7807db96d56Sopenharmony_ci    }
7817db96d56Sopenharmony_ci
7827db96d56Sopenharmony_ci    p->mark = 0;
7837db96d56Sopenharmony_ci    p->fill = 0;
7847db96d56Sopenharmony_ci    p->size = 1;
7857db96d56Sopenharmony_ci
7867db96d56Sopenharmony_ci    p->errcode = errcode;
7877db96d56Sopenharmony_ci    p->arena = arena;
7887db96d56Sopenharmony_ci    p->start_rule = start_rule;
7897db96d56Sopenharmony_ci    p->parsing_started = 0;
7907db96d56Sopenharmony_ci    p->normalize = NULL;
7917db96d56Sopenharmony_ci    p->error_indicator = 0;
7927db96d56Sopenharmony_ci
7937db96d56Sopenharmony_ci    p->starting_lineno = 0;
7947db96d56Sopenharmony_ci    p->starting_col_offset = 0;
7957db96d56Sopenharmony_ci    p->flags = flags;
7967db96d56Sopenharmony_ci    p->feature_version = feature_version;
7977db96d56Sopenharmony_ci    p->known_err_token = NULL;
7987db96d56Sopenharmony_ci    p->level = 0;
7997db96d56Sopenharmony_ci    p->call_invalid_rules = 0;
8007db96d56Sopenharmony_ci    return p;
8017db96d56Sopenharmony_ci}
8027db96d56Sopenharmony_ci
8037db96d56Sopenharmony_civoid
8047db96d56Sopenharmony_ci_PyPegen_Parser_Free(Parser *p)
8057db96d56Sopenharmony_ci{
8067db96d56Sopenharmony_ci    Py_XDECREF(p->normalize);
8077db96d56Sopenharmony_ci    for (int i = 0; i < p->size; i++) {
8087db96d56Sopenharmony_ci        PyMem_Free(p->tokens[i]);
8097db96d56Sopenharmony_ci    }
8107db96d56Sopenharmony_ci    PyMem_Free(p->tokens);
8117db96d56Sopenharmony_ci    growable_comment_array_deallocate(&p->type_ignore_comments);
8127db96d56Sopenharmony_ci    PyMem_Free(p);
8137db96d56Sopenharmony_ci}
8147db96d56Sopenharmony_ci
8157db96d56Sopenharmony_cistatic void
8167db96d56Sopenharmony_cireset_parser_state_for_error_pass(Parser *p)
8177db96d56Sopenharmony_ci{
8187db96d56Sopenharmony_ci    for (int i = 0; i < p->fill; i++) {
8197db96d56Sopenharmony_ci        p->tokens[i]->memo = NULL;
8207db96d56Sopenharmony_ci    }
8217db96d56Sopenharmony_ci    p->mark = 0;
8227db96d56Sopenharmony_ci    p->call_invalid_rules = 1;
8237db96d56Sopenharmony_ci    // Don't try to get extra tokens in interactive mode when trying to
8247db96d56Sopenharmony_ci    // raise specialized errors in the second pass.
8257db96d56Sopenharmony_ci    p->tok->interactive_underflow = IUNDERFLOW_STOP;
8267db96d56Sopenharmony_ci}
8277db96d56Sopenharmony_ci
8287db96d56Sopenharmony_cistatic inline int
8297db96d56Sopenharmony_ci_is_end_of_source(Parser *p) {
8307db96d56Sopenharmony_ci    int err = p->tok->done;
8317db96d56Sopenharmony_ci    return err == E_EOF || err == E_EOFS || err == E_EOLS;
8327db96d56Sopenharmony_ci}
8337db96d56Sopenharmony_ci
8347db96d56Sopenharmony_civoid *
8357db96d56Sopenharmony_ci_PyPegen_run_parser(Parser *p)
8367db96d56Sopenharmony_ci{
8377db96d56Sopenharmony_ci    void *res = _PyPegen_parse(p);
8387db96d56Sopenharmony_ci    assert(p->level == 0);
8397db96d56Sopenharmony_ci    if (res == NULL) {
8407db96d56Sopenharmony_ci        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) &&  _is_end_of_source(p)) {
8417db96d56Sopenharmony_ci            PyErr_Clear();
8427db96d56Sopenharmony_ci            return RAISE_SYNTAX_ERROR("incomplete input");
8437db96d56Sopenharmony_ci        }
8447db96d56Sopenharmony_ci        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
8457db96d56Sopenharmony_ci            return NULL;
8467db96d56Sopenharmony_ci        }
8477db96d56Sopenharmony_ci       // Make a second parser pass. In this pass we activate heavier and slower checks
8487db96d56Sopenharmony_ci        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
8497db96d56Sopenharmony_ci        // rules will be active during parsing.
8507db96d56Sopenharmony_ci        Token *last_token = p->tokens[p->fill - 1];
8517db96d56Sopenharmony_ci        reset_parser_state_for_error_pass(p);
8527db96d56Sopenharmony_ci        _PyPegen_parse(p);
8537db96d56Sopenharmony_ci
8547db96d56Sopenharmony_ci        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
8557db96d56Sopenharmony_ci        // point.
8567db96d56Sopenharmony_ci        _Pypegen_set_syntax_error(p, last_token);
8577db96d56Sopenharmony_ci       return NULL;
8587db96d56Sopenharmony_ci    }
8597db96d56Sopenharmony_ci
8607db96d56Sopenharmony_ci    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
8617db96d56Sopenharmony_ci        p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
8627db96d56Sopenharmony_ci        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
8637db96d56Sopenharmony_ci    }
8647db96d56Sopenharmony_ci
8657db96d56Sopenharmony_ci    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
8667db96d56Sopenharmony_ci#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
8677db96d56Sopenharmony_ci    if (p->start_rule == Py_single_input ||
8687db96d56Sopenharmony_ci        p->start_rule == Py_file_input ||
8697db96d56Sopenharmony_ci        p->start_rule == Py_eval_input)
8707db96d56Sopenharmony_ci    {
8717db96d56Sopenharmony_ci        if (!_PyAST_Validate(res)) {
8727db96d56Sopenharmony_ci            return NULL;
8737db96d56Sopenharmony_ci        }
8747db96d56Sopenharmony_ci    }
8757db96d56Sopenharmony_ci#endif
8767db96d56Sopenharmony_ci    return res;
8777db96d56Sopenharmony_ci}
8787db96d56Sopenharmony_ci
8797db96d56Sopenharmony_cimod_ty
8807db96d56Sopenharmony_ci_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
8817db96d56Sopenharmony_ci                             const char *enc, const char *ps1, const char *ps2,
8827db96d56Sopenharmony_ci                             PyCompilerFlags *flags, int *errcode, PyArena *arena)
8837db96d56Sopenharmony_ci{
8847db96d56Sopenharmony_ci    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
8857db96d56Sopenharmony_ci    if (tok == NULL) {
8867db96d56Sopenharmony_ci        if (PyErr_Occurred()) {
8877db96d56Sopenharmony_ci            _PyPegen_raise_tokenizer_init_error(filename_ob);
8887db96d56Sopenharmony_ci            return NULL;
8897db96d56Sopenharmony_ci        }
8907db96d56Sopenharmony_ci        return NULL;
8917db96d56Sopenharmony_ci    }
8927db96d56Sopenharmony_ci    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
8937db96d56Sopenharmony_ci        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
8947db96d56Sopenharmony_ci        tok->fp_interactive = 1;
8957db96d56Sopenharmony_ci    }
8967db96d56Sopenharmony_ci    // This transfers the ownership to the tokenizer
8977db96d56Sopenharmony_ci    tok->filename = filename_ob;
8987db96d56Sopenharmony_ci    Py_INCREF(filename_ob);
8997db96d56Sopenharmony_ci
9007db96d56Sopenharmony_ci    // From here on we need to clean up even if there's an error
9017db96d56Sopenharmony_ci    mod_ty result = NULL;
9027db96d56Sopenharmony_ci
9037db96d56Sopenharmony_ci    int parser_flags = compute_parser_flags(flags);
9047db96d56Sopenharmony_ci    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
9057db96d56Sopenharmony_ci                                    errcode, arena);
9067db96d56Sopenharmony_ci    if (p == NULL) {
9077db96d56Sopenharmony_ci        goto error;
9087db96d56Sopenharmony_ci    }
9097db96d56Sopenharmony_ci
9107db96d56Sopenharmony_ci    result = _PyPegen_run_parser(p);
9117db96d56Sopenharmony_ci    _PyPegen_Parser_Free(p);
9127db96d56Sopenharmony_ci
9137db96d56Sopenharmony_cierror:
9147db96d56Sopenharmony_ci    _PyTokenizer_Free(tok);
9157db96d56Sopenharmony_ci    return result;
9167db96d56Sopenharmony_ci}
9177db96d56Sopenharmony_ci
9187db96d56Sopenharmony_cimod_ty
9197db96d56Sopenharmony_ci_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
9207db96d56Sopenharmony_ci                       PyCompilerFlags *flags, PyArena *arena)
9217db96d56Sopenharmony_ci{
9227db96d56Sopenharmony_ci    int exec_input = start_rule == Py_file_input;
9237db96d56Sopenharmony_ci
9247db96d56Sopenharmony_ci    struct tok_state *tok;
9257db96d56Sopenharmony_ci    if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
9267db96d56Sopenharmony_ci        tok = _PyTokenizer_FromUTF8(str, exec_input);
9277db96d56Sopenharmony_ci    } else {
9287db96d56Sopenharmony_ci        tok = _PyTokenizer_FromString(str, exec_input);
9297db96d56Sopenharmony_ci    }
9307db96d56Sopenharmony_ci    if (tok == NULL) {
9317db96d56Sopenharmony_ci        if (PyErr_Occurred()) {
9327db96d56Sopenharmony_ci            _PyPegen_raise_tokenizer_init_error(filename_ob);
9337db96d56Sopenharmony_ci        }
9347db96d56Sopenharmony_ci        return NULL;
9357db96d56Sopenharmony_ci    }
9367db96d56Sopenharmony_ci    // This transfers the ownership to the tokenizer
9377db96d56Sopenharmony_ci    tok->filename = filename_ob;
9387db96d56Sopenharmony_ci    Py_INCREF(filename_ob);
9397db96d56Sopenharmony_ci
9407db96d56Sopenharmony_ci    // We need to clear up from here on
9417db96d56Sopenharmony_ci    mod_ty result = NULL;
9427db96d56Sopenharmony_ci
9437db96d56Sopenharmony_ci    int parser_flags = compute_parser_flags(flags);
9447db96d56Sopenharmony_ci    int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
9457db96d56Sopenharmony_ci        flags->cf_feature_version : PY_MINOR_VERSION;
9467db96d56Sopenharmony_ci    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
9477db96d56Sopenharmony_ci                                    NULL, arena);
9487db96d56Sopenharmony_ci    if (p == NULL) {
9497db96d56Sopenharmony_ci        goto error;
9507db96d56Sopenharmony_ci    }
9517db96d56Sopenharmony_ci
9527db96d56Sopenharmony_ci    result = _PyPegen_run_parser(p);
9537db96d56Sopenharmony_ci    _PyPegen_Parser_Free(p);
9547db96d56Sopenharmony_ci
9557db96d56Sopenharmony_cierror:
9567db96d56Sopenharmony_ci    _PyTokenizer_Free(tok);
9577db96d56Sopenharmony_ci    return result;
9587db96d56Sopenharmony_ci}
959