17db96d56Sopenharmony_ci#include <Python.h> 27db96d56Sopenharmony_ci#include "pycore_ast.h" // _PyAST_Validate(), 37db96d56Sopenharmony_ci#include "pycore_pystate.h" // _PyThreadState_GET() 47db96d56Sopenharmony_ci#include <errcode.h> 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ci#include "tokenizer.h" 77db96d56Sopenharmony_ci#include "pegen.h" 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_ci// Internal parser functions 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ciasdl_stmt_seq* 127db96d56Sopenharmony_ci_PyPegen_interactive_exit(Parser *p) 137db96d56Sopenharmony_ci{ 147db96d56Sopenharmony_ci if (p->errcode) { 157db96d56Sopenharmony_ci *(p->errcode) = E_EOF; 167db96d56Sopenharmony_ci } 177db96d56Sopenharmony_ci return NULL; 187db96d56Sopenharmony_ci} 197db96d56Sopenharmony_ci 207db96d56Sopenharmony_ciPy_ssize_t 217db96d56Sopenharmony_ci_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) 227db96d56Sopenharmony_ci{ 237db96d56Sopenharmony_ci const char *str = PyUnicode_AsUTF8(line); 247db96d56Sopenharmony_ci if (!str) { 257db96d56Sopenharmony_ci return -1; 267db96d56Sopenharmony_ci } 277db96d56Sopenharmony_ci Py_ssize_t len = strlen(str); 287db96d56Sopenharmony_ci if (col_offset > len + 1) { 297db96d56Sopenharmony_ci col_offset = len + 1; 307db96d56Sopenharmony_ci } 317db96d56Sopenharmony_ci assert(col_offset >= 0); 327db96d56Sopenharmony_ci PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); 337db96d56Sopenharmony_ci if (!text) { 347db96d56Sopenharmony_ci return -1; 357db96d56Sopenharmony_ci } 367db96d56Sopenharmony_ci Py_ssize_t size = PyUnicode_GET_LENGTH(text); 377db96d56Sopenharmony_ci Py_DECREF(text); 387db96d56Sopenharmony_ci return size; 397db96d56Sopenharmony_ci} 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci// Here, mark is the start of the node, while p->mark is the end. 427db96d56Sopenharmony_ci// If node==NULL, they should be the same. 437db96d56Sopenharmony_ciint 447db96d56Sopenharmony_ci_PyPegen_insert_memo(Parser *p, int mark, int type, void *node) 457db96d56Sopenharmony_ci{ 467db96d56Sopenharmony_ci // Insert in front 477db96d56Sopenharmony_ci Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo)); 487db96d56Sopenharmony_ci if (m == NULL) { 497db96d56Sopenharmony_ci return -1; 507db96d56Sopenharmony_ci } 517db96d56Sopenharmony_ci m->type = type; 527db96d56Sopenharmony_ci m->node = node; 537db96d56Sopenharmony_ci m->mark = p->mark; 547db96d56Sopenharmony_ci m->next = p->tokens[mark]->memo; 557db96d56Sopenharmony_ci p->tokens[mark]->memo = m; 567db96d56Sopenharmony_ci return 0; 577db96d56Sopenharmony_ci} 587db96d56Sopenharmony_ci 597db96d56Sopenharmony_ci// Like _PyPegen_insert_memo(), but updates an existing node if found. 607db96d56Sopenharmony_ciint 617db96d56Sopenharmony_ci_PyPegen_update_memo(Parser *p, int mark, int type, void *node) 627db96d56Sopenharmony_ci{ 637db96d56Sopenharmony_ci for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) { 647db96d56Sopenharmony_ci if (m->type == type) { 657db96d56Sopenharmony_ci // Update existing node. 667db96d56Sopenharmony_ci m->node = node; 677db96d56Sopenharmony_ci m->mark = p->mark; 687db96d56Sopenharmony_ci return 0; 697db96d56Sopenharmony_ci } 707db96d56Sopenharmony_ci } 717db96d56Sopenharmony_ci // Insert new node. 727db96d56Sopenharmony_ci return _PyPegen_insert_memo(p, mark, type, node); 737db96d56Sopenharmony_ci} 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_cistatic int 767db96d56Sopenharmony_ciinit_normalization(Parser *p) 777db96d56Sopenharmony_ci{ 787db96d56Sopenharmony_ci if (p->normalize) { 797db96d56Sopenharmony_ci return 1; 807db96d56Sopenharmony_ci } 817db96d56Sopenharmony_ci PyObject *m = PyImport_ImportModule("unicodedata"); 827db96d56Sopenharmony_ci if (!m) 837db96d56Sopenharmony_ci { 847db96d56Sopenharmony_ci return 0; 857db96d56Sopenharmony_ci } 867db96d56Sopenharmony_ci p->normalize = PyObject_GetAttrString(m, "normalize"); 877db96d56Sopenharmony_ci Py_DECREF(m); 887db96d56Sopenharmony_ci if (!p->normalize) 897db96d56Sopenharmony_ci { 907db96d56Sopenharmony_ci return 0; 917db96d56Sopenharmony_ci } 927db96d56Sopenharmony_ci return 1; 937db96d56Sopenharmony_ci} 947db96d56Sopenharmony_ci 957db96d56Sopenharmony_cistatic int 967db96d56Sopenharmony_cigrowable_comment_array_init(growable_comment_array *arr, size_t initial_size) { 977db96d56Sopenharmony_ci assert(initial_size > 0); 987db96d56Sopenharmony_ci arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items)); 997db96d56Sopenharmony_ci arr->size = initial_size; 1007db96d56Sopenharmony_ci arr->num_items = 0; 1017db96d56Sopenharmony_ci 1027db96d56Sopenharmony_ci return arr->items != NULL; 1037db96d56Sopenharmony_ci} 1047db96d56Sopenharmony_ci 1057db96d56Sopenharmony_cistatic int 1067db96d56Sopenharmony_cigrowable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) { 1077db96d56Sopenharmony_ci if (arr->num_items >= arr->size) { 1087db96d56Sopenharmony_ci size_t new_size = arr->size * 2; 1097db96d56Sopenharmony_ci void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items)); 1107db96d56Sopenharmony_ci if (!new_items_array) { 1117db96d56Sopenharmony_ci return 0; 1127db96d56Sopenharmony_ci } 1137db96d56Sopenharmony_ci arr->items = new_items_array; 1147db96d56Sopenharmony_ci arr->size = new_size; 1157db96d56Sopenharmony_ci } 1167db96d56Sopenharmony_ci 1177db96d56Sopenharmony_ci arr->items[arr->num_items].lineno = lineno; 1187db96d56Sopenharmony_ci arr->items[arr->num_items].comment = comment; // Take ownership 1197db96d56Sopenharmony_ci arr->num_items++; 1207db96d56Sopenharmony_ci return 1; 1217db96d56Sopenharmony_ci} 1227db96d56Sopenharmony_ci 1237db96d56Sopenharmony_cistatic void 1247db96d56Sopenharmony_cigrowable_comment_array_deallocate(growable_comment_array *arr) { 1257db96d56Sopenharmony_ci for (unsigned i = 0; i < arr->num_items; i++) { 1267db96d56Sopenharmony_ci PyMem_Free(arr->items[i].comment); 1277db96d56Sopenharmony_ci } 1287db96d56Sopenharmony_ci PyMem_Free(arr->items); 1297db96d56Sopenharmony_ci} 1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_cistatic int 1327db96d56Sopenharmony_ci_get_keyword_or_name_type(Parser *p, const char *name, int name_len) 1337db96d56Sopenharmony_ci{ 1347db96d56Sopenharmony_ci assert(name_len > 0); 1357db96d56Sopenharmony_ci if (name_len >= p->n_keyword_lists || 1367db96d56Sopenharmony_ci p->keywords[name_len] == NULL || 1377db96d56Sopenharmony_ci p->keywords[name_len]->type == -1) { 1387db96d56Sopenharmony_ci return NAME; 1397db96d56Sopenharmony_ci } 1407db96d56Sopenharmony_ci for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) { 1417db96d56Sopenharmony_ci if (strncmp(k->str, name, name_len) == 0) { 1427db96d56Sopenharmony_ci return k->type; 1437db96d56Sopenharmony_ci } 1447db96d56Sopenharmony_ci } 1457db96d56Sopenharmony_ci return NAME; 1467db96d56Sopenharmony_ci} 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_cistatic int 1497db96d56Sopenharmony_ciinitialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) { 1507db96d56Sopenharmony_ci assert(token != NULL); 1517db96d56Sopenharmony_ci 1527db96d56Sopenharmony_ci token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type; 1537db96d56Sopenharmony_ci token->bytes = PyBytes_FromStringAndSize(start, end - start); 1547db96d56Sopenharmony_ci if (token->bytes == NULL) { 1557db96d56Sopenharmony_ci return -1; 1567db96d56Sopenharmony_ci } 1577db96d56Sopenharmony_ci 1587db96d56Sopenharmony_ci if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) { 1597db96d56Sopenharmony_ci Py_DECREF(token->bytes); 1607db96d56Sopenharmony_ci return -1; 1617db96d56Sopenharmony_ci } 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci token->level = p->tok->level; 1647db96d56Sopenharmony_ci 1657db96d56Sopenharmony_ci const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start; 1667db96d56Sopenharmony_ci int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno; 1677db96d56Sopenharmony_ci int end_lineno = p->tok->lineno; 1687db96d56Sopenharmony_ci 1697db96d56Sopenharmony_ci int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1; 1707db96d56Sopenharmony_ci int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1; 1717db96d56Sopenharmony_ci 1727db96d56Sopenharmony_ci token->lineno = lineno; 1737db96d56Sopenharmony_ci token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset; 1747db96d56Sopenharmony_ci token->end_lineno = end_lineno; 1757db96d56Sopenharmony_ci token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset; 1767db96d56Sopenharmony_ci 1777db96d56Sopenharmony_ci p->fill += 1; 1787db96d56Sopenharmony_ci 1797db96d56Sopenharmony_ci if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) { 1807db96d56Sopenharmony_ci return _Pypegen_raise_decode_error(p); 1817db96d56Sopenharmony_ci } 1827db96d56Sopenharmony_ci 1837db96d56Sopenharmony_ci return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0); 1847db96d56Sopenharmony_ci} 1857db96d56Sopenharmony_ci 1867db96d56Sopenharmony_cistatic int 1877db96d56Sopenharmony_ci_resize_tokens_array(Parser *p) { 1887db96d56Sopenharmony_ci int newsize = p->size * 2; 1897db96d56Sopenharmony_ci Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *)); 1907db96d56Sopenharmony_ci if (new_tokens == NULL) { 1917db96d56Sopenharmony_ci PyErr_NoMemory(); 1927db96d56Sopenharmony_ci return -1; 1937db96d56Sopenharmony_ci } 1947db96d56Sopenharmony_ci p->tokens = new_tokens; 1957db96d56Sopenharmony_ci 1967db96d56Sopenharmony_ci for (int i = p->size; i < newsize; i++) { 1977db96d56Sopenharmony_ci p->tokens[i] = PyMem_Calloc(1, sizeof(Token)); 1987db96d56Sopenharmony_ci if (p->tokens[i] == NULL) { 1997db96d56Sopenharmony_ci p->size = i; // Needed, in order to cleanup correctly after parser fails 2007db96d56Sopenharmony_ci PyErr_NoMemory(); 2017db96d56Sopenharmony_ci return -1; 2027db96d56Sopenharmony_ci } 2037db96d56Sopenharmony_ci } 2047db96d56Sopenharmony_ci p->size = newsize; 2057db96d56Sopenharmony_ci return 0; 2067db96d56Sopenharmony_ci} 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_ciint 2097db96d56Sopenharmony_ci_PyPegen_fill_token(Parser *p) 2107db96d56Sopenharmony_ci{ 2117db96d56Sopenharmony_ci const char *start; 2127db96d56Sopenharmony_ci const char *end; 2137db96d56Sopenharmony_ci int type = _PyTokenizer_Get(p->tok, &start, &end); 2147db96d56Sopenharmony_ci 2157db96d56Sopenharmony_ci // Record and skip '# type: ignore' comments 2167db96d56Sopenharmony_ci while (type == TYPE_IGNORE) { 2177db96d56Sopenharmony_ci Py_ssize_t len = end - start; 2187db96d56Sopenharmony_ci char *tag = PyMem_Malloc(len + 1); 2197db96d56Sopenharmony_ci if (tag == NULL) { 2207db96d56Sopenharmony_ci PyErr_NoMemory(); 2217db96d56Sopenharmony_ci return -1; 2227db96d56Sopenharmony_ci } 2237db96d56Sopenharmony_ci strncpy(tag, start, len); 2247db96d56Sopenharmony_ci tag[len] = '\0'; 2257db96d56Sopenharmony_ci // Ownership of tag passes to the growable array 2267db96d56Sopenharmony_ci if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) { 2277db96d56Sopenharmony_ci PyErr_NoMemory(); 2287db96d56Sopenharmony_ci return -1; 2297db96d56Sopenharmony_ci } 2307db96d56Sopenharmony_ci type = _PyTokenizer_Get(p->tok, &start, &end); 2317db96d56Sopenharmony_ci } 2327db96d56Sopenharmony_ci 2337db96d56Sopenharmony_ci // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing 2347db96d56Sopenharmony_ci if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) { 2357db96d56Sopenharmony_ci type = NEWLINE; /* Add an extra newline */ 2367db96d56Sopenharmony_ci p->parsing_started = 0; 2377db96d56Sopenharmony_ci 2387db96d56Sopenharmony_ci if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) { 2397db96d56Sopenharmony_ci p->tok->pendin = -p->tok->indent; 2407db96d56Sopenharmony_ci p->tok->indent = 0; 2417db96d56Sopenharmony_ci } 2427db96d56Sopenharmony_ci } 2437db96d56Sopenharmony_ci else { 2447db96d56Sopenharmony_ci p->parsing_started = 1; 2457db96d56Sopenharmony_ci } 2467db96d56Sopenharmony_ci 2477db96d56Sopenharmony_ci // Check if we are at the limit of the token array capacity and resize if needed 2487db96d56Sopenharmony_ci if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) { 2497db96d56Sopenharmony_ci return -1; 2507db96d56Sopenharmony_ci } 2517db96d56Sopenharmony_ci 2527db96d56Sopenharmony_ci Token *t = p->tokens[p->fill]; 2537db96d56Sopenharmony_ci return initialize_token(p, t, start, end, type); 2547db96d56Sopenharmony_ci} 2557db96d56Sopenharmony_ci 2567db96d56Sopenharmony_ci#if defined(Py_DEBUG) 2577db96d56Sopenharmony_ci// Instrumentation to count the effectiveness of memoization. 2587db96d56Sopenharmony_ci// The array counts the number of tokens skipped by memoization, 2597db96d56Sopenharmony_ci// indexed by type. 2607db96d56Sopenharmony_ci 2617db96d56Sopenharmony_ci#define NSTATISTICS 2000 2627db96d56Sopenharmony_cistatic long memo_statistics[NSTATISTICS]; 2637db96d56Sopenharmony_ci 2647db96d56Sopenharmony_civoid 2657db96d56Sopenharmony_ci_PyPegen_clear_memo_statistics(void) 2667db96d56Sopenharmony_ci{ 2677db96d56Sopenharmony_ci for (int i = 0; i < NSTATISTICS; i++) { 2687db96d56Sopenharmony_ci memo_statistics[i] = 0; 2697db96d56Sopenharmony_ci } 2707db96d56Sopenharmony_ci} 2717db96d56Sopenharmony_ci 2727db96d56Sopenharmony_ciPyObject * 2737db96d56Sopenharmony_ci_PyPegen_get_memo_statistics(void) 2747db96d56Sopenharmony_ci{ 2757db96d56Sopenharmony_ci PyObject *ret = PyList_New(NSTATISTICS); 2767db96d56Sopenharmony_ci if (ret == NULL) { 2777db96d56Sopenharmony_ci return NULL; 2787db96d56Sopenharmony_ci } 2797db96d56Sopenharmony_ci for (int i = 0; i < NSTATISTICS; i++) { 2807db96d56Sopenharmony_ci PyObject *value = PyLong_FromLong(memo_statistics[i]); 2817db96d56Sopenharmony_ci if (value == NULL) { 2827db96d56Sopenharmony_ci Py_DECREF(ret); 2837db96d56Sopenharmony_ci return NULL; 2847db96d56Sopenharmony_ci } 2857db96d56Sopenharmony_ci // PyList_SetItem borrows a reference to value. 2867db96d56Sopenharmony_ci if (PyList_SetItem(ret, i, value) < 0) { 2877db96d56Sopenharmony_ci Py_DECREF(ret); 2887db96d56Sopenharmony_ci return NULL; 2897db96d56Sopenharmony_ci } 2907db96d56Sopenharmony_ci } 2917db96d56Sopenharmony_ci return ret; 2927db96d56Sopenharmony_ci} 2937db96d56Sopenharmony_ci#endif 2947db96d56Sopenharmony_ci 2957db96d56Sopenharmony_ciint // bool 2967db96d56Sopenharmony_ci_PyPegen_is_memoized(Parser *p, int type, void *pres) 2977db96d56Sopenharmony_ci{ 2987db96d56Sopenharmony_ci if (p->mark == p->fill) { 2997db96d56Sopenharmony_ci if (_PyPegen_fill_token(p) < 0) { 3007db96d56Sopenharmony_ci p->error_indicator = 1; 3017db96d56Sopenharmony_ci return -1; 3027db96d56Sopenharmony_ci } 3037db96d56Sopenharmony_ci } 3047db96d56Sopenharmony_ci 3057db96d56Sopenharmony_ci Token *t = p->tokens[p->mark]; 3067db96d56Sopenharmony_ci 3077db96d56Sopenharmony_ci for (Memo *m = t->memo; m != NULL; m = m->next) { 3087db96d56Sopenharmony_ci if (m->type == type) { 3097db96d56Sopenharmony_ci#if defined(PY_DEBUG) 3107db96d56Sopenharmony_ci if (0 <= type && type < NSTATISTICS) { 3117db96d56Sopenharmony_ci long count = m->mark - p->mark; 3127db96d56Sopenharmony_ci // A memoized negative result counts for one. 3137db96d56Sopenharmony_ci if (count <= 0) { 3147db96d56Sopenharmony_ci count = 1; 3157db96d56Sopenharmony_ci } 3167db96d56Sopenharmony_ci memo_statistics[type] += count; 3177db96d56Sopenharmony_ci } 3187db96d56Sopenharmony_ci#endif 3197db96d56Sopenharmony_ci p->mark = m->mark; 3207db96d56Sopenharmony_ci *(void **)(pres) = m->node; 3217db96d56Sopenharmony_ci return 1; 3227db96d56Sopenharmony_ci } 3237db96d56Sopenharmony_ci } 3247db96d56Sopenharmony_ci return 0; 3257db96d56Sopenharmony_ci} 3267db96d56Sopenharmony_ci 3277db96d56Sopenharmony_ciint 3287db96d56Sopenharmony_ci_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) 3297db96d56Sopenharmony_ci{ 3307db96d56Sopenharmony_ci int mark = p->mark; 3317db96d56Sopenharmony_ci void *res = func(p); 3327db96d56Sopenharmony_ci p->mark = mark; 3337db96d56Sopenharmony_ci return (res != NULL) == positive; 3347db96d56Sopenharmony_ci} 3357db96d56Sopenharmony_ci 3367db96d56Sopenharmony_ciint 3377db96d56Sopenharmony_ci_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) 3387db96d56Sopenharmony_ci{ 3397db96d56Sopenharmony_ci int mark = p->mark; 3407db96d56Sopenharmony_ci void *res = func(p, arg); 3417db96d56Sopenharmony_ci p->mark = mark; 3427db96d56Sopenharmony_ci return (res != NULL) == positive; 3437db96d56Sopenharmony_ci} 3447db96d56Sopenharmony_ci 3457db96d56Sopenharmony_ciint 3467db96d56Sopenharmony_ci_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) 3477db96d56Sopenharmony_ci{ 3487db96d56Sopenharmony_ci int mark = p->mark; 3497db96d56Sopenharmony_ci void *res = func(p, arg); 3507db96d56Sopenharmony_ci p->mark = mark; 3517db96d56Sopenharmony_ci return (res != NULL) == positive; 3527db96d56Sopenharmony_ci} 3537db96d56Sopenharmony_ci 3547db96d56Sopenharmony_ciint 3557db96d56Sopenharmony_ci_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) 3567db96d56Sopenharmony_ci{ 3577db96d56Sopenharmony_ci int mark = p->mark; 3587db96d56Sopenharmony_ci void *res = (void*)func(p); 3597db96d56Sopenharmony_ci p->mark = mark; 3607db96d56Sopenharmony_ci return (res != NULL) == positive; 3617db96d56Sopenharmony_ci} 3627db96d56Sopenharmony_ci 3637db96d56Sopenharmony_ciToken * 3647db96d56Sopenharmony_ci_PyPegen_expect_token(Parser *p, int type) 3657db96d56Sopenharmony_ci{ 3667db96d56Sopenharmony_ci if (p->mark == p->fill) { 3677db96d56Sopenharmony_ci if (_PyPegen_fill_token(p) < 0) { 3687db96d56Sopenharmony_ci p->error_indicator = 1; 3697db96d56Sopenharmony_ci return NULL; 3707db96d56Sopenharmony_ci } 3717db96d56Sopenharmony_ci } 3727db96d56Sopenharmony_ci Token *t = p->tokens[p->mark]; 3737db96d56Sopenharmony_ci if (t->type != type) { 3747db96d56Sopenharmony_ci return NULL; 3757db96d56Sopenharmony_ci } 3767db96d56Sopenharmony_ci p->mark += 1; 3777db96d56Sopenharmony_ci return t; 3787db96d56Sopenharmony_ci} 3797db96d56Sopenharmony_ci 3807db96d56Sopenharmony_civoid* 3817db96d56Sopenharmony_ci_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) { 3827db96d56Sopenharmony_ci 3837db96d56Sopenharmony_ci if (p->error_indicator == 1) { 3847db96d56Sopenharmony_ci return NULL; 3857db96d56Sopenharmony_ci } 3867db96d56Sopenharmony_ci if (result == NULL) { 3877db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("expected (%s)", expected); 3887db96d56Sopenharmony_ci return NULL; 3897db96d56Sopenharmony_ci } 3907db96d56Sopenharmony_ci return result; 3917db96d56Sopenharmony_ci} 3927db96d56Sopenharmony_ci 3937db96d56Sopenharmony_ciToken * 3947db96d56Sopenharmony_ci_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { 3957db96d56Sopenharmony_ci 3967db96d56Sopenharmony_ci if (p->error_indicator == 1) { 3977db96d56Sopenharmony_ci return NULL; 3987db96d56Sopenharmony_ci } 3997db96d56Sopenharmony_ci 4007db96d56Sopenharmony_ci if (p->mark == p->fill) { 4017db96d56Sopenharmony_ci if (_PyPegen_fill_token(p) < 0) { 4027db96d56Sopenharmony_ci p->error_indicator = 1; 4037db96d56Sopenharmony_ci return NULL; 4047db96d56Sopenharmony_ci } 4057db96d56Sopenharmony_ci } 4067db96d56Sopenharmony_ci Token *t = p->tokens[p->mark]; 4077db96d56Sopenharmony_ci if (t->type != type) { 4087db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected); 4097db96d56Sopenharmony_ci return NULL; 4107db96d56Sopenharmony_ci } 4117db96d56Sopenharmony_ci p->mark += 1; 4127db96d56Sopenharmony_ci return t; 4137db96d56Sopenharmony_ci} 4147db96d56Sopenharmony_ci 4157db96d56Sopenharmony_ciexpr_ty 4167db96d56Sopenharmony_ci_PyPegen_expect_soft_keyword(Parser *p, const char *keyword) 4177db96d56Sopenharmony_ci{ 4187db96d56Sopenharmony_ci if (p->mark == p->fill) { 4197db96d56Sopenharmony_ci if (_PyPegen_fill_token(p) < 0) { 4207db96d56Sopenharmony_ci p->error_indicator = 1; 4217db96d56Sopenharmony_ci return NULL; 4227db96d56Sopenharmony_ci } 4237db96d56Sopenharmony_ci } 4247db96d56Sopenharmony_ci Token *t = p->tokens[p->mark]; 4257db96d56Sopenharmony_ci if (t->type != NAME) { 4267db96d56Sopenharmony_ci return NULL; 4277db96d56Sopenharmony_ci } 4287db96d56Sopenharmony_ci const char *s = PyBytes_AsString(t->bytes); 4297db96d56Sopenharmony_ci if (!s) { 4307db96d56Sopenharmony_ci p->error_indicator = 1; 4317db96d56Sopenharmony_ci return NULL; 4327db96d56Sopenharmony_ci } 4337db96d56Sopenharmony_ci if (strcmp(s, keyword) != 0) { 4347db96d56Sopenharmony_ci return NULL; 4357db96d56Sopenharmony_ci } 4367db96d56Sopenharmony_ci return _PyPegen_name_token(p); 4377db96d56Sopenharmony_ci} 4387db96d56Sopenharmony_ci 4397db96d56Sopenharmony_ciToken * 4407db96d56Sopenharmony_ci_PyPegen_get_last_nonnwhitespace_token(Parser *p) 4417db96d56Sopenharmony_ci{ 4427db96d56Sopenharmony_ci assert(p->mark >= 0); 4437db96d56Sopenharmony_ci Token *token = NULL; 4447db96d56Sopenharmony_ci for (int m = p->mark - 1; m >= 0; m--) { 4457db96d56Sopenharmony_ci token = p->tokens[m]; 4467db96d56Sopenharmony_ci if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) { 4477db96d56Sopenharmony_ci break; 4487db96d56Sopenharmony_ci } 4497db96d56Sopenharmony_ci } 4507db96d56Sopenharmony_ci return token; 4517db96d56Sopenharmony_ci} 4527db96d56Sopenharmony_ci 4537db96d56Sopenharmony_ciPyObject * 4547db96d56Sopenharmony_ci_PyPegen_new_identifier(Parser *p, const char *n) 4557db96d56Sopenharmony_ci{ 4567db96d56Sopenharmony_ci PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); 4577db96d56Sopenharmony_ci if (!id) { 4587db96d56Sopenharmony_ci goto error; 4597db96d56Sopenharmony_ci } 4607db96d56Sopenharmony_ci /* PyUnicode_DecodeUTF8 should always return a ready string. */ 4617db96d56Sopenharmony_ci assert(PyUnicode_IS_READY(id)); 4627db96d56Sopenharmony_ci /* Check whether there are non-ASCII characters in the 4637db96d56Sopenharmony_ci identifier; if so, normalize to NFKC. */ 4647db96d56Sopenharmony_ci if (!PyUnicode_IS_ASCII(id)) 4657db96d56Sopenharmony_ci { 4667db96d56Sopenharmony_ci PyObject *id2; 4677db96d56Sopenharmony_ci if (!init_normalization(p)) 4687db96d56Sopenharmony_ci { 4697db96d56Sopenharmony_ci Py_DECREF(id); 4707db96d56Sopenharmony_ci goto error; 4717db96d56Sopenharmony_ci } 4727db96d56Sopenharmony_ci PyObject *form = PyUnicode_InternFromString("NFKC"); 4737db96d56Sopenharmony_ci if (form == NULL) 4747db96d56Sopenharmony_ci { 4757db96d56Sopenharmony_ci Py_DECREF(id); 4767db96d56Sopenharmony_ci goto error; 4777db96d56Sopenharmony_ci } 4787db96d56Sopenharmony_ci PyObject *args[2] = {form, id}; 4797db96d56Sopenharmony_ci id2 = _PyObject_FastCall(p->normalize, args, 2); 4807db96d56Sopenharmony_ci Py_DECREF(id); 4817db96d56Sopenharmony_ci Py_DECREF(form); 4827db96d56Sopenharmony_ci if (!id2) { 4837db96d56Sopenharmony_ci goto error; 4847db96d56Sopenharmony_ci } 4857db96d56Sopenharmony_ci if (!PyUnicode_Check(id2)) 4867db96d56Sopenharmony_ci { 4877db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 4887db96d56Sopenharmony_ci "unicodedata.normalize() must return a string, not " 4897db96d56Sopenharmony_ci "%.200s", 4907db96d56Sopenharmony_ci _PyType_Name(Py_TYPE(id2))); 4917db96d56Sopenharmony_ci Py_DECREF(id2); 4927db96d56Sopenharmony_ci goto error; 4937db96d56Sopenharmony_ci } 4947db96d56Sopenharmony_ci id = id2; 4957db96d56Sopenharmony_ci } 4967db96d56Sopenharmony_ci PyUnicode_InternInPlace(&id); 4977db96d56Sopenharmony_ci if (_PyArena_AddPyObject(p->arena, id) < 0) 4987db96d56Sopenharmony_ci { 4997db96d56Sopenharmony_ci Py_DECREF(id); 5007db96d56Sopenharmony_ci goto error; 5017db96d56Sopenharmony_ci } 5027db96d56Sopenharmony_ci return id; 5037db96d56Sopenharmony_ci 5047db96d56Sopenharmony_cierror: 5057db96d56Sopenharmony_ci p->error_indicator = 1; 5067db96d56Sopenharmony_ci return NULL; 5077db96d56Sopenharmony_ci} 5087db96d56Sopenharmony_ci 5097db96d56Sopenharmony_cistatic expr_ty 5107db96d56Sopenharmony_ci_PyPegen_name_from_token(Parser *p, Token* t) 5117db96d56Sopenharmony_ci{ 5127db96d56Sopenharmony_ci if (t == NULL) { 5137db96d56Sopenharmony_ci return NULL; 5147db96d56Sopenharmony_ci } 5157db96d56Sopenharmony_ci const char *s = PyBytes_AsString(t->bytes); 5167db96d56Sopenharmony_ci if (!s) { 5177db96d56Sopenharmony_ci p->error_indicator = 1; 5187db96d56Sopenharmony_ci return NULL; 5197db96d56Sopenharmony_ci } 5207db96d56Sopenharmony_ci PyObject *id = _PyPegen_new_identifier(p, s); 5217db96d56Sopenharmony_ci if (id == NULL) { 5227db96d56Sopenharmony_ci p->error_indicator = 1; 5237db96d56Sopenharmony_ci return NULL; 5247db96d56Sopenharmony_ci } 5257db96d56Sopenharmony_ci return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno, 5267db96d56Sopenharmony_ci t->end_col_offset, p->arena); 5277db96d56Sopenharmony_ci} 5287db96d56Sopenharmony_ci 5297db96d56Sopenharmony_ciexpr_ty 5307db96d56Sopenharmony_ci_PyPegen_name_token(Parser *p) 5317db96d56Sopenharmony_ci{ 5327db96d56Sopenharmony_ci Token *t = _PyPegen_expect_token(p, NAME); 5337db96d56Sopenharmony_ci return _PyPegen_name_from_token(p, t); 5347db96d56Sopenharmony_ci} 5357db96d56Sopenharmony_ci 5367db96d56Sopenharmony_civoid * 5377db96d56Sopenharmony_ci_PyPegen_string_token(Parser *p) 5387db96d56Sopenharmony_ci{ 5397db96d56Sopenharmony_ci return _PyPegen_expect_token(p, STRING); 5407db96d56Sopenharmony_ci} 5417db96d56Sopenharmony_ci 5427db96d56Sopenharmony_ciexpr_ty _PyPegen_soft_keyword_token(Parser *p) { 5437db96d56Sopenharmony_ci Token *t = _PyPegen_expect_token(p, NAME); 5447db96d56Sopenharmony_ci if (t == NULL) { 5457db96d56Sopenharmony_ci return NULL; 5467db96d56Sopenharmony_ci } 5477db96d56Sopenharmony_ci char *the_token; 5487db96d56Sopenharmony_ci Py_ssize_t size; 5497db96d56Sopenharmony_ci PyBytes_AsStringAndSize(t->bytes, &the_token, &size); 5507db96d56Sopenharmony_ci for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { 5517db96d56Sopenharmony_ci if (strncmp(*keyword, the_token, size) == 0) { 5527db96d56Sopenharmony_ci return _PyPegen_name_from_token(p, t); 5537db96d56Sopenharmony_ci } 5547db96d56Sopenharmony_ci } 5557db96d56Sopenharmony_ci return NULL; 5567db96d56Sopenharmony_ci} 5577db96d56Sopenharmony_ci 5587db96d56Sopenharmony_cistatic PyObject * 5597db96d56Sopenharmony_ciparsenumber_raw(const char *s) 5607db96d56Sopenharmony_ci{ 5617db96d56Sopenharmony_ci const char *end; 5627db96d56Sopenharmony_ci long x; 5637db96d56Sopenharmony_ci double dx; 5647db96d56Sopenharmony_ci Py_complex compl; 5657db96d56Sopenharmony_ci int imflag; 5667db96d56Sopenharmony_ci 5677db96d56Sopenharmony_ci assert(s != NULL); 5687db96d56Sopenharmony_ci errno = 0; 5697db96d56Sopenharmony_ci end = s + strlen(s) - 1; 5707db96d56Sopenharmony_ci imflag = *end == 'j' || *end == 'J'; 5717db96d56Sopenharmony_ci if (s[0] == '0') { 5727db96d56Sopenharmony_ci x = (long)PyOS_strtoul(s, (char **)&end, 0); 5737db96d56Sopenharmony_ci if (x < 0 && errno == 0) { 5747db96d56Sopenharmony_ci return PyLong_FromString(s, (char **)0, 0); 5757db96d56Sopenharmony_ci } 5767db96d56Sopenharmony_ci } 5777db96d56Sopenharmony_ci else { 5787db96d56Sopenharmony_ci x = PyOS_strtol(s, (char **)&end, 0); 5797db96d56Sopenharmony_ci } 5807db96d56Sopenharmony_ci if (*end == '\0') { 5817db96d56Sopenharmony_ci if (errno != 0) { 5827db96d56Sopenharmony_ci return PyLong_FromString(s, (char **)0, 0); 5837db96d56Sopenharmony_ci } 5847db96d56Sopenharmony_ci return PyLong_FromLong(x); 5857db96d56Sopenharmony_ci } 5867db96d56Sopenharmony_ci /* XXX Huge floats may silently fail */ 5877db96d56Sopenharmony_ci if (imflag) { 5887db96d56Sopenharmony_ci compl.real = 0.; 5897db96d56Sopenharmony_ci compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); 5907db96d56Sopenharmony_ci if (compl.imag == -1.0 && PyErr_Occurred()) { 5917db96d56Sopenharmony_ci return NULL; 5927db96d56Sopenharmony_ci } 5937db96d56Sopenharmony_ci return PyComplex_FromCComplex(compl); 5947db96d56Sopenharmony_ci } 5957db96d56Sopenharmony_ci dx = PyOS_string_to_double(s, NULL, NULL); 5967db96d56Sopenharmony_ci if (dx == -1.0 && PyErr_Occurred()) { 5977db96d56Sopenharmony_ci return NULL; 5987db96d56Sopenharmony_ci } 5997db96d56Sopenharmony_ci return PyFloat_FromDouble(dx); 6007db96d56Sopenharmony_ci} 6017db96d56Sopenharmony_ci 6027db96d56Sopenharmony_cistatic PyObject * 6037db96d56Sopenharmony_ciparsenumber(const char *s) 6047db96d56Sopenharmony_ci{ 6057db96d56Sopenharmony_ci char *dup; 6067db96d56Sopenharmony_ci char *end; 6077db96d56Sopenharmony_ci PyObject *res = NULL; 6087db96d56Sopenharmony_ci 6097db96d56Sopenharmony_ci assert(s != NULL); 6107db96d56Sopenharmony_ci 6117db96d56Sopenharmony_ci if (strchr(s, '_') == NULL) { 6127db96d56Sopenharmony_ci return parsenumber_raw(s); 6137db96d56Sopenharmony_ci } 6147db96d56Sopenharmony_ci /* Create a duplicate without underscores. */ 6157db96d56Sopenharmony_ci dup = PyMem_Malloc(strlen(s) + 1); 6167db96d56Sopenharmony_ci if (dup == NULL) { 6177db96d56Sopenharmony_ci return PyErr_NoMemory(); 6187db96d56Sopenharmony_ci } 6197db96d56Sopenharmony_ci end = dup; 6207db96d56Sopenharmony_ci for (; *s; s++) { 6217db96d56Sopenharmony_ci if (*s != '_') { 6227db96d56Sopenharmony_ci *end++ = *s; 6237db96d56Sopenharmony_ci } 6247db96d56Sopenharmony_ci } 6257db96d56Sopenharmony_ci *end = '\0'; 6267db96d56Sopenharmony_ci res = parsenumber_raw(dup); 6277db96d56Sopenharmony_ci PyMem_Free(dup); 6287db96d56Sopenharmony_ci return res; 6297db96d56Sopenharmony_ci} 6307db96d56Sopenharmony_ci 6317db96d56Sopenharmony_ciexpr_ty 6327db96d56Sopenharmony_ci_PyPegen_number_token(Parser *p) 6337db96d56Sopenharmony_ci{ 6347db96d56Sopenharmony_ci Token *t = _PyPegen_expect_token(p, NUMBER); 6357db96d56Sopenharmony_ci if (t == NULL) { 6367db96d56Sopenharmony_ci return NULL; 6377db96d56Sopenharmony_ci } 6387db96d56Sopenharmony_ci 6397db96d56Sopenharmony_ci const char *num_raw = PyBytes_AsString(t->bytes); 6407db96d56Sopenharmony_ci if (num_raw == NULL) { 6417db96d56Sopenharmony_ci p->error_indicator = 1; 6427db96d56Sopenharmony_ci return NULL; 6437db96d56Sopenharmony_ci } 6447db96d56Sopenharmony_ci 6457db96d56Sopenharmony_ci if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) { 6467db96d56Sopenharmony_ci p->error_indicator = 1; 6477db96d56Sopenharmony_ci return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported " 6487db96d56Sopenharmony_ci "in Python 3.6 and greater"); 6497db96d56Sopenharmony_ci } 6507db96d56Sopenharmony_ci 6517db96d56Sopenharmony_ci PyObject *c = parsenumber(num_raw); 6527db96d56Sopenharmony_ci 6537db96d56Sopenharmony_ci if (c == NULL) { 6547db96d56Sopenharmony_ci p->error_indicator = 1; 6557db96d56Sopenharmony_ci PyThreadState *tstate = _PyThreadState_GET(); 6567db96d56Sopenharmony_ci // The only way a ValueError should happen in _this_ code is via 6577db96d56Sopenharmony_ci // PyLong_FromString hitting a length limit. 6587db96d56Sopenharmony_ci if (tstate->curexc_type == PyExc_ValueError && 6597db96d56Sopenharmony_ci tstate->curexc_value != NULL) { 6607db96d56Sopenharmony_ci PyObject *type, *value, *tb; 6617db96d56Sopenharmony_ci // This acts as PyErr_Clear() as we're replacing curexc. 6627db96d56Sopenharmony_ci PyErr_Fetch(&type, &value, &tb); 6637db96d56Sopenharmony_ci Py_XDECREF(tb); 6647db96d56Sopenharmony_ci Py_DECREF(type); 6657db96d56Sopenharmony_ci /* Intentionally omitting columns to avoid a wall of 1000s of '^'s 6667db96d56Sopenharmony_ci * on the error message. Nobody is going to overlook their huge 6677db96d56Sopenharmony_ci * numeric literal once given the line. */ 6687db96d56Sopenharmony_ci RAISE_ERROR_KNOWN_LOCATION( 6697db96d56Sopenharmony_ci p, PyExc_SyntaxError, 6707db96d56Sopenharmony_ci t->lineno, -1 /* col_offset */, 6717db96d56Sopenharmony_ci t->end_lineno, -1 /* end_col_offset */, 6727db96d56Sopenharmony_ci "%S - Consider hexadecimal for huge integer literals " 6737db96d56Sopenharmony_ci "to avoid decimal conversion limits.", 6747db96d56Sopenharmony_ci value); 6757db96d56Sopenharmony_ci Py_DECREF(value); 6767db96d56Sopenharmony_ci } 6777db96d56Sopenharmony_ci return NULL; 6787db96d56Sopenharmony_ci } 6797db96d56Sopenharmony_ci 6807db96d56Sopenharmony_ci if (_PyArena_AddPyObject(p->arena, c) < 0) { 6817db96d56Sopenharmony_ci Py_DECREF(c); 6827db96d56Sopenharmony_ci p->error_indicator = 1; 6837db96d56Sopenharmony_ci return NULL; 6847db96d56Sopenharmony_ci } 6857db96d56Sopenharmony_ci 6867db96d56Sopenharmony_ci return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, 6877db96d56Sopenharmony_ci t->end_col_offset, p->arena); 6887db96d56Sopenharmony_ci} 6897db96d56Sopenharmony_ci 6907db96d56Sopenharmony_ci/* Check that the source for a single input statement really is a single 6917db96d56Sopenharmony_ci statement by looking at what is left in the buffer after parsing. 6927db96d56Sopenharmony_ci Trailing whitespace and comments are OK. */ 6937db96d56Sopenharmony_cistatic int // bool 6947db96d56Sopenharmony_cibad_single_statement(Parser *p) 6957db96d56Sopenharmony_ci{ 6967db96d56Sopenharmony_ci char *cur = p->tok->cur; 6977db96d56Sopenharmony_ci char c = *cur; 6987db96d56Sopenharmony_ci 6997db96d56Sopenharmony_ci for (;;) { 7007db96d56Sopenharmony_ci while (c == ' ' || c == '\t' || c == '\n' || c == '\014') { 7017db96d56Sopenharmony_ci c = *++cur; 7027db96d56Sopenharmony_ci } 7037db96d56Sopenharmony_ci 7047db96d56Sopenharmony_ci if (!c) { 7057db96d56Sopenharmony_ci return 0; 7067db96d56Sopenharmony_ci } 7077db96d56Sopenharmony_ci 7087db96d56Sopenharmony_ci if (c != '#') { 7097db96d56Sopenharmony_ci return 1; 7107db96d56Sopenharmony_ci } 7117db96d56Sopenharmony_ci 7127db96d56Sopenharmony_ci /* Suck up comment. */ 7137db96d56Sopenharmony_ci while (c && c != '\n') { 7147db96d56Sopenharmony_ci c = *++cur; 7157db96d56Sopenharmony_ci } 7167db96d56Sopenharmony_ci } 7177db96d56Sopenharmony_ci} 7187db96d56Sopenharmony_ci 7197db96d56Sopenharmony_cistatic int 7207db96d56Sopenharmony_cicompute_parser_flags(PyCompilerFlags *flags) 7217db96d56Sopenharmony_ci{ 7227db96d56Sopenharmony_ci int parser_flags = 0; 7237db96d56Sopenharmony_ci if (!flags) { 7247db96d56Sopenharmony_ci return 0; 7257db96d56Sopenharmony_ci } 7267db96d56Sopenharmony_ci if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) { 7277db96d56Sopenharmony_ci parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; 7287db96d56Sopenharmony_ci } 7297db96d56Sopenharmony_ci if (flags->cf_flags & PyCF_IGNORE_COOKIE) { 7307db96d56Sopenharmony_ci parser_flags |= PyPARSE_IGNORE_COOKIE; 7317db96d56Sopenharmony_ci } 7327db96d56Sopenharmony_ci if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) { 7337db96d56Sopenharmony_ci parser_flags |= PyPARSE_BARRY_AS_BDFL; 7347db96d56Sopenharmony_ci } 7357db96d56Sopenharmony_ci if (flags->cf_flags & PyCF_TYPE_COMMENTS) { 7367db96d56Sopenharmony_ci parser_flags |= PyPARSE_TYPE_COMMENTS; 7377db96d56Sopenharmony_ci } 7387db96d56Sopenharmony_ci if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) { 7397db96d56Sopenharmony_ci parser_flags |= PyPARSE_ASYNC_HACKS; 7407db96d56Sopenharmony_ci } 7417db96d56Sopenharmony_ci if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { 7427db96d56Sopenharmony_ci parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; 7437db96d56Sopenharmony_ci } 7447db96d56Sopenharmony_ci return parser_flags; 7457db96d56Sopenharmony_ci} 7467db96d56Sopenharmony_ci 7477db96d56Sopenharmony_ci// Parser API 7487db96d56Sopenharmony_ci 7497db96d56Sopenharmony_ciParser * 7507db96d56Sopenharmony_ci_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, 7517db96d56Sopenharmony_ci int feature_version, int *errcode, PyArena *arena) 7527db96d56Sopenharmony_ci{ 7537db96d56Sopenharmony_ci Parser *p = PyMem_Malloc(sizeof(Parser)); 7547db96d56Sopenharmony_ci if (p == NULL) { 7557db96d56Sopenharmony_ci return (Parser *) PyErr_NoMemory(); 7567db96d56Sopenharmony_ci } 7577db96d56Sopenharmony_ci assert(tok != NULL); 7587db96d56Sopenharmony_ci tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0; 7597db96d56Sopenharmony_ci tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0; 7607db96d56Sopenharmony_ci p->tok = tok; 7617db96d56Sopenharmony_ci p->keywords = NULL; 7627db96d56Sopenharmony_ci p->n_keyword_lists = -1; 7637db96d56Sopenharmony_ci p->soft_keywords = NULL; 7647db96d56Sopenharmony_ci p->tokens = PyMem_Malloc(sizeof(Token *)); 7657db96d56Sopenharmony_ci if (!p->tokens) { 7667db96d56Sopenharmony_ci PyMem_Free(p); 7677db96d56Sopenharmony_ci return (Parser *) PyErr_NoMemory(); 7687db96d56Sopenharmony_ci } 7697db96d56Sopenharmony_ci p->tokens[0] = PyMem_Calloc(1, sizeof(Token)); 7707db96d56Sopenharmony_ci if (!p->tokens[0]) { 7717db96d56Sopenharmony_ci PyMem_Free(p->tokens); 7727db96d56Sopenharmony_ci PyMem_Free(p); 7737db96d56Sopenharmony_ci return (Parser *) PyErr_NoMemory(); 7747db96d56Sopenharmony_ci } 7757db96d56Sopenharmony_ci if (!growable_comment_array_init(&p->type_ignore_comments, 10)) { 7767db96d56Sopenharmony_ci PyMem_Free(p->tokens[0]); 7777db96d56Sopenharmony_ci PyMem_Free(p->tokens); 7787db96d56Sopenharmony_ci PyMem_Free(p); 7797db96d56Sopenharmony_ci return (Parser *) PyErr_NoMemory(); 7807db96d56Sopenharmony_ci } 7817db96d56Sopenharmony_ci 7827db96d56Sopenharmony_ci p->mark = 0; 7837db96d56Sopenharmony_ci p->fill = 0; 7847db96d56Sopenharmony_ci p->size = 1; 7857db96d56Sopenharmony_ci 7867db96d56Sopenharmony_ci p->errcode = errcode; 7877db96d56Sopenharmony_ci p->arena = arena; 7887db96d56Sopenharmony_ci p->start_rule = start_rule; 7897db96d56Sopenharmony_ci p->parsing_started = 0; 7907db96d56Sopenharmony_ci p->normalize = NULL; 7917db96d56Sopenharmony_ci p->error_indicator = 0; 7927db96d56Sopenharmony_ci 7937db96d56Sopenharmony_ci p->starting_lineno = 0; 7947db96d56Sopenharmony_ci p->starting_col_offset = 0; 7957db96d56Sopenharmony_ci p->flags = flags; 7967db96d56Sopenharmony_ci p->feature_version = feature_version; 7977db96d56Sopenharmony_ci p->known_err_token = NULL; 7987db96d56Sopenharmony_ci p->level = 0; 7997db96d56Sopenharmony_ci p->call_invalid_rules = 0; 8007db96d56Sopenharmony_ci return p; 8017db96d56Sopenharmony_ci} 8027db96d56Sopenharmony_ci 8037db96d56Sopenharmony_civoid 8047db96d56Sopenharmony_ci_PyPegen_Parser_Free(Parser *p) 8057db96d56Sopenharmony_ci{ 8067db96d56Sopenharmony_ci Py_XDECREF(p->normalize); 8077db96d56Sopenharmony_ci for (int i = 0; i < p->size; i++) { 8087db96d56Sopenharmony_ci PyMem_Free(p->tokens[i]); 8097db96d56Sopenharmony_ci } 8107db96d56Sopenharmony_ci PyMem_Free(p->tokens); 8117db96d56Sopenharmony_ci growable_comment_array_deallocate(&p->type_ignore_comments); 8127db96d56Sopenharmony_ci PyMem_Free(p); 8137db96d56Sopenharmony_ci} 8147db96d56Sopenharmony_ci 8157db96d56Sopenharmony_cistatic void 8167db96d56Sopenharmony_cireset_parser_state_for_error_pass(Parser *p) 8177db96d56Sopenharmony_ci{ 8187db96d56Sopenharmony_ci for (int i = 0; i < p->fill; i++) { 8197db96d56Sopenharmony_ci p->tokens[i]->memo = NULL; 8207db96d56Sopenharmony_ci } 8217db96d56Sopenharmony_ci p->mark = 0; 8227db96d56Sopenharmony_ci p->call_invalid_rules = 1; 8237db96d56Sopenharmony_ci // Don't try to get extra tokens in interactive mode when trying to 8247db96d56Sopenharmony_ci // raise specialized errors in the second pass. 8257db96d56Sopenharmony_ci p->tok->interactive_underflow = IUNDERFLOW_STOP; 8267db96d56Sopenharmony_ci} 8277db96d56Sopenharmony_ci 8287db96d56Sopenharmony_cistatic inline int 8297db96d56Sopenharmony_ci_is_end_of_source(Parser *p) { 8307db96d56Sopenharmony_ci int err = p->tok->done; 8317db96d56Sopenharmony_ci return err == E_EOF || err == E_EOFS || err == E_EOLS; 8327db96d56Sopenharmony_ci} 8337db96d56Sopenharmony_ci 8347db96d56Sopenharmony_civoid * 8357db96d56Sopenharmony_ci_PyPegen_run_parser(Parser *p) 8367db96d56Sopenharmony_ci{ 8377db96d56Sopenharmony_ci void *res = _PyPegen_parse(p); 8387db96d56Sopenharmony_ci assert(p->level == 0); 8397db96d56Sopenharmony_ci if (res == NULL) { 8407db96d56Sopenharmony_ci if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) { 8417db96d56Sopenharmony_ci PyErr_Clear(); 8427db96d56Sopenharmony_ci return RAISE_SYNTAX_ERROR("incomplete input"); 8437db96d56Sopenharmony_ci } 8447db96d56Sopenharmony_ci if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) { 8457db96d56Sopenharmony_ci return NULL; 8467db96d56Sopenharmony_ci } 8477db96d56Sopenharmony_ci // Make a second parser pass. In this pass we activate heavier and slower checks 8487db96d56Sopenharmony_ci // to produce better error messages and more complete diagnostics. Extra "invalid_*" 8497db96d56Sopenharmony_ci // rules will be active during parsing. 8507db96d56Sopenharmony_ci Token *last_token = p->tokens[p->fill - 1]; 8517db96d56Sopenharmony_ci reset_parser_state_for_error_pass(p); 8527db96d56Sopenharmony_ci _PyPegen_parse(p); 8537db96d56Sopenharmony_ci 8547db96d56Sopenharmony_ci // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure 8557db96d56Sopenharmony_ci // point. 8567db96d56Sopenharmony_ci _Pypegen_set_syntax_error(p, last_token); 8577db96d56Sopenharmony_ci return NULL; 8587db96d56Sopenharmony_ci } 8597db96d56Sopenharmony_ci 8607db96d56Sopenharmony_ci if (p->start_rule == Py_single_input && bad_single_statement(p)) { 8617db96d56Sopenharmony_ci p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future 8627db96d56Sopenharmony_ci return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement"); 8637db96d56Sopenharmony_ci } 8647db96d56Sopenharmony_ci 8657db96d56Sopenharmony_ci // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate() 8667db96d56Sopenharmony_ci#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN) 8677db96d56Sopenharmony_ci if (p->start_rule == Py_single_input || 8687db96d56Sopenharmony_ci p->start_rule == Py_file_input || 8697db96d56Sopenharmony_ci p->start_rule == Py_eval_input) 8707db96d56Sopenharmony_ci { 8717db96d56Sopenharmony_ci if (!_PyAST_Validate(res)) { 8727db96d56Sopenharmony_ci return NULL; 8737db96d56Sopenharmony_ci } 8747db96d56Sopenharmony_ci } 8757db96d56Sopenharmony_ci#endif 8767db96d56Sopenharmony_ci return res; 8777db96d56Sopenharmony_ci} 8787db96d56Sopenharmony_ci 8797db96d56Sopenharmony_cimod_ty 8807db96d56Sopenharmony_ci_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob, 8817db96d56Sopenharmony_ci const char *enc, const char *ps1, const char *ps2, 8827db96d56Sopenharmony_ci PyCompilerFlags *flags, int *errcode, PyArena *arena) 8837db96d56Sopenharmony_ci{ 8847db96d56Sopenharmony_ci struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2); 8857db96d56Sopenharmony_ci if (tok == NULL) { 8867db96d56Sopenharmony_ci if (PyErr_Occurred()) { 8877db96d56Sopenharmony_ci _PyPegen_raise_tokenizer_init_error(filename_ob); 8887db96d56Sopenharmony_ci return NULL; 8897db96d56Sopenharmony_ci } 8907db96d56Sopenharmony_ci return NULL; 8917db96d56Sopenharmony_ci } 8927db96d56Sopenharmony_ci if (!tok->fp || ps1 != NULL || ps2 != NULL || 8937db96d56Sopenharmony_ci PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) { 8947db96d56Sopenharmony_ci tok->fp_interactive = 1; 8957db96d56Sopenharmony_ci } 8967db96d56Sopenharmony_ci // This transfers the ownership to the tokenizer 8977db96d56Sopenharmony_ci tok->filename = filename_ob; 8987db96d56Sopenharmony_ci Py_INCREF(filename_ob); 8997db96d56Sopenharmony_ci 9007db96d56Sopenharmony_ci // From here on we need to clean up even if there's an error 9017db96d56Sopenharmony_ci mod_ty result = NULL; 9027db96d56Sopenharmony_ci 9037db96d56Sopenharmony_ci int parser_flags = compute_parser_flags(flags); 9047db96d56Sopenharmony_ci Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION, 9057db96d56Sopenharmony_ci errcode, arena); 9067db96d56Sopenharmony_ci if (p == NULL) { 9077db96d56Sopenharmony_ci goto error; 9087db96d56Sopenharmony_ci } 9097db96d56Sopenharmony_ci 9107db96d56Sopenharmony_ci result = _PyPegen_run_parser(p); 9117db96d56Sopenharmony_ci _PyPegen_Parser_Free(p); 9127db96d56Sopenharmony_ci 9137db96d56Sopenharmony_cierror: 9147db96d56Sopenharmony_ci _PyTokenizer_Free(tok); 9157db96d56Sopenharmony_ci return result; 9167db96d56Sopenharmony_ci} 9177db96d56Sopenharmony_ci 9187db96d56Sopenharmony_cimod_ty 9197db96d56Sopenharmony_ci_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob, 9207db96d56Sopenharmony_ci PyCompilerFlags *flags, PyArena *arena) 9217db96d56Sopenharmony_ci{ 9227db96d56Sopenharmony_ci int exec_input = start_rule == Py_file_input; 9237db96d56Sopenharmony_ci 9247db96d56Sopenharmony_ci struct tok_state *tok; 9257db96d56Sopenharmony_ci if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) { 9267db96d56Sopenharmony_ci tok = _PyTokenizer_FromUTF8(str, exec_input); 9277db96d56Sopenharmony_ci } else { 9287db96d56Sopenharmony_ci tok = _PyTokenizer_FromString(str, exec_input); 9297db96d56Sopenharmony_ci } 9307db96d56Sopenharmony_ci if (tok == NULL) { 9317db96d56Sopenharmony_ci if (PyErr_Occurred()) { 9327db96d56Sopenharmony_ci _PyPegen_raise_tokenizer_init_error(filename_ob); 9337db96d56Sopenharmony_ci } 9347db96d56Sopenharmony_ci return NULL; 9357db96d56Sopenharmony_ci } 9367db96d56Sopenharmony_ci // This transfers the ownership to the tokenizer 9377db96d56Sopenharmony_ci tok->filename = filename_ob; 9387db96d56Sopenharmony_ci Py_INCREF(filename_ob); 9397db96d56Sopenharmony_ci 9407db96d56Sopenharmony_ci // We need to clear up from here on 9417db96d56Sopenharmony_ci mod_ty result = NULL; 9427db96d56Sopenharmony_ci 9437db96d56Sopenharmony_ci int parser_flags = compute_parser_flags(flags); 9447db96d56Sopenharmony_ci int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ? 9457db96d56Sopenharmony_ci flags->cf_feature_version : PY_MINOR_VERSION; 9467db96d56Sopenharmony_ci Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version, 9477db96d56Sopenharmony_ci NULL, arena); 9487db96d56Sopenharmony_ci if (p == NULL) { 9497db96d56Sopenharmony_ci goto error; 9507db96d56Sopenharmony_ci } 9517db96d56Sopenharmony_ci 9527db96d56Sopenharmony_ci result = _PyPegen_run_parser(p); 9537db96d56Sopenharmony_ci _PyPegen_Parser_Free(p); 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_cierror: 9567db96d56Sopenharmony_ci _PyTokenizer_Free(tok); 9577db96d56Sopenharmony_ci return result; 9587db96d56Sopenharmony_ci} 959