#include <Python.h>
#include "pycore_ast.h"           // _PyAST_Validate()
#include "pycore_pystate.h"       // _PyThreadState_GET()
#include <errcode.h>

#include "tokenizer.h"
#include "pegen.h"

// Internal parser functions

asdl_stmt_seq*
_PyPegen_interactive_exit(Parser *p)
{
    if (p->errcode) {
        *(p->errcode) = E_EOF;
    }
    return NULL;
}

Py_ssize_t
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
{
    const char *str = PyUnicode_AsUTF8(line);
    if (!str) {
        return -1;
    }
    Py_ssize_t len = strlen(str);
    if (col_offset > len + 1) {
        col_offset = len + 1;
    }
    assert(col_offset >= 0);
    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
    if (!text) {
        return -1;
    }
    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
    Py_DECREF(text);
    return size;
}

// Here, mark is the start of the node, while p->mark is the end.
// If node==NULL, they should be the same.
int
_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
{
    // Insert in front
    Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo));
    if (m == NULL) {
        return -1;
    }
    m->type = type;
    m->node = node;
    m->mark = p->mark;
    m->next = p->tokens[mark]->memo;
    p->tokens[mark]->memo = m;
    return 0;
}

// Like _PyPegen_insert_memo(), but updates an existing node if found.
int
_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
{
    for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
        if (m->type == type) {
            // Update existing node.
            m->node = node;
            m->mark = p->mark;
            return 0;
        }
    }
    // Insert new node.
    return _PyPegen_insert_memo(p, mark, type, node);
}

static int
init_normalization(Parser *p)
{
    if (p->normalize) {
        return 1;
    }
    PyObject *m = PyImport_ImportModule("unicodedata");
    if (!m)
    {
        return 0;
    }
    p->normalize = PyObject_GetAttrString(m, "normalize");
    Py_DECREF(m);
    if (!p->normalize)
    {
        return 0;
    }
    return 1;
}

static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        size_t new_size = arr->size * 2;
        void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
        if (!new_items_array) {
            return 0;
        }
        arr->items = new_items_array;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;  // Take ownership
    arr->num_items++;
    return 1;
}

static void
growable_comment_array_deallocate(growable_comment_array *arr) {
    for (unsigned i = 0; i < arr->num_items; i++) {
        PyMem_Free(arr->items[i].comment);
    }
    PyMem_Free(arr->items);
}

static int
_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
{
    assert(name_len > 0);
    if (name_len >= p->n_keyword_lists ||
        p->keywords[name_len] == NULL ||
        p->keywords[name_len]->type == -1) {
        return NAME;
    }
    for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
        if (strncmp(k->str, name, name_len) == 0) {
            return k->type;
        }
    }
    return NAME;
}
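
/*
 * Illustrative sketch (not part of the build): the shape of the per-length
 * keyword tables that _get_keyword_or_name_type() walks. The real tables are
 * generated into parser.c by the peg generator; the table name and the
 * token-type numbers below are hypothetical.
 */
#if 0
static KeywordToken *example_keywords[] = {
    (KeywordToken[]) {{NULL, -1}},                               // length 0: unused
    (KeywordToken[]) {{NULL, -1}},                               // length 1: no keywords
    (KeywordToken[]) {{"if", 510}, {"in", 511}, {NULL, -1}},     // length 2
    (KeywordToken[]) {{"del", 512}, {"for", 513}, {NULL, -1}},   // length 3
};
// _get_keyword_or_name_type() indexes by name_len and scans until the
// {NULL, -1} sentinel, falling back to a plain NAME token on no match.
#endif
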
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
    assert(token != NULL);

    token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
    token->bytes = PyBytes_FromStringAndSize(start, end - start);
    if (token->bytes == NULL) {
        return -1;
    }

    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
        Py_DECREF(token->bytes);
        return -1;
    }

    token->level = p->tok->level;

    const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
    int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
    int end_lineno = p->tok->lineno;

    int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
    int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;

    token->lineno = lineno;
    token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
    token->end_lineno = end_lineno;
    token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;

    p->fill += 1;

    if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
        return _Pypegen_raise_decode_error(p);
    }

    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
}

static int
_resize_tokens_array(Parser *p) {
    int newsize = p->size * 2;
    Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
    if (new_tokens == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    p->tokens = new_tokens;

    for (int i = p->size; i < newsize; i++) {
        p->tokens[i] = PyMem_Calloc(1, sizeof(Token));
        if (p->tokens[i] == NULL) {
            p->size = i;  // Needed in order to clean up correctly after the parser fails
            PyErr_NoMemory();
            return -1;
        }
    }
    p->size = newsize;
    return 0;
}

int
_PyPegen_fill_token(Parser *p)
{
    const char *start;
    const char *end;
    int type = _PyTokenizer_Get(p->tok, &start, &end);

    // Record and skip '# type: ignore' comments
    while (type == TYPE_IGNORE) {
        Py_ssize_t len = end - start;
        char *tag = PyMem_Malloc(len + 1);
        if (tag == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        strncpy(tag, start, len);
        tag[len] = '\0';
        // Ownership of tag passes to the growable array
        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
            PyErr_NoMemory();
            return -1;
        }
        type = _PyTokenizer_Get(p->tok, &start, &end);
    }

    // If we have reached the end and we are in single input mode we need to
    // insert a newline and reset the parsing
    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
        type = NEWLINE; /* Add an extra newline */
        p->parsing_started = 0;

        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
            p->tok->pendin = -p->tok->indent;
            p->tok->indent = 0;
        }
    }
    else {
        p->parsing_started = 1;
    }

    // Check if we are at the limit of the token array capacity and resize if needed
    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
        return -1;
    }

    Token *t = p->tokens[p->fill];
    return initialize_token(p, t, start, end, type);
}
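
/*
 * Illustrative sketch (not part of the build): how the token buffer is
 * consumed. _PyPegen_fill_token() appends exactly one token at p->fill;
 * p->mark is the read cursor that rule functions advance and rewind. A
 * hypothetical driver that drains the tokenizer could look like this
 * (error reporting elided):
 */
#if 0
static int
example_consume_all_tokens(Parser *p)
{
    for (;;) {
        if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
            return -1;  // Tokenizer or memory error has already been set.
        }
        Token *t = p->tokens[p->mark++];
        if (t->type == ENDMARKER) {
            return 0;
        }
    }
}
#endif
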
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

void
_PyPegen_clear_memo_statistics(void)
{
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
}

PyObject *
_PyPegen_get_memo_statistics(void)
{
    PyObject *ret = PyList_New(NSTATISTICS);
    if (ret == NULL) {
        return NULL;
    }
    for (int i = 0; i < NSTATISTICS; i++) {
        PyObject *value = PyLong_FromLong(memo_statistics[i]);
        if (value == NULL) {
            Py_DECREF(ret);
            return NULL;
        }
        // PyList_SetItem steals the reference to value.
        if (PyList_SetItem(ret, i, value) < 0) {
            Py_DECREF(ret);
            return NULL;
        }
    }
    return ret;
}
#endif

int // bool
_PyPegen_is_memoized(Parser *p, int type, void *pres)
{
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return -1;
        }
    }

    Token *t = p->tokens[p->mark];

    for (Memo *m = t->memo; m != NULL; m = m->next) {
        if (m->type == type) {
#if defined(Py_DEBUG)
            if (0 <= type && type < NSTATISTICS) {
                long count = m->mark - p->mark;
                // A memoized negative result counts for one.
                if (count <= 0) {
                    count = 1;
                }
                memo_statistics[type] += count;
            }
#endif
            p->mark = m->mark;
            *(void **)(pres) = m->node;
            return 1;
        }
    }
    return 0;
}

int
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
{
    int mark = p->mark;
    void *res = func(p);
    p->mark = mark;
    return (res != NULL) == positive;
}

int
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char *), Parser *p, const char *arg)
{
    int mark = p->mark;
    void *res = func(p, arg);
    p->mark = mark;
    return (res != NULL) == positive;
}

int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{
    int mark = p->mark;
    void *res = func(p, arg);
    p->mark = mark;
    return (res != NULL) == positive;
}

int
_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
{
    int mark = p->mark;
    void *res = (void*)func(p);
    p->mark = mark;
    return (res != NULL) == positive;
}

Token *
_PyPegen_expect_token(Parser *p, int type)
{
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return NULL;
        }
    }
    Token *t = p->tokens[p->mark];
    if (t->type != type) {
        return NULL;
    }
    p->mark += 1;
    return t;
}

void*
_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {

    if (p->error_indicator == 1) {
        return NULL;
    }
    if (result == NULL) {
        RAISE_SYNTAX_ERROR("expected (%s)", expected);
        return NULL;
    }
    return result;
}

Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {

    if (p->error_indicator == 1) {
        return NULL;
    }

    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return NULL;
        }
    }
    Token *t = p->tokens[p->mark];
    if (t->type != type) {
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected);
        return NULL;
    }
    p->mark += 1;
    return t;
}
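
/*
 * Illustrative sketch (not part of the build): the memoization pattern that
 * generated rule functions follow around the helpers above. The rule name
 * and the memo key `example_rule_type` are hypothetical; real keys are
 * emitted by the parser generator.
 */
#if 0
enum { example_rule_type = 9999 };

static void *
example_rule(Parser *p)
{
    void *res = NULL;
    if (_PyPegen_is_memoized(p, example_rule_type, &res)) {
        return res;  // Hit: p->mark has been advanced to the memoized end.
    }
    int mark = p->mark;
    // ... try each alternative, setting res and returning on success ...
    // On the way out, cache the result (even a NULL failure) for this
    // position so a later attempt at the same mark is O(1):
    _PyPegen_update_memo(p, mark, example_rule_type, res);
    return res;
}
#endif
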
expr_ty
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
{
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return NULL;
        }
    }
    Token *t = p->tokens[p->mark];
    if (t->type != NAME) {
        return NULL;
    }
    const char *s = PyBytes_AsString(t->bytes);
    if (!s) {
        p->error_indicator = 1;
        return NULL;
    }
    if (strcmp(s, keyword) != 0) {
        return NULL;
    }
    return _PyPegen_name_token(p);
}

Token *
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
{
    assert(p->mark >= 0);
    Token *token = NULL;
    for (int m = p->mark - 1; m >= 0; m--) {
        token = p->tokens[m];
        if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
            break;
        }
    }
    return token;
}

PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
    if (!id) {
        goto error;
    }
    /* PyUnicode_DecodeUTF8 should always return a ready string. */
    assert(PyUnicode_IS_READY(id));
    /* Check whether there are non-ASCII characters in the
       identifier; if so, normalize to NFKC. */
    if (!PyUnicode_IS_ASCII(id))
    {
        PyObject *id2;
        if (!init_normalization(p))
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *form = PyUnicode_InternFromString("NFKC");
        if (form == NULL)
        {
            Py_DECREF(id);
            goto error;
        }
        PyObject *args[2] = {form, id};
        id2 = _PyObject_FastCall(p->normalize, args, 2);
        Py_DECREF(id);
        Py_DECREF(form);
        if (!id2) {
            goto error;
        }
        if (!PyUnicode_Check(id2))
        {
            PyErr_Format(PyExc_TypeError,
                         "unicodedata.normalize() must return a string, not "
                         "%.200s",
                         _PyType_Name(Py_TYPE(id2)));
            Py_DECREF(id2);
            goto error;
        }
        id = id2;
    }
    PyUnicode_InternInPlace(&id);
    if (_PyArena_AddPyObject(p->arena, id) < 0)
    {
        Py_DECREF(id);
        goto error;
    }
    return id;

error:
    p->error_indicator = 1;
    return NULL;
}

static expr_ty
_PyPegen_name_from_token(Parser *p, Token* t)
{
    if (t == NULL) {
        return NULL;
    }
    const char *s = PyBytes_AsString(t->bytes);
    if (!s) {
        p->error_indicator = 1;
        return NULL;
    }
    PyObject *id = _PyPegen_new_identifier(p, s);
    if (id == NULL) {
        p->error_indicator = 1;
        return NULL;
    }
    return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
                       t->end_col_offset, p->arena);
}

expr_ty
_PyPegen_name_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NAME);
    return _PyPegen_name_from_token(p, t);
}

void *
_PyPegen_string_token(Parser *p)
{
    return _PyPegen_expect_token(p, STRING);
}

expr_ty
_PyPegen_soft_keyword_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NAME);
    if (t == NULL) {
        return NULL;
    }
    char *the_token;
    Py_ssize_t size;
    if (PyBytes_AsStringAndSize(t->bytes, &the_token, &size) < 0) {
        p->error_indicator = 1;
        return NULL;
    }
    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
        if (strncmp(*keyword, the_token, size) == 0) {
            return _PyPegen_name_from_token(p, t);
        }
    }
    return NULL;
}
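
/*
 * Illustrative sketch (not part of the build): what the NFKC normalization
 * in _PyPegen_new_identifier() buys us. The bytes below spell "name" in
 * fullwidth characters (U+FF4E U+FF41 U+FF4D U+FF45); NFKC folds them to
 * plain ASCII, so both spellings denote the same identifier. `p` is a
 * hypothetical live parser.
 */
#if 0
PyObject *id = _PyPegen_new_identifier(
    p, "\xef\xbd\x8e\xef\xbd\x81\xef\xbd\x8d\xef\xbd\x85");  // "ｎａｍｅ"
// id now compares equal to an interned "name".
#endif
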
static PyObject *
parsenumber_raw(const char *s)
{
    const char *end;
    long x;
    double dx;
    Py_complex compl;
    int imflag;

    assert(s != NULL);
    errno = 0;
    end = s + strlen(s) - 1;
    imflag = *end == 'j' || *end == 'J';
    if (s[0] == '0') {
        x = (long)PyOS_strtoul(s, (char **)&end, 0);
        if (x < 0 && errno == 0) {
            return PyLong_FromString(s, (char **)0, 0);
        }
    }
    else {
        x = PyOS_strtol(s, (char **)&end, 0);
    }
    if (*end == '\0') {
        if (errno != 0) {
            return PyLong_FromString(s, (char **)0, 0);
        }
        return PyLong_FromLong(x);
    }
    /* XXX Huge floats may silently fail */
    if (imflag) {
        compl.real = 0.;
        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
        if (compl.imag == -1.0 && PyErr_Occurred()) {
            return NULL;
        }
        return PyComplex_FromCComplex(compl);
    }
    dx = PyOS_string_to_double(s, NULL, NULL);
    if (dx == -1.0 && PyErr_Occurred()) {
        return NULL;
    }
    return PyFloat_FromDouble(dx);
}

static PyObject *
parsenumber(const char *s)
{
    char *dup;
    char *end;
    PyObject *res = NULL;

    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(s);
    }
    /* Create a duplicate without underscores. */
    dup = PyMem_Malloc(strlen(s) + 1);
    if (dup == NULL) {
        return PyErr_NoMemory();
    }
    end = dup;
    for (; *s; s++) {
        if (*s != '_') {
            *end++ = *s;
        }
    }
    *end = '\0';
    res = parsenumber_raw(dup);
    PyMem_Free(dup);
    return res;
}

expr_ty
_PyPegen_number_token(Parser *p)
{
    Token *t = _PyPegen_expect_token(p, NUMBER);
    if (t == NULL) {
        return NULL;
    }

    const char *num_raw = PyBytes_AsString(t->bytes);
    if (num_raw == NULL) {
        p->error_indicator = 1;
        return NULL;
    }

    if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
        p->error_indicator = 1;
        return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
                                  "in Python 3.6 and greater");
    }

    PyObject *c = parsenumber(num_raw);

    if (c == NULL) {
        p->error_indicator = 1;
        PyThreadState *tstate = _PyThreadState_GET();
        // The only way a ValueError should happen in _this_ code is via
        // PyLong_FromString hitting a length limit.
        if (tstate->curexc_type == PyExc_ValueError &&
            tstate->curexc_value != NULL) {
            PyObject *type, *value, *tb;
            // This acts as PyErr_Clear() as we're replacing curexc.
            PyErr_Fetch(&type, &value, &tb);
            Py_XDECREF(tb);
            Py_DECREF(type);
            /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
             * on the error message. Nobody is going to overlook their huge
             * numeric literal once given the line. */
            RAISE_ERROR_KNOWN_LOCATION(
                p, PyExc_SyntaxError,
                t->lineno, -1 /* col_offset */,
                t->end_lineno, -1 /* end_col_offset */,
                "%S - Consider hexadecimal for huge integer literals "
                "to avoid decimal conversion limits.",
                value);
            Py_DECREF(value);
        }
        return NULL;
    }

    if (_PyArena_AddPyObject(p->arena, c) < 0) {
        Py_DECREF(c);
        p->error_indicator = 1;
        return NULL;
    }

    return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
                           t->end_col_offset, p->arena);
}
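
/*
 * Illustrative sketch (not part of the build): parsenumber() strips PEP 515
 * underscores before delegating to parsenumber_raw(), so these two calls
 * produce objects that compare equal.
 */
#if 0
PyObject *a = parsenumber("1_000_000");
PyObject *b = parsenumber("1000000");
assert(PyObject_RichCompareBool(a, b, Py_EQ) == 1);
#endif
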
/* Check that the source for a single input statement really is a single
   statement by looking at what is left in the buffer after parsing.
   Trailing whitespace and comments are OK. */
static int // bool
bad_single_statement(Parser *p)
{
    char *cur = p->tok->cur;
    char c = *cur;

    for (;;) {
        while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
            c = *++cur;
        }

        if (!c) {
            return 0;
        }

        if (c != '#') {
            return 1;
        }

        /* Suck up comment. */
        while (c && c != '\n') {
            c = *++cur;
        }
    }
}

static int
compute_parser_flags(PyCompilerFlags *flags)
{
    int parser_flags = 0;
    if (!flags) {
        return 0;
    }
    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
    }
    if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
        parser_flags |= PyPARSE_IGNORE_COOKIE;
    }
    if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
        parser_flags |= PyPARSE_BARRY_AS_BDFL;
    }
    if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
        parser_flags |= PyPARSE_TYPE_COMMENTS;
    }
    if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
        parser_flags |= PyPARSE_ASYNC_HACKS;
    }
    if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) {
        parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT;
    }
    return parser_flags;
}

// Parser API

Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
                    int feature_version, int *errcode, PyArena *arena)
{
    Parser *p = PyMem_Malloc(sizeof(Parser));
    if (p == NULL) {
        return (Parser *) PyErr_NoMemory();
    }
    assert(tok != NULL);
    tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
    tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
    p->tok = tok;
    p->keywords = NULL;
    p->n_keyword_lists = -1;
    p->soft_keywords = NULL;
    p->tokens = PyMem_Malloc(sizeof(Token *));
    if (!p->tokens) {
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
    if (!p->tokens[0]) {
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }
    if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
        PyMem_Free(p->tokens[0]);
        PyMem_Free(p->tokens);
        PyMem_Free(p);
        return (Parser *) PyErr_NoMemory();
    }

    p->mark = 0;
    p->fill = 0;
    p->size = 1;

    p->errcode = errcode;
    p->arena = arena;
    p->start_rule = start_rule;
    p->parsing_started = 0;
    p->normalize = NULL;
    p->error_indicator = 0;

    p->starting_lineno = 0;
    p->starting_col_offset = 0;
    p->flags = flags;
    p->feature_version = feature_version;
    p->known_err_token = NULL;
    p->level = 0;
    p->call_invalid_rules = 0;
    return p;
}

void
_PyPegen_Parser_Free(Parser *p)
{
    Py_XDECREF(p->normalize);
    for (int i = 0; i < p->size; i++) {
        PyMem_Free(p->tokens[i]);
    }
    PyMem_Free(p->tokens);
    growable_comment_array_deallocate(&p->type_ignore_comments);
    PyMem_Free(p);
}
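
/*
 * Illustrative sketch (not part of the build): the minimal lifecycle that
 * the _PyPegen_run_parser_from_* helpers below wrap. `tok` and `arena` are
 * assumed to be a valid tokenizer and arena; error handling is elided.
 */
#if 0
Parser *p = _PyPegen_Parser_New(tok, Py_file_input, 0 /* flags */,
                                PY_MINOR_VERSION, NULL /* errcode */, arena);
if (p != NULL) {
    mod_ty mod = _PyPegen_run_parser(p);  // NULL means an error was set.
    _PyPegen_Parser_Free(p);
}
#endif
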
static void
reset_parser_state_for_error_pass(Parser *p)
{
    for (int i = 0; i < p->fill; i++) {
        p->tokens[i]->memo = NULL;
    }
    p->mark = 0;
    p->call_invalid_rules = 1;
    // Don't try to get extra tokens in interactive mode when trying to
    // raise specialized errors in the second pass.
    p->tok->interactive_underflow = IUNDERFLOW_STOP;
}

static inline int
_is_end_of_source(Parser *p) {
    int err = p->tok->done;
    return err == E_EOF || err == E_EOFS || err == E_EOLS;
}

void *
_PyPegen_run_parser(Parser *p)
{
    void *res = _PyPegen_parse(p);
    assert(p->level == 0);
    if (res == NULL) {
        if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
            PyErr_Clear();
            return RAISE_SYNTAX_ERROR("incomplete input");
        }
        if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            return NULL;
        }
        // Make a second parser pass. In this pass we activate heavier and slower checks
        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
        // rules will be active during parsing.
        Token *last_token = p->tokens[p->fill - 1];
        reset_parser_state_for_error_pass(p);
        _PyPegen_parse(p);

        // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the
        // failure point.
        _Pypegen_set_syntax_error(p, last_token);
        return NULL;
    }

    if (p->start_rule == Py_single_input && bad_single_statement(p)) {
        p->tok->done = E_BADSINGLE;  // This is not necessary for now, but might be in the future
        return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
    }

    // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
    if (p->start_rule == Py_single_input ||
        p->start_rule == Py_file_input ||
        p->start_rule == Py_eval_input)
    {
        if (!_PyAST_Validate(res)) {
            return NULL;
        }
    }
#endif
    return res;
}

mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
                                      const char *enc, const char *ps1, const char *ps2,
                                      PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }
    if (!tok->fp || ps1 != NULL || ps2 != NULL ||
        PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) {
        tok->fp_interactive = 1;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = filename_ob;
    Py_INCREF(filename_ob);

    // From here on we need to clean up even if there's an error
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
                                    errcode, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

error:
    _PyTokenizer_Free(tok);
    return result;
}
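
/*
 * Illustrative sketch (not part of the build): parsing a script through the
 * FILE* entry point above. The file name is hypothetical and all cleanup
 * and error checking are elided.
 */
#if 0
FILE *fp = fopen("script.py", "rb");
PyObject *filename = PyUnicode_FromString("script.py");
PyArena *arena = _PyArena_New();
mod_ty mod = _PyPegen_run_parser_from_file_pointer(
    fp, Py_file_input, filename, NULL /* enc */, NULL /* ps1 */, NULL /* ps2 */,
    NULL /* flags */, NULL /* errcode */, arena);
#endif
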
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
                                PyCompilerFlags *flags, PyArena *arena)
{
    int exec_input = start_rule == Py_file_input;

    struct tok_state *tok;
    if (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE) {
        tok = _PyTokenizer_FromUTF8(str, exec_input);
    } else {
        tok = _PyTokenizer_FromString(str, exec_input);
    }
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }
    // This transfers the ownership to the tokenizer
    tok->filename = filename_ob;
    Py_INCREF(filename_ob);

    // From here on we need to clean up even if there's an error
    mod_ty result = NULL;

    int parser_flags = compute_parser_flags(flags);
    int feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
        flags->cf_feature_version : PY_MINOR_VERSION;
    Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
                                    NULL, arena);
    if (p == NULL) {
        goto error;
    }

    result = _PyPegen_run_parser(p);
    _PyPegen_Parser_Free(p);

error:
    _PyTokenizer_Free(tok);
    return result;
}
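
/*
 * Illustrative sketch (not part of the build): the string entry point is the
 * simplest way to obtain an AST from C. The arena owns the resulting tree,
 * so freeing the arena frees the AST. Cleanup and error checks are elided.
 */
#if 0
PyArena *arena = _PyArena_New();
PyObject *filename = PyUnicode_FromString("<example>");
mod_ty mod = _PyPegen_run_parser_from_string("x = 1\n", Py_file_input,
                                             filename, NULL /* flags */, arena);
#endif
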