17db96d56Sopenharmony_ci#include <Python.h> 27db96d56Sopenharmony_ci#include <errcode.h> 37db96d56Sopenharmony_ci 47db96d56Sopenharmony_ci#include "tokenizer.h" 57db96d56Sopenharmony_ci#include "pegen.h" 67db96d56Sopenharmony_ci 77db96d56Sopenharmony_ci// TOKENIZER ERRORS 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_civoid 107db96d56Sopenharmony_ci_PyPegen_raise_tokenizer_init_error(PyObject *filename) 117db96d56Sopenharmony_ci{ 127db96d56Sopenharmony_ci if (!(PyErr_ExceptionMatches(PyExc_LookupError) 137db96d56Sopenharmony_ci || PyErr_ExceptionMatches(PyExc_SyntaxError) 147db96d56Sopenharmony_ci || PyErr_ExceptionMatches(PyExc_ValueError) 157db96d56Sopenharmony_ci || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { 167db96d56Sopenharmony_ci return; 177db96d56Sopenharmony_ci } 187db96d56Sopenharmony_ci PyObject *errstr = NULL; 197db96d56Sopenharmony_ci PyObject *tuple = NULL; 207db96d56Sopenharmony_ci PyObject *type; 217db96d56Sopenharmony_ci PyObject *value; 227db96d56Sopenharmony_ci PyObject *tback; 237db96d56Sopenharmony_ci PyErr_Fetch(&type, &value, &tback); 247db96d56Sopenharmony_ci errstr = PyObject_Str(value); 257db96d56Sopenharmony_ci if (!errstr) { 267db96d56Sopenharmony_ci goto error; 277db96d56Sopenharmony_ci } 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); 307db96d56Sopenharmony_ci if (!tmp) { 317db96d56Sopenharmony_ci goto error; 327db96d56Sopenharmony_ci } 337db96d56Sopenharmony_ci 347db96d56Sopenharmony_ci tuple = PyTuple_Pack(2, errstr, tmp); 357db96d56Sopenharmony_ci Py_DECREF(tmp); 367db96d56Sopenharmony_ci if (!value) { 377db96d56Sopenharmony_ci goto error; 387db96d56Sopenharmony_ci } 397db96d56Sopenharmony_ci PyErr_SetObject(PyExc_SyntaxError, tuple); 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_cierror: 427db96d56Sopenharmony_ci Py_XDECREF(type); 437db96d56Sopenharmony_ci Py_XDECREF(value); 447db96d56Sopenharmony_ci Py_XDECREF(tback); 457db96d56Sopenharmony_ci Py_XDECREF(errstr); 467db96d56Sopenharmony_ci Py_XDECREF(tuple); 477db96d56Sopenharmony_ci} 487db96d56Sopenharmony_ci 497db96d56Sopenharmony_cistatic inline void 507db96d56Sopenharmony_ciraise_unclosed_parentheses_error(Parser *p) { 517db96d56Sopenharmony_ci int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; 527db96d56Sopenharmony_ci int error_col = p->tok->parencolstack[p->tok->level-1]; 537db96d56Sopenharmony_ci RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, 547db96d56Sopenharmony_ci error_lineno, error_col, error_lineno, -1, 557db96d56Sopenharmony_ci "'%c' was never closed", 567db96d56Sopenharmony_ci p->tok->parenstack[p->tok->level-1]); 577db96d56Sopenharmony_ci} 587db96d56Sopenharmony_ci 597db96d56Sopenharmony_ciint 607db96d56Sopenharmony_ci_Pypegen_tokenizer_error(Parser *p) 617db96d56Sopenharmony_ci{ 627db96d56Sopenharmony_ci if (PyErr_Occurred()) { 637db96d56Sopenharmony_ci return -1; 647db96d56Sopenharmony_ci } 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_ci const char *msg = NULL; 677db96d56Sopenharmony_ci PyObject* errtype = PyExc_SyntaxError; 687db96d56Sopenharmony_ci Py_ssize_t col_offset = -1; 697db96d56Sopenharmony_ci switch (p->tok->done) { 707db96d56Sopenharmony_ci case E_TOKEN: 717db96d56Sopenharmony_ci msg = "invalid token"; 727db96d56Sopenharmony_ci break; 737db96d56Sopenharmony_ci case E_EOF: 747db96d56Sopenharmony_ci if (p->tok->level) { 757db96d56Sopenharmony_ci raise_unclosed_parentheses_error(p); 767db96d56Sopenharmony_ci } else { 777db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); 787db96d56Sopenharmony_ci } 797db96d56Sopenharmony_ci return -1; 807db96d56Sopenharmony_ci case E_DEDENT: 817db96d56Sopenharmony_ci RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level"); 827db96d56Sopenharmony_ci return -1; 837db96d56Sopenharmony_ci case E_INTR: 847db96d56Sopenharmony_ci if (!PyErr_Occurred()) { 857db96d56Sopenharmony_ci PyErr_SetNone(PyExc_KeyboardInterrupt); 867db96d56Sopenharmony_ci } 877db96d56Sopenharmony_ci return -1; 887db96d56Sopenharmony_ci case E_NOMEM: 897db96d56Sopenharmony_ci PyErr_NoMemory(); 907db96d56Sopenharmony_ci return -1; 917db96d56Sopenharmony_ci case E_TABSPACE: 927db96d56Sopenharmony_ci errtype = PyExc_TabError; 937db96d56Sopenharmony_ci msg = "inconsistent use of tabs and spaces in indentation"; 947db96d56Sopenharmony_ci break; 957db96d56Sopenharmony_ci case E_TOODEEP: 967db96d56Sopenharmony_ci errtype = PyExc_IndentationError; 977db96d56Sopenharmony_ci msg = "too many levels of indentation"; 987db96d56Sopenharmony_ci break; 997db96d56Sopenharmony_ci case E_LINECONT: { 1007db96d56Sopenharmony_ci col_offset = p->tok->cur - p->tok->buf - 1; 1017db96d56Sopenharmony_ci msg = "unexpected character after line continuation character"; 1027db96d56Sopenharmony_ci break; 1037db96d56Sopenharmony_ci } 1047db96d56Sopenharmony_ci default: 1057db96d56Sopenharmony_ci msg = "unknown parsing error"; 1067db96d56Sopenharmony_ci } 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, 1097db96d56Sopenharmony_ci col_offset >= 0 ? col_offset : 0, 1107db96d56Sopenharmony_ci p->tok->lineno, -1, msg); 1117db96d56Sopenharmony_ci return -1; 1127db96d56Sopenharmony_ci} 1137db96d56Sopenharmony_ci 1147db96d56Sopenharmony_ciint 1157db96d56Sopenharmony_ci_Pypegen_raise_decode_error(Parser *p) 1167db96d56Sopenharmony_ci{ 1177db96d56Sopenharmony_ci assert(PyErr_Occurred()); 1187db96d56Sopenharmony_ci const char *errtype = NULL; 1197db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { 1207db96d56Sopenharmony_ci errtype = "unicode error"; 1217db96d56Sopenharmony_ci } 1227db96d56Sopenharmony_ci else if (PyErr_ExceptionMatches(PyExc_ValueError)) { 1237db96d56Sopenharmony_ci errtype = "value error"; 1247db96d56Sopenharmony_ci } 1257db96d56Sopenharmony_ci if (errtype) { 1267db96d56Sopenharmony_ci PyObject *type; 1277db96d56Sopenharmony_ci PyObject *value; 1287db96d56Sopenharmony_ci PyObject *tback; 1297db96d56Sopenharmony_ci PyObject *errstr; 1307db96d56Sopenharmony_ci PyErr_Fetch(&type, &value, &tback); 1317db96d56Sopenharmony_ci errstr = PyObject_Str(value); 1327db96d56Sopenharmony_ci if (errstr) { 1337db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr); 1347db96d56Sopenharmony_ci Py_DECREF(errstr); 1357db96d56Sopenharmony_ci } 1367db96d56Sopenharmony_ci else { 1377db96d56Sopenharmony_ci PyErr_Clear(); 1387db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("(%s) unknown error", errtype); 1397db96d56Sopenharmony_ci } 1407db96d56Sopenharmony_ci Py_XDECREF(type); 1417db96d56Sopenharmony_ci Py_XDECREF(value); 1427db96d56Sopenharmony_ci Py_XDECREF(tback); 1437db96d56Sopenharmony_ci } 1447db96d56Sopenharmony_ci 1457db96d56Sopenharmony_ci return -1; 1467db96d56Sopenharmony_ci} 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_cistatic int 1497db96d56Sopenharmony_ci_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) { 1507db96d56Sopenharmony_ci // Tokenize the whole input to see if there are any tokenization 1517db96d56Sopenharmony_ci // errors such as mistmatching parentheses. These will get priority 1527db96d56Sopenharmony_ci // over generic syntax errors only if the line number of the error is 1537db96d56Sopenharmony_ci // before the one that we had for the generic error. 1547db96d56Sopenharmony_ci 1557db96d56Sopenharmony_ci // We don't want to tokenize to the end for interactive input 1567db96d56Sopenharmony_ci if (p->tok->prompt != NULL) { 1577db96d56Sopenharmony_ci return 0; 1587db96d56Sopenharmony_ci } 1597db96d56Sopenharmony_ci 1607db96d56Sopenharmony_ci PyObject *type, *value, *traceback; 1617db96d56Sopenharmony_ci PyErr_Fetch(&type, &value, &traceback); 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; 1647db96d56Sopenharmony_ci Py_ssize_t current_err_line = current_token->lineno; 1657db96d56Sopenharmony_ci 1667db96d56Sopenharmony_ci int ret = 0; 1677db96d56Sopenharmony_ci 1687db96d56Sopenharmony_ci for (;;) { 1697db96d56Sopenharmony_ci const char *start; 1707db96d56Sopenharmony_ci const char *end; 1717db96d56Sopenharmony_ci switch (_PyTokenizer_Get(p->tok, &start, &end)) { 1727db96d56Sopenharmony_ci case ERRORTOKEN: 1737db96d56Sopenharmony_ci if (PyErr_Occurred()) { 1747db96d56Sopenharmony_ci ret = -1; 1757db96d56Sopenharmony_ci goto exit; 1767db96d56Sopenharmony_ci } 1777db96d56Sopenharmony_ci if (p->tok->level != 0) { 1787db96d56Sopenharmony_ci int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; 1797db96d56Sopenharmony_ci if (current_err_line > error_lineno) { 1807db96d56Sopenharmony_ci raise_unclosed_parentheses_error(p); 1817db96d56Sopenharmony_ci ret = -1; 1827db96d56Sopenharmony_ci goto exit; 1837db96d56Sopenharmony_ci } 1847db96d56Sopenharmony_ci } 1857db96d56Sopenharmony_ci break; 1867db96d56Sopenharmony_ci case ENDMARKER: 1877db96d56Sopenharmony_ci break; 1887db96d56Sopenharmony_ci default: 1897db96d56Sopenharmony_ci continue; 1907db96d56Sopenharmony_ci } 1917db96d56Sopenharmony_ci break; 1927db96d56Sopenharmony_ci } 1937db96d56Sopenharmony_ci 1947db96d56Sopenharmony_ci 1957db96d56Sopenharmony_ciexit: 1967db96d56Sopenharmony_ci if (PyErr_Occurred()) { 1977db96d56Sopenharmony_ci Py_XDECREF(value); 1987db96d56Sopenharmony_ci Py_XDECREF(type); 1997db96d56Sopenharmony_ci Py_XDECREF(traceback); 2007db96d56Sopenharmony_ci } else { 2017db96d56Sopenharmony_ci PyErr_Restore(type, value, traceback); 2027db96d56Sopenharmony_ci } 2037db96d56Sopenharmony_ci return ret; 2047db96d56Sopenharmony_ci} 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ci// PARSER ERRORS 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_civoid * 2097db96d56Sopenharmony_ci_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...) 2107db96d56Sopenharmony_ci{ 2117db96d56Sopenharmony_ci if (p->fill == 0) { 2127db96d56Sopenharmony_ci va_list va; 2137db96d56Sopenharmony_ci va_start(va, errmsg); 2147db96d56Sopenharmony_ci _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va); 2157db96d56Sopenharmony_ci va_end(va); 2167db96d56Sopenharmony_ci return NULL; 2177db96d56Sopenharmony_ci } 2187db96d56Sopenharmony_ci 2197db96d56Sopenharmony_ci Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; 2207db96d56Sopenharmony_ci Py_ssize_t col_offset; 2217db96d56Sopenharmony_ci Py_ssize_t end_col_offset = -1; 2227db96d56Sopenharmony_ci if (t->col_offset == -1) { 2237db96d56Sopenharmony_ci if (p->tok->cur == p->tok->buf) { 2247db96d56Sopenharmony_ci col_offset = 0; 2257db96d56Sopenharmony_ci } else { 2267db96d56Sopenharmony_ci const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf; 2277db96d56Sopenharmony_ci col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int); 2287db96d56Sopenharmony_ci } 2297db96d56Sopenharmony_ci } else { 2307db96d56Sopenharmony_ci col_offset = t->col_offset + 1; 2317db96d56Sopenharmony_ci } 2327db96d56Sopenharmony_ci 2337db96d56Sopenharmony_ci if (t->end_col_offset != -1) { 2347db96d56Sopenharmony_ci end_col_offset = t->end_col_offset + 1; 2357db96d56Sopenharmony_ci } 2367db96d56Sopenharmony_ci 2377db96d56Sopenharmony_ci va_list va; 2387db96d56Sopenharmony_ci va_start(va, errmsg); 2397db96d56Sopenharmony_ci _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va); 2407db96d56Sopenharmony_ci va_end(va); 2417db96d56Sopenharmony_ci 2427db96d56Sopenharmony_ci return NULL; 2437db96d56Sopenharmony_ci} 2447db96d56Sopenharmony_ci 2457db96d56Sopenharmony_cistatic PyObject * 2467db96d56Sopenharmony_ciget_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno) 2477db96d56Sopenharmony_ci{ 2487db96d56Sopenharmony_ci /* If the file descriptor is interactive, the source lines of the current 2497db96d56Sopenharmony_ci * (multi-line) statement are stored in p->tok->interactive_src_start. 2507db96d56Sopenharmony_ci * If not, we're parsing from a string, which means that the whole source 2517db96d56Sopenharmony_ci * is stored in p->tok->str. */ 2527db96d56Sopenharmony_ci assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin); 2537db96d56Sopenharmony_ci 2547db96d56Sopenharmony_ci char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str; 2557db96d56Sopenharmony_ci if (cur_line == NULL) { 2567db96d56Sopenharmony_ci assert(p->tok->fp_interactive); 2577db96d56Sopenharmony_ci // We can reach this point if the tokenizer buffers for interactive source have not been 2587db96d56Sopenharmony_ci // initialized because we failed to decode the original source with the given locale. 2597db96d56Sopenharmony_ci return PyUnicode_FromStringAndSize("", 0); 2607db96d56Sopenharmony_ci } 2617db96d56Sopenharmony_ci 2627db96d56Sopenharmony_ci Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno; 2637db96d56Sopenharmony_ci const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp; 2647db96d56Sopenharmony_ci 2657db96d56Sopenharmony_ci for (int i = 0; i < relative_lineno - 1; i++) { 2667db96d56Sopenharmony_ci char *new_line = strchr(cur_line, '\n'); 2677db96d56Sopenharmony_ci // The assert is here for debug builds but the conditional that 2687db96d56Sopenharmony_ci // follows is there so in release builds we do not crash at the cost 2697db96d56Sopenharmony_ci // to report a potentially wrong line. 2707db96d56Sopenharmony_ci assert(new_line != NULL && new_line + 1 < buf_end); 2717db96d56Sopenharmony_ci if (new_line == NULL || new_line + 1 > buf_end) { 2727db96d56Sopenharmony_ci break; 2737db96d56Sopenharmony_ci } 2747db96d56Sopenharmony_ci cur_line = new_line + 1; 2757db96d56Sopenharmony_ci } 2767db96d56Sopenharmony_ci 2777db96d56Sopenharmony_ci char *next_newline; 2787db96d56Sopenharmony_ci if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line 2797db96d56Sopenharmony_ci next_newline = cur_line + strlen(cur_line); 2807db96d56Sopenharmony_ci } 2817db96d56Sopenharmony_ci return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace"); 2827db96d56Sopenharmony_ci} 2837db96d56Sopenharmony_ci 2847db96d56Sopenharmony_civoid * 2857db96d56Sopenharmony_ci_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, 2867db96d56Sopenharmony_ci Py_ssize_t lineno, Py_ssize_t col_offset, 2877db96d56Sopenharmony_ci Py_ssize_t end_lineno, Py_ssize_t end_col_offset, 2887db96d56Sopenharmony_ci const char *errmsg, va_list va) 2897db96d56Sopenharmony_ci{ 2907db96d56Sopenharmony_ci PyObject *value = NULL; 2917db96d56Sopenharmony_ci PyObject *errstr = NULL; 2927db96d56Sopenharmony_ci PyObject *error_line = NULL; 2937db96d56Sopenharmony_ci PyObject *tmp = NULL; 2947db96d56Sopenharmony_ci p->error_indicator = 1; 2957db96d56Sopenharmony_ci 2967db96d56Sopenharmony_ci if (end_lineno == CURRENT_POS) { 2977db96d56Sopenharmony_ci end_lineno = p->tok->lineno; 2987db96d56Sopenharmony_ci } 2997db96d56Sopenharmony_ci if (end_col_offset == CURRENT_POS) { 3007db96d56Sopenharmony_ci end_col_offset = p->tok->cur - p->tok->line_start; 3017db96d56Sopenharmony_ci } 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_ci if (p->start_rule == Py_fstring_input) { 3047db96d56Sopenharmony_ci const char *fstring_msg = "f-string: "; 3057db96d56Sopenharmony_ci Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg); 3067db96d56Sopenharmony_ci 3077db96d56Sopenharmony_ci char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character 3087db96d56Sopenharmony_ci if (!new_errmsg) { 3097db96d56Sopenharmony_ci return (void *) PyErr_NoMemory(); 3107db96d56Sopenharmony_ci } 3117db96d56Sopenharmony_ci 3127db96d56Sopenharmony_ci // Copy both strings into new buffer 3137db96d56Sopenharmony_ci memcpy(new_errmsg, fstring_msg, strlen(fstring_msg)); 3147db96d56Sopenharmony_ci memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg)); 3157db96d56Sopenharmony_ci new_errmsg[len] = 0; 3167db96d56Sopenharmony_ci errmsg = new_errmsg; 3177db96d56Sopenharmony_ci } 3187db96d56Sopenharmony_ci errstr = PyUnicode_FromFormatV(errmsg, va); 3197db96d56Sopenharmony_ci if (!errstr) { 3207db96d56Sopenharmony_ci goto error; 3217db96d56Sopenharmony_ci } 3227db96d56Sopenharmony_ci 3237db96d56Sopenharmony_ci if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL) { 3247db96d56Sopenharmony_ci error_line = get_error_line_from_tokenizer_buffers(p, lineno); 3257db96d56Sopenharmony_ci } 3267db96d56Sopenharmony_ci else if (p->start_rule == Py_file_input) { 3277db96d56Sopenharmony_ci error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename, 3287db96d56Sopenharmony_ci (int) lineno, p->tok->encoding); 3297db96d56Sopenharmony_ci } 3307db96d56Sopenharmony_ci 3317db96d56Sopenharmony_ci if (!error_line) { 3327db96d56Sopenharmony_ci /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called, 3337db96d56Sopenharmony_ci then we need to find the error line from some other source, because 3347db96d56Sopenharmony_ci p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly 3357db96d56Sopenharmony_ci failed or we're parsing from a string or the REPL. There's a third edge case where 3367db96d56Sopenharmony_ci we're actually parsing from a file, which has an E_EOF SyntaxError and in that case 3377db96d56Sopenharmony_ci `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which 3387db96d56Sopenharmony_ci does not physically exist */ 3397db96d56Sopenharmony_ci assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); 3407db96d56Sopenharmony_ci 3417db96d56Sopenharmony_ci if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { 3427db96d56Sopenharmony_ci Py_ssize_t size = p->tok->inp - p->tok->buf; 3437db96d56Sopenharmony_ci error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); 3447db96d56Sopenharmony_ci } 3457db96d56Sopenharmony_ci else if (p->tok->fp == NULL || p->tok->fp == stdin) { 3467db96d56Sopenharmony_ci error_line = get_error_line_from_tokenizer_buffers(p, lineno); 3477db96d56Sopenharmony_ci } 3487db96d56Sopenharmony_ci else { 3497db96d56Sopenharmony_ci error_line = PyUnicode_FromStringAndSize("", 0); 3507db96d56Sopenharmony_ci } 3517db96d56Sopenharmony_ci if (!error_line) { 3527db96d56Sopenharmony_ci goto error; 3537db96d56Sopenharmony_ci } 3547db96d56Sopenharmony_ci } 3557db96d56Sopenharmony_ci 3567db96d56Sopenharmony_ci if (p->start_rule == Py_fstring_input) { 3577db96d56Sopenharmony_ci col_offset -= p->starting_col_offset; 3587db96d56Sopenharmony_ci end_col_offset -= p->starting_col_offset; 3597db96d56Sopenharmony_ci } 3607db96d56Sopenharmony_ci 3617db96d56Sopenharmony_ci Py_ssize_t col_number = col_offset; 3627db96d56Sopenharmony_ci Py_ssize_t end_col_number = end_col_offset; 3637db96d56Sopenharmony_ci 3647db96d56Sopenharmony_ci if (p->tok->encoding != NULL) { 3657db96d56Sopenharmony_ci col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); 3667db96d56Sopenharmony_ci if (col_number < 0) { 3677db96d56Sopenharmony_ci goto error; 3687db96d56Sopenharmony_ci } 3697db96d56Sopenharmony_ci if (end_col_number > 0) { 3707db96d56Sopenharmony_ci Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); 3717db96d56Sopenharmony_ci if (end_col_offset < 0) { 3727db96d56Sopenharmony_ci goto error; 3737db96d56Sopenharmony_ci } else { 3747db96d56Sopenharmony_ci end_col_number = end_col_offset; 3757db96d56Sopenharmony_ci } 3767db96d56Sopenharmony_ci } 3777db96d56Sopenharmony_ci } 3787db96d56Sopenharmony_ci tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); 3797db96d56Sopenharmony_ci if (!tmp) { 3807db96d56Sopenharmony_ci goto error; 3817db96d56Sopenharmony_ci } 3827db96d56Sopenharmony_ci value = PyTuple_Pack(2, errstr, tmp); 3837db96d56Sopenharmony_ci Py_DECREF(tmp); 3847db96d56Sopenharmony_ci if (!value) { 3857db96d56Sopenharmony_ci goto error; 3867db96d56Sopenharmony_ci } 3877db96d56Sopenharmony_ci PyErr_SetObject(errtype, value); 3887db96d56Sopenharmony_ci 3897db96d56Sopenharmony_ci Py_DECREF(errstr); 3907db96d56Sopenharmony_ci Py_DECREF(value); 3917db96d56Sopenharmony_ci if (p->start_rule == Py_fstring_input) { 3927db96d56Sopenharmony_ci PyMem_Free((void *)errmsg); 3937db96d56Sopenharmony_ci } 3947db96d56Sopenharmony_ci return NULL; 3957db96d56Sopenharmony_ci 3967db96d56Sopenharmony_cierror: 3977db96d56Sopenharmony_ci Py_XDECREF(errstr); 3987db96d56Sopenharmony_ci Py_XDECREF(error_line); 3997db96d56Sopenharmony_ci if (p->start_rule == Py_fstring_input) { 4007db96d56Sopenharmony_ci PyMem_Free((void *)errmsg); 4017db96d56Sopenharmony_ci } 4027db96d56Sopenharmony_ci return NULL; 4037db96d56Sopenharmony_ci} 4047db96d56Sopenharmony_ci 4057db96d56Sopenharmony_civoid 4067db96d56Sopenharmony_ci_Pypegen_set_syntax_error(Parser* p, Token* last_token) { 4077db96d56Sopenharmony_ci // Existing sintax error 4087db96d56Sopenharmony_ci if (PyErr_Occurred()) { 4097db96d56Sopenharmony_ci // Prioritize tokenizer errors to custom syntax errors raised 4107db96d56Sopenharmony_ci // on the second phase only if the errors come from the parser. 4117db96d56Sopenharmony_ci int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK); 4127db96d56Sopenharmony_ci if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) { 4137db96d56Sopenharmony_ci _PyPegen_tokenize_full_source_to_check_for_errors(p); 4147db96d56Sopenharmony_ci } 4157db96d56Sopenharmony_ci // Propagate the existing syntax error. 4167db96d56Sopenharmony_ci return; 4177db96d56Sopenharmony_ci } 4187db96d56Sopenharmony_ci // Initialization error 4197db96d56Sopenharmony_ci if (p->fill == 0) { 4207db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("error at start before reading any input"); 4217db96d56Sopenharmony_ci } 4227db96d56Sopenharmony_ci // Parser encountered EOF (End of File) unexpectedtly 4237db96d56Sopenharmony_ci if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) { 4247db96d56Sopenharmony_ci if (p->tok->level) { 4257db96d56Sopenharmony_ci raise_unclosed_parentheses_error(p); 4267db96d56Sopenharmony_ci } else { 4277db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); 4287db96d56Sopenharmony_ci } 4297db96d56Sopenharmony_ci return; 4307db96d56Sopenharmony_ci } 4317db96d56Sopenharmony_ci // Indentation error in the tokenizer 4327db96d56Sopenharmony_ci if (last_token->type == INDENT || last_token->type == DEDENT) { 4337db96d56Sopenharmony_ci RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent"); 4347db96d56Sopenharmony_ci return; 4357db96d56Sopenharmony_ci } 4367db96d56Sopenharmony_ci // Unknown error (generic case) 4377db96d56Sopenharmony_ci 4387db96d56Sopenharmony_ci // Use the last token we found on the first pass to avoid reporting 4397db96d56Sopenharmony_ci // incorrect locations for generic syntax errors just because we reached 4407db96d56Sopenharmony_ci // further away when trying to find specific syntax errors in the second 4417db96d56Sopenharmony_ci // pass. 4427db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax"); 4437db96d56Sopenharmony_ci // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing 4447db96d56Sopenharmony_ci // generic SyntaxError we just raised if errors are found. 4457db96d56Sopenharmony_ci _PyPegen_tokenize_full_source_to_check_for_errors(p); 4467db96d56Sopenharmony_ci} 447