17db96d56Sopenharmony_ci#include <Python.h>
27db96d56Sopenharmony_ci#include <errcode.h>
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci#include "tokenizer.h"
57db96d56Sopenharmony_ci#include "pegen.h"
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ci// TOKENIZER ERRORS
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_civoid
107db96d56Sopenharmony_ci_PyPegen_raise_tokenizer_init_error(PyObject *filename)
117db96d56Sopenharmony_ci{
127db96d56Sopenharmony_ci    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
137db96d56Sopenharmony_ci          || PyErr_ExceptionMatches(PyExc_SyntaxError)
147db96d56Sopenharmony_ci          || PyErr_ExceptionMatches(PyExc_ValueError)
157db96d56Sopenharmony_ci          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
167db96d56Sopenharmony_ci        return;
177db96d56Sopenharmony_ci    }
187db96d56Sopenharmony_ci    PyObject *errstr = NULL;
197db96d56Sopenharmony_ci    PyObject *tuple = NULL;
207db96d56Sopenharmony_ci    PyObject *type;
217db96d56Sopenharmony_ci    PyObject *value;
227db96d56Sopenharmony_ci    PyObject *tback;
237db96d56Sopenharmony_ci    PyErr_Fetch(&type, &value, &tback);
247db96d56Sopenharmony_ci    errstr = PyObject_Str(value);
257db96d56Sopenharmony_ci    if (!errstr) {
267db96d56Sopenharmony_ci        goto error;
277db96d56Sopenharmony_ci    }
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
307db96d56Sopenharmony_ci    if (!tmp) {
317db96d56Sopenharmony_ci        goto error;
327db96d56Sopenharmony_ci    }
337db96d56Sopenharmony_ci
347db96d56Sopenharmony_ci    tuple = PyTuple_Pack(2, errstr, tmp);
357db96d56Sopenharmony_ci    Py_DECREF(tmp);
367db96d56Sopenharmony_ci    if (!value) {
377db96d56Sopenharmony_ci        goto error;
387db96d56Sopenharmony_ci    }
397db96d56Sopenharmony_ci    PyErr_SetObject(PyExc_SyntaxError, tuple);
407db96d56Sopenharmony_ci
417db96d56Sopenharmony_cierror:
427db96d56Sopenharmony_ci    Py_XDECREF(type);
437db96d56Sopenharmony_ci    Py_XDECREF(value);
447db96d56Sopenharmony_ci    Py_XDECREF(tback);
457db96d56Sopenharmony_ci    Py_XDECREF(errstr);
467db96d56Sopenharmony_ci    Py_XDECREF(tuple);
477db96d56Sopenharmony_ci}
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_cistatic inline void
507db96d56Sopenharmony_ciraise_unclosed_parentheses_error(Parser *p) {
517db96d56Sopenharmony_ci       int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
527db96d56Sopenharmony_ci       int error_col = p->tok->parencolstack[p->tok->level-1];
537db96d56Sopenharmony_ci       RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
547db96d56Sopenharmony_ci                                  error_lineno, error_col, error_lineno, -1,
557db96d56Sopenharmony_ci                                  "'%c' was never closed",
567db96d56Sopenharmony_ci                                  p->tok->parenstack[p->tok->level-1]);
577db96d56Sopenharmony_ci}
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ciint
607db96d56Sopenharmony_ci_Pypegen_tokenizer_error(Parser *p)
617db96d56Sopenharmony_ci{
627db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
637db96d56Sopenharmony_ci        return -1;
647db96d56Sopenharmony_ci    }
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci    const char *msg = NULL;
677db96d56Sopenharmony_ci    PyObject* errtype = PyExc_SyntaxError;
687db96d56Sopenharmony_ci    Py_ssize_t col_offset = -1;
697db96d56Sopenharmony_ci    switch (p->tok->done) {
707db96d56Sopenharmony_ci        case E_TOKEN:
717db96d56Sopenharmony_ci            msg = "invalid token";
727db96d56Sopenharmony_ci            break;
737db96d56Sopenharmony_ci        case E_EOF:
747db96d56Sopenharmony_ci            if (p->tok->level) {
757db96d56Sopenharmony_ci                raise_unclosed_parentheses_error(p);
767db96d56Sopenharmony_ci            } else {
777db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
787db96d56Sopenharmony_ci            }
797db96d56Sopenharmony_ci            return -1;
807db96d56Sopenharmony_ci        case E_DEDENT:
817db96d56Sopenharmony_ci            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
827db96d56Sopenharmony_ci            return -1;
837db96d56Sopenharmony_ci        case E_INTR:
847db96d56Sopenharmony_ci            if (!PyErr_Occurred()) {
857db96d56Sopenharmony_ci                PyErr_SetNone(PyExc_KeyboardInterrupt);
867db96d56Sopenharmony_ci            }
877db96d56Sopenharmony_ci            return -1;
887db96d56Sopenharmony_ci        case E_NOMEM:
897db96d56Sopenharmony_ci            PyErr_NoMemory();
907db96d56Sopenharmony_ci            return -1;
917db96d56Sopenharmony_ci        case E_TABSPACE:
927db96d56Sopenharmony_ci            errtype = PyExc_TabError;
937db96d56Sopenharmony_ci            msg = "inconsistent use of tabs and spaces in indentation";
947db96d56Sopenharmony_ci            break;
957db96d56Sopenharmony_ci        case E_TOODEEP:
967db96d56Sopenharmony_ci            errtype = PyExc_IndentationError;
977db96d56Sopenharmony_ci            msg = "too many levels of indentation";
987db96d56Sopenharmony_ci            break;
997db96d56Sopenharmony_ci        case E_LINECONT: {
1007db96d56Sopenharmony_ci            col_offset = p->tok->cur - p->tok->buf - 1;
1017db96d56Sopenharmony_ci            msg = "unexpected character after line continuation character";
1027db96d56Sopenharmony_ci            break;
1037db96d56Sopenharmony_ci        }
1047db96d56Sopenharmony_ci        default:
1057db96d56Sopenharmony_ci            msg = "unknown parsing error";
1067db96d56Sopenharmony_ci    }
1077db96d56Sopenharmony_ci
1087db96d56Sopenharmony_ci    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
1097db96d56Sopenharmony_ci                               col_offset >= 0 ? col_offset : 0,
1107db96d56Sopenharmony_ci                               p->tok->lineno, -1, msg);
1117db96d56Sopenharmony_ci    return -1;
1127db96d56Sopenharmony_ci}
1137db96d56Sopenharmony_ci
1147db96d56Sopenharmony_ciint
1157db96d56Sopenharmony_ci_Pypegen_raise_decode_error(Parser *p)
1167db96d56Sopenharmony_ci{
1177db96d56Sopenharmony_ci    assert(PyErr_Occurred());
1187db96d56Sopenharmony_ci    const char *errtype = NULL;
1197db96d56Sopenharmony_ci    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
1207db96d56Sopenharmony_ci        errtype = "unicode error";
1217db96d56Sopenharmony_ci    }
1227db96d56Sopenharmony_ci    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
1237db96d56Sopenharmony_ci        errtype = "value error";
1247db96d56Sopenharmony_ci    }
1257db96d56Sopenharmony_ci    if (errtype) {
1267db96d56Sopenharmony_ci        PyObject *type;
1277db96d56Sopenharmony_ci        PyObject *value;
1287db96d56Sopenharmony_ci        PyObject *tback;
1297db96d56Sopenharmony_ci        PyObject *errstr;
1307db96d56Sopenharmony_ci        PyErr_Fetch(&type, &value, &tback);
1317db96d56Sopenharmony_ci        errstr = PyObject_Str(value);
1327db96d56Sopenharmony_ci        if (errstr) {
1337db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
1347db96d56Sopenharmony_ci            Py_DECREF(errstr);
1357db96d56Sopenharmony_ci        }
1367db96d56Sopenharmony_ci        else {
1377db96d56Sopenharmony_ci            PyErr_Clear();
1387db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
1397db96d56Sopenharmony_ci        }
1407db96d56Sopenharmony_ci        Py_XDECREF(type);
1417db96d56Sopenharmony_ci        Py_XDECREF(value);
1427db96d56Sopenharmony_ci        Py_XDECREF(tback);
1437db96d56Sopenharmony_ci    }
1447db96d56Sopenharmony_ci
1457db96d56Sopenharmony_ci    return -1;
1467db96d56Sopenharmony_ci}
1477db96d56Sopenharmony_ci
1487db96d56Sopenharmony_cistatic int
1497db96d56Sopenharmony_ci_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
1507db96d56Sopenharmony_ci    // Tokenize the whole input to see if there are any tokenization
1517db96d56Sopenharmony_ci    // errors such as mistmatching parentheses. These will get priority
1527db96d56Sopenharmony_ci    // over generic syntax errors only if the line number of the error is
1537db96d56Sopenharmony_ci    // before the one that we had for the generic error.
1547db96d56Sopenharmony_ci
1557db96d56Sopenharmony_ci    // We don't want to tokenize to the end for interactive input
1567db96d56Sopenharmony_ci    if (p->tok->prompt != NULL) {
1577db96d56Sopenharmony_ci        return 0;
1587db96d56Sopenharmony_ci    }
1597db96d56Sopenharmony_ci
1607db96d56Sopenharmony_ci    PyObject *type, *value, *traceback;
1617db96d56Sopenharmony_ci    PyErr_Fetch(&type, &value, &traceback);
1627db96d56Sopenharmony_ci
1637db96d56Sopenharmony_ci    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
1647db96d56Sopenharmony_ci    Py_ssize_t current_err_line = current_token->lineno;
1657db96d56Sopenharmony_ci
1667db96d56Sopenharmony_ci    int ret = 0;
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci    for (;;) {
1697db96d56Sopenharmony_ci        const char *start;
1707db96d56Sopenharmony_ci        const char *end;
1717db96d56Sopenharmony_ci        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
1727db96d56Sopenharmony_ci            case ERRORTOKEN:
1737db96d56Sopenharmony_ci                if (PyErr_Occurred()) {
1747db96d56Sopenharmony_ci                    ret = -1;
1757db96d56Sopenharmony_ci                    goto exit;
1767db96d56Sopenharmony_ci                }
1777db96d56Sopenharmony_ci                if (p->tok->level != 0) {
1787db96d56Sopenharmony_ci                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
1797db96d56Sopenharmony_ci                    if (current_err_line > error_lineno) {
1807db96d56Sopenharmony_ci                        raise_unclosed_parentheses_error(p);
1817db96d56Sopenharmony_ci                        ret = -1;
1827db96d56Sopenharmony_ci                        goto exit;
1837db96d56Sopenharmony_ci                    }
1847db96d56Sopenharmony_ci                }
1857db96d56Sopenharmony_ci                break;
1867db96d56Sopenharmony_ci            case ENDMARKER:
1877db96d56Sopenharmony_ci                break;
1887db96d56Sopenharmony_ci            default:
1897db96d56Sopenharmony_ci                continue;
1907db96d56Sopenharmony_ci        }
1917db96d56Sopenharmony_ci        break;
1927db96d56Sopenharmony_ci    }
1937db96d56Sopenharmony_ci
1947db96d56Sopenharmony_ci
1957db96d56Sopenharmony_ciexit:
1967db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
1977db96d56Sopenharmony_ci        Py_XDECREF(value);
1987db96d56Sopenharmony_ci        Py_XDECREF(type);
1997db96d56Sopenharmony_ci        Py_XDECREF(traceback);
2007db96d56Sopenharmony_ci    } else {
2017db96d56Sopenharmony_ci        PyErr_Restore(type, value, traceback);
2027db96d56Sopenharmony_ci    }
2037db96d56Sopenharmony_ci    return ret;
2047db96d56Sopenharmony_ci}
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci// PARSER ERRORS
2077db96d56Sopenharmony_ci
2087db96d56Sopenharmony_civoid *
2097db96d56Sopenharmony_ci_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
2107db96d56Sopenharmony_ci{
2117db96d56Sopenharmony_ci    if (p->fill == 0) {
2127db96d56Sopenharmony_ci        va_list va;
2137db96d56Sopenharmony_ci        va_start(va, errmsg);
2147db96d56Sopenharmony_ci        _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);
2157db96d56Sopenharmony_ci        va_end(va);
2167db96d56Sopenharmony_ci        return NULL;
2177db96d56Sopenharmony_ci    }
2187db96d56Sopenharmony_ci
2197db96d56Sopenharmony_ci    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
2207db96d56Sopenharmony_ci    Py_ssize_t col_offset;
2217db96d56Sopenharmony_ci    Py_ssize_t end_col_offset = -1;
2227db96d56Sopenharmony_ci    if (t->col_offset == -1) {
2237db96d56Sopenharmony_ci        if (p->tok->cur == p->tok->buf) {
2247db96d56Sopenharmony_ci            col_offset = 0;
2257db96d56Sopenharmony_ci        } else {
2267db96d56Sopenharmony_ci            const char* start = p->tok->buf  ? p->tok->line_start : p->tok->buf;
2277db96d56Sopenharmony_ci            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
2287db96d56Sopenharmony_ci        }
2297db96d56Sopenharmony_ci    } else {
2307db96d56Sopenharmony_ci        col_offset = t->col_offset + 1;
2317db96d56Sopenharmony_ci    }
2327db96d56Sopenharmony_ci
2337db96d56Sopenharmony_ci    if (t->end_col_offset != -1) {
2347db96d56Sopenharmony_ci        end_col_offset = t->end_col_offset + 1;
2357db96d56Sopenharmony_ci    }
2367db96d56Sopenharmony_ci
2377db96d56Sopenharmony_ci    va_list va;
2387db96d56Sopenharmony_ci    va_start(va, errmsg);
2397db96d56Sopenharmony_ci    _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
2407db96d56Sopenharmony_ci    va_end(va);
2417db96d56Sopenharmony_ci
2427db96d56Sopenharmony_ci    return NULL;
2437db96d56Sopenharmony_ci}
2447db96d56Sopenharmony_ci
2457db96d56Sopenharmony_cistatic PyObject *
2467db96d56Sopenharmony_ciget_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
2477db96d56Sopenharmony_ci{
2487db96d56Sopenharmony_ci    /* If the file descriptor is interactive, the source lines of the current
2497db96d56Sopenharmony_ci     * (multi-line) statement are stored in p->tok->interactive_src_start.
2507db96d56Sopenharmony_ci     * If not, we're parsing from a string, which means that the whole source
2517db96d56Sopenharmony_ci     * is stored in p->tok->str. */
2527db96d56Sopenharmony_ci    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
2537db96d56Sopenharmony_ci
2547db96d56Sopenharmony_ci    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
2557db96d56Sopenharmony_ci    if (cur_line == NULL) {
2567db96d56Sopenharmony_ci        assert(p->tok->fp_interactive);
2577db96d56Sopenharmony_ci        // We can reach this point if the tokenizer buffers for interactive source have not been
2587db96d56Sopenharmony_ci        // initialized because we failed to decode the original source with the given locale.
2597db96d56Sopenharmony_ci        return PyUnicode_FromStringAndSize("", 0);
2607db96d56Sopenharmony_ci    }
2617db96d56Sopenharmony_ci
2627db96d56Sopenharmony_ci    Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno;
2637db96d56Sopenharmony_ci    const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp;
2647db96d56Sopenharmony_ci
2657db96d56Sopenharmony_ci    for (int i = 0; i < relative_lineno - 1; i++) {
2667db96d56Sopenharmony_ci        char *new_line = strchr(cur_line, '\n');
2677db96d56Sopenharmony_ci        // The assert is here for debug builds but the conditional that
2687db96d56Sopenharmony_ci        // follows is there so in release builds we do not crash at the cost
2697db96d56Sopenharmony_ci        // to report a potentially wrong line.
2707db96d56Sopenharmony_ci        assert(new_line != NULL && new_line + 1 < buf_end);
2717db96d56Sopenharmony_ci        if (new_line == NULL || new_line + 1 > buf_end) {
2727db96d56Sopenharmony_ci            break;
2737db96d56Sopenharmony_ci        }
2747db96d56Sopenharmony_ci        cur_line = new_line + 1;
2757db96d56Sopenharmony_ci    }
2767db96d56Sopenharmony_ci
2777db96d56Sopenharmony_ci    char *next_newline;
2787db96d56Sopenharmony_ci    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
2797db96d56Sopenharmony_ci        next_newline = cur_line + strlen(cur_line);
2807db96d56Sopenharmony_ci    }
2817db96d56Sopenharmony_ci    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
2827db96d56Sopenharmony_ci}
2837db96d56Sopenharmony_ci
2847db96d56Sopenharmony_civoid *
2857db96d56Sopenharmony_ci_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
2867db96d56Sopenharmony_ci                                    Py_ssize_t lineno, Py_ssize_t col_offset,
2877db96d56Sopenharmony_ci                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
2887db96d56Sopenharmony_ci                                    const char *errmsg, va_list va)
2897db96d56Sopenharmony_ci{
2907db96d56Sopenharmony_ci    PyObject *value = NULL;
2917db96d56Sopenharmony_ci    PyObject *errstr = NULL;
2927db96d56Sopenharmony_ci    PyObject *error_line = NULL;
2937db96d56Sopenharmony_ci    PyObject *tmp = NULL;
2947db96d56Sopenharmony_ci    p->error_indicator = 1;
2957db96d56Sopenharmony_ci
2967db96d56Sopenharmony_ci    if (end_lineno == CURRENT_POS) {
2977db96d56Sopenharmony_ci        end_lineno = p->tok->lineno;
2987db96d56Sopenharmony_ci    }
2997db96d56Sopenharmony_ci    if (end_col_offset == CURRENT_POS) {
3007db96d56Sopenharmony_ci        end_col_offset = p->tok->cur - p->tok->line_start;
3017db96d56Sopenharmony_ci    }
3027db96d56Sopenharmony_ci
3037db96d56Sopenharmony_ci    if (p->start_rule == Py_fstring_input) {
3047db96d56Sopenharmony_ci        const char *fstring_msg = "f-string: ";
3057db96d56Sopenharmony_ci        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
3067db96d56Sopenharmony_ci
3077db96d56Sopenharmony_ci        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
3087db96d56Sopenharmony_ci        if (!new_errmsg) {
3097db96d56Sopenharmony_ci            return (void *) PyErr_NoMemory();
3107db96d56Sopenharmony_ci        }
3117db96d56Sopenharmony_ci
3127db96d56Sopenharmony_ci        // Copy both strings into new buffer
3137db96d56Sopenharmony_ci        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
3147db96d56Sopenharmony_ci        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
3157db96d56Sopenharmony_ci        new_errmsg[len] = 0;
3167db96d56Sopenharmony_ci        errmsg = new_errmsg;
3177db96d56Sopenharmony_ci    }
3187db96d56Sopenharmony_ci    errstr = PyUnicode_FromFormatV(errmsg, va);
3197db96d56Sopenharmony_ci    if (!errstr) {
3207db96d56Sopenharmony_ci        goto error;
3217db96d56Sopenharmony_ci    }
3227db96d56Sopenharmony_ci
3237db96d56Sopenharmony_ci    if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL) {
3247db96d56Sopenharmony_ci        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
3257db96d56Sopenharmony_ci    }
3267db96d56Sopenharmony_ci    else if (p->start_rule == Py_file_input) {
3277db96d56Sopenharmony_ci        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
3287db96d56Sopenharmony_ci                                                     (int) lineno, p->tok->encoding);
3297db96d56Sopenharmony_ci    }
3307db96d56Sopenharmony_ci
3317db96d56Sopenharmony_ci    if (!error_line) {
3327db96d56Sopenharmony_ci        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
3337db96d56Sopenharmony_ci           then we need to find the error line from some other source, because
3347db96d56Sopenharmony_ci           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
3357db96d56Sopenharmony_ci           failed or we're parsing from a string or the REPL. There's a third edge case where
3367db96d56Sopenharmony_ci           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
3377db96d56Sopenharmony_ci           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
3387db96d56Sopenharmony_ci           does not physically exist */
3397db96d56Sopenharmony_ci        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
3407db96d56Sopenharmony_ci
3417db96d56Sopenharmony_ci        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
3427db96d56Sopenharmony_ci            Py_ssize_t size = p->tok->inp - p->tok->buf;
3437db96d56Sopenharmony_ci            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
3447db96d56Sopenharmony_ci        }
3457db96d56Sopenharmony_ci        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
3467db96d56Sopenharmony_ci            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
3477db96d56Sopenharmony_ci        }
3487db96d56Sopenharmony_ci        else {
3497db96d56Sopenharmony_ci            error_line = PyUnicode_FromStringAndSize("", 0);
3507db96d56Sopenharmony_ci        }
3517db96d56Sopenharmony_ci        if (!error_line) {
3527db96d56Sopenharmony_ci            goto error;
3537db96d56Sopenharmony_ci        }
3547db96d56Sopenharmony_ci    }
3557db96d56Sopenharmony_ci
3567db96d56Sopenharmony_ci    if (p->start_rule == Py_fstring_input) {
3577db96d56Sopenharmony_ci        col_offset -= p->starting_col_offset;
3587db96d56Sopenharmony_ci        end_col_offset -= p->starting_col_offset;
3597db96d56Sopenharmony_ci    }
3607db96d56Sopenharmony_ci
3617db96d56Sopenharmony_ci    Py_ssize_t col_number = col_offset;
3627db96d56Sopenharmony_ci    Py_ssize_t end_col_number = end_col_offset;
3637db96d56Sopenharmony_ci
3647db96d56Sopenharmony_ci    if (p->tok->encoding != NULL) {
3657db96d56Sopenharmony_ci        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
3667db96d56Sopenharmony_ci        if (col_number < 0) {
3677db96d56Sopenharmony_ci            goto error;
3687db96d56Sopenharmony_ci        }
3697db96d56Sopenharmony_ci        if (end_col_number > 0) {
3707db96d56Sopenharmony_ci            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
3717db96d56Sopenharmony_ci            if (end_col_offset < 0) {
3727db96d56Sopenharmony_ci                goto error;
3737db96d56Sopenharmony_ci            } else {
3747db96d56Sopenharmony_ci                end_col_number = end_col_offset;
3757db96d56Sopenharmony_ci            }
3767db96d56Sopenharmony_ci        }
3777db96d56Sopenharmony_ci    }
3787db96d56Sopenharmony_ci    tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
3797db96d56Sopenharmony_ci    if (!tmp) {
3807db96d56Sopenharmony_ci        goto error;
3817db96d56Sopenharmony_ci    }
3827db96d56Sopenharmony_ci    value = PyTuple_Pack(2, errstr, tmp);
3837db96d56Sopenharmony_ci    Py_DECREF(tmp);
3847db96d56Sopenharmony_ci    if (!value) {
3857db96d56Sopenharmony_ci        goto error;
3867db96d56Sopenharmony_ci    }
3877db96d56Sopenharmony_ci    PyErr_SetObject(errtype, value);
3887db96d56Sopenharmony_ci
3897db96d56Sopenharmony_ci    Py_DECREF(errstr);
3907db96d56Sopenharmony_ci    Py_DECREF(value);
3917db96d56Sopenharmony_ci    if (p->start_rule == Py_fstring_input) {
3927db96d56Sopenharmony_ci        PyMem_Free((void *)errmsg);
3937db96d56Sopenharmony_ci    }
3947db96d56Sopenharmony_ci    return NULL;
3957db96d56Sopenharmony_ci
3967db96d56Sopenharmony_cierror:
3977db96d56Sopenharmony_ci    Py_XDECREF(errstr);
3987db96d56Sopenharmony_ci    Py_XDECREF(error_line);
3997db96d56Sopenharmony_ci    if (p->start_rule == Py_fstring_input) {
4007db96d56Sopenharmony_ci        PyMem_Free((void *)errmsg);
4017db96d56Sopenharmony_ci    }
4027db96d56Sopenharmony_ci    return NULL;
4037db96d56Sopenharmony_ci}
4047db96d56Sopenharmony_ci
4057db96d56Sopenharmony_civoid
4067db96d56Sopenharmony_ci_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
4077db96d56Sopenharmony_ci    // Existing sintax error
4087db96d56Sopenharmony_ci    if (PyErr_Occurred()) {
4097db96d56Sopenharmony_ci        // Prioritize tokenizer errors to custom syntax errors raised
4107db96d56Sopenharmony_ci        // on the second phase only if the errors come from the parser.
4117db96d56Sopenharmony_ci        int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK);
4127db96d56Sopenharmony_ci        if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
4137db96d56Sopenharmony_ci            _PyPegen_tokenize_full_source_to_check_for_errors(p);
4147db96d56Sopenharmony_ci        }
4157db96d56Sopenharmony_ci        // Propagate the existing syntax error.
4167db96d56Sopenharmony_ci        return;
4177db96d56Sopenharmony_ci    }
4187db96d56Sopenharmony_ci    // Initialization error
4197db96d56Sopenharmony_ci    if (p->fill == 0) {
4207db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("error at start before reading any input");
4217db96d56Sopenharmony_ci    }
4227db96d56Sopenharmony_ci    // Parser encountered EOF (End of File) unexpectedtly
4237db96d56Sopenharmony_ci    if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
4247db96d56Sopenharmony_ci        if (p->tok->level) {
4257db96d56Sopenharmony_ci            raise_unclosed_parentheses_error(p);
4267db96d56Sopenharmony_ci        } else {
4277db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
4287db96d56Sopenharmony_ci        }
4297db96d56Sopenharmony_ci        return;
4307db96d56Sopenharmony_ci    }
4317db96d56Sopenharmony_ci    // Indentation error in the tokenizer
4327db96d56Sopenharmony_ci    if (last_token->type == INDENT || last_token->type == DEDENT) {
4337db96d56Sopenharmony_ci        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
4347db96d56Sopenharmony_ci        return;
4357db96d56Sopenharmony_ci    }
4367db96d56Sopenharmony_ci    // Unknown error (generic case)
4377db96d56Sopenharmony_ci
4387db96d56Sopenharmony_ci    // Use the last token we found on the first pass to avoid reporting
4397db96d56Sopenharmony_ci    // incorrect locations for generic syntax errors just because we reached
4407db96d56Sopenharmony_ci    // further away when trying to find specific syntax errors in the second
4417db96d56Sopenharmony_ci    // pass.
4427db96d56Sopenharmony_ci    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
4437db96d56Sopenharmony_ci    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
4447db96d56Sopenharmony_ci    // generic SyntaxError we just raised if errors are found.
4457db96d56Sopenharmony_ci    _PyPegen_tokenize_full_source_to_check_for_errors(p);
4467db96d56Sopenharmony_ci}
447