17db96d56Sopenharmony_ci#include <stdbool.h>
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ci#include <Python.h>
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ci#include "tokenizer.h"
67db96d56Sopenharmony_ci#include "pegen.h"
77db96d56Sopenharmony_ci#include "string_parser.h"
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ci//// STRING HANDLING FUNCTIONS ////
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_cistatic int
127db96d56Sopenharmony_ciwarn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
137db96d56Sopenharmony_ci{
147db96d56Sopenharmony_ci    unsigned char c = *first_invalid_escape;
157db96d56Sopenharmony_ci    int octal = ('4' <= c && c <= '7');
167db96d56Sopenharmony_ci    PyObject *msg =
177db96d56Sopenharmony_ci        octal
187db96d56Sopenharmony_ci        ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
197db96d56Sopenharmony_ci                               first_invalid_escape)
207db96d56Sopenharmony_ci        : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
217db96d56Sopenharmony_ci    if (msg == NULL) {
227db96d56Sopenharmony_ci        return -1;
237db96d56Sopenharmony_ci    }
247db96d56Sopenharmony_ci    if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
257db96d56Sopenharmony_ci                                 t->lineno, NULL, NULL) < 0) {
267db96d56Sopenharmony_ci        if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
277db96d56Sopenharmony_ci            /* Replace the DeprecationWarning exception with a SyntaxError
287db96d56Sopenharmony_ci               to get a more accurate error report */
297db96d56Sopenharmony_ci            PyErr_Clear();
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci            /* This is needed, in order for the SyntaxError to point to the token t,
327db96d56Sopenharmony_ci               since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
337db96d56Sopenharmony_ci               error location, if p->known_err_token is not set. */
347db96d56Sopenharmony_ci            p->known_err_token = t;
357db96d56Sopenharmony_ci            if (octal) {
367db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
377db96d56Sopenharmony_ci                                   first_invalid_escape);
387db96d56Sopenharmony_ci            }
397db96d56Sopenharmony_ci            else {
407db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
417db96d56Sopenharmony_ci            }
427db96d56Sopenharmony_ci        }
437db96d56Sopenharmony_ci        Py_DECREF(msg);
447db96d56Sopenharmony_ci        return -1;
457db96d56Sopenharmony_ci    }
467db96d56Sopenharmony_ci    Py_DECREF(msg);
477db96d56Sopenharmony_ci    return 0;
487db96d56Sopenharmony_ci}
497db96d56Sopenharmony_ci
507db96d56Sopenharmony_cistatic PyObject *
517db96d56Sopenharmony_cidecode_utf8(const char **sPtr, const char *end)
527db96d56Sopenharmony_ci{
537db96d56Sopenharmony_ci    const char *s;
547db96d56Sopenharmony_ci    const char *t;
557db96d56Sopenharmony_ci    t = s = *sPtr;
567db96d56Sopenharmony_ci    while (s < end && (*s & 0x80)) {
577db96d56Sopenharmony_ci        s++;
587db96d56Sopenharmony_ci    }
597db96d56Sopenharmony_ci    *sPtr = s;
607db96d56Sopenharmony_ci    return PyUnicode_DecodeUTF8(t, s - t, NULL);
617db96d56Sopenharmony_ci}
627db96d56Sopenharmony_ci
637db96d56Sopenharmony_cistatic PyObject *
647db96d56Sopenharmony_cidecode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
657db96d56Sopenharmony_ci{
667db96d56Sopenharmony_ci    PyObject *v;
677db96d56Sopenharmony_ci    PyObject *u;
687db96d56Sopenharmony_ci    char *buf;
697db96d56Sopenharmony_ci    char *p;
707db96d56Sopenharmony_ci    const char *end;
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci    /* check for integer overflow */
737db96d56Sopenharmony_ci    if (len > SIZE_MAX / 6) {
747db96d56Sopenharmony_ci        return NULL;
757db96d56Sopenharmony_ci    }
767db96d56Sopenharmony_ci    /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
777db96d56Sopenharmony_ci       "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
787db96d56Sopenharmony_ci    u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
797db96d56Sopenharmony_ci    if (u == NULL) {
807db96d56Sopenharmony_ci        return NULL;
817db96d56Sopenharmony_ci    }
827db96d56Sopenharmony_ci    p = buf = PyBytes_AsString(u);
837db96d56Sopenharmony_ci    if (p == NULL) {
847db96d56Sopenharmony_ci        return NULL;
857db96d56Sopenharmony_ci    }
867db96d56Sopenharmony_ci    end = s + len;
877db96d56Sopenharmony_ci    while (s < end) {
887db96d56Sopenharmony_ci        if (*s == '\\') {
897db96d56Sopenharmony_ci            *p++ = *s++;
907db96d56Sopenharmony_ci            if (s >= end || *s & 0x80) {
917db96d56Sopenharmony_ci                strcpy(p, "u005c");
927db96d56Sopenharmony_ci                p += 5;
937db96d56Sopenharmony_ci                if (s >= end) {
947db96d56Sopenharmony_ci                    break;
957db96d56Sopenharmony_ci                }
967db96d56Sopenharmony_ci            }
977db96d56Sopenharmony_ci        }
987db96d56Sopenharmony_ci        if (*s & 0x80) {
997db96d56Sopenharmony_ci            PyObject *w;
1007db96d56Sopenharmony_ci            int kind;
1017db96d56Sopenharmony_ci            const void *data;
1027db96d56Sopenharmony_ci            Py_ssize_t w_len;
1037db96d56Sopenharmony_ci            Py_ssize_t i;
1047db96d56Sopenharmony_ci            w = decode_utf8(&s, end);
1057db96d56Sopenharmony_ci            if (w == NULL) {
1067db96d56Sopenharmony_ci                Py_DECREF(u);
1077db96d56Sopenharmony_ci                return NULL;
1087db96d56Sopenharmony_ci            }
1097db96d56Sopenharmony_ci            kind = PyUnicode_KIND(w);
1107db96d56Sopenharmony_ci            data = PyUnicode_DATA(w);
1117db96d56Sopenharmony_ci            w_len = PyUnicode_GET_LENGTH(w);
1127db96d56Sopenharmony_ci            for (i = 0; i < w_len; i++) {
1137db96d56Sopenharmony_ci                Py_UCS4 chr = PyUnicode_READ(kind, data, i);
1147db96d56Sopenharmony_ci                sprintf(p, "\\U%08x", chr);
1157db96d56Sopenharmony_ci                p += 10;
1167db96d56Sopenharmony_ci            }
1177db96d56Sopenharmony_ci            /* Should be impossible to overflow */
1187db96d56Sopenharmony_ci            assert(p - buf <= PyBytes_GET_SIZE(u));
1197db96d56Sopenharmony_ci            Py_DECREF(w);
1207db96d56Sopenharmony_ci        }
1217db96d56Sopenharmony_ci        else {
1227db96d56Sopenharmony_ci            *p++ = *s++;
1237db96d56Sopenharmony_ci        }
1247db96d56Sopenharmony_ci    }
1257db96d56Sopenharmony_ci    len = p - buf;
1267db96d56Sopenharmony_ci    s = buf;
1277db96d56Sopenharmony_ci
1287db96d56Sopenharmony_ci    const char *first_invalid_escape;
1297db96d56Sopenharmony_ci    v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci    if (v != NULL && first_invalid_escape != NULL) {
1327db96d56Sopenharmony_ci        if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
1337db96d56Sopenharmony_ci            /* We have not decref u before because first_invalid_escape points
1347db96d56Sopenharmony_ci               inside u. */
1357db96d56Sopenharmony_ci            Py_XDECREF(u);
1367db96d56Sopenharmony_ci            Py_DECREF(v);
1377db96d56Sopenharmony_ci            return NULL;
1387db96d56Sopenharmony_ci        }
1397db96d56Sopenharmony_ci    }
1407db96d56Sopenharmony_ci    Py_XDECREF(u);
1417db96d56Sopenharmony_ci    return v;
1427db96d56Sopenharmony_ci}
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_cistatic PyObject *
1457db96d56Sopenharmony_cidecode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
1467db96d56Sopenharmony_ci{
1477db96d56Sopenharmony_ci    const char *first_invalid_escape;
1487db96d56Sopenharmony_ci    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
1497db96d56Sopenharmony_ci    if (result == NULL) {
1507db96d56Sopenharmony_ci        return NULL;
1517db96d56Sopenharmony_ci    }
1527db96d56Sopenharmony_ci
1537db96d56Sopenharmony_ci    if (first_invalid_escape != NULL) {
1547db96d56Sopenharmony_ci        if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
1557db96d56Sopenharmony_ci            Py_DECREF(result);
1567db96d56Sopenharmony_ci            return NULL;
1577db96d56Sopenharmony_ci        }
1587db96d56Sopenharmony_ci    }
1597db96d56Sopenharmony_ci    return result;
1607db96d56Sopenharmony_ci}
1617db96d56Sopenharmony_ci
1627db96d56Sopenharmony_ci/* s must include the bracketing quote characters, and r, b, u,
1637db96d56Sopenharmony_ci   &/or f prefixes (if any), and embedded escape sequences (if any).
1647db96d56Sopenharmony_ci   _PyPegen_parsestr parses it, and sets *result to decoded Python string object.
1657db96d56Sopenharmony_ci   If the string is an f-string, set *fstr and *fstrlen to the unparsed
1667db96d56Sopenharmony_ci   string object.  Return 0 if no errors occurred.  */
1677db96d56Sopenharmony_ciint
1687db96d56Sopenharmony_ci_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
1697db96d56Sopenharmony_ci                  const char **fstr, Py_ssize_t *fstrlen, Token *t)
1707db96d56Sopenharmony_ci{
1717db96d56Sopenharmony_ci    const char *s = PyBytes_AsString(t->bytes);
1727db96d56Sopenharmony_ci    if (s == NULL) {
1737db96d56Sopenharmony_ci        return -1;
1747db96d56Sopenharmony_ci    }
1757db96d56Sopenharmony_ci
1767db96d56Sopenharmony_ci    size_t len;
1777db96d56Sopenharmony_ci    int quote = Py_CHARMASK(*s);
1787db96d56Sopenharmony_ci    int fmode = 0;
1797db96d56Sopenharmony_ci    *bytesmode = 0;
1807db96d56Sopenharmony_ci    *rawmode = 0;
1817db96d56Sopenharmony_ci    *result = NULL;
1827db96d56Sopenharmony_ci    *fstr = NULL;
1837db96d56Sopenharmony_ci    if (Py_ISALPHA(quote)) {
1847db96d56Sopenharmony_ci        while (!*bytesmode || !*rawmode) {
1857db96d56Sopenharmony_ci            if (quote == 'b' || quote == 'B') {
1867db96d56Sopenharmony_ci                quote =(unsigned char)*++s;
1877db96d56Sopenharmony_ci                *bytesmode = 1;
1887db96d56Sopenharmony_ci            }
1897db96d56Sopenharmony_ci            else if (quote == 'u' || quote == 'U') {
1907db96d56Sopenharmony_ci                quote = (unsigned char)*++s;
1917db96d56Sopenharmony_ci            }
1927db96d56Sopenharmony_ci            else if (quote == 'r' || quote == 'R') {
1937db96d56Sopenharmony_ci                quote = (unsigned char)*++s;
1947db96d56Sopenharmony_ci                *rawmode = 1;
1957db96d56Sopenharmony_ci            }
1967db96d56Sopenharmony_ci            else if (quote == 'f' || quote == 'F') {
1977db96d56Sopenharmony_ci                quote = (unsigned char)*++s;
1987db96d56Sopenharmony_ci                fmode = 1;
1997db96d56Sopenharmony_ci            }
2007db96d56Sopenharmony_ci            else {
2017db96d56Sopenharmony_ci                break;
2027db96d56Sopenharmony_ci            }
2037db96d56Sopenharmony_ci        }
2047db96d56Sopenharmony_ci    }
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci    /* fstrings are only allowed in Python 3.6 and greater */
2077db96d56Sopenharmony_ci    if (fmode && p->feature_version < 6) {
2087db96d56Sopenharmony_ci        p->error_indicator = 1;
2097db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
2107db96d56Sopenharmony_ci        return -1;
2117db96d56Sopenharmony_ci    }
2127db96d56Sopenharmony_ci
2137db96d56Sopenharmony_ci    if (fmode && *bytesmode) {
2147db96d56Sopenharmony_ci        PyErr_BadInternalCall();
2157db96d56Sopenharmony_ci        return -1;
2167db96d56Sopenharmony_ci    }
2177db96d56Sopenharmony_ci    if (quote != '\'' && quote != '\"') {
2187db96d56Sopenharmony_ci        PyErr_BadInternalCall();
2197db96d56Sopenharmony_ci        return -1;
2207db96d56Sopenharmony_ci    }
2217db96d56Sopenharmony_ci    /* Skip the leading quote char. */
2227db96d56Sopenharmony_ci    s++;
2237db96d56Sopenharmony_ci    len = strlen(s);
2247db96d56Sopenharmony_ci    if (len > INT_MAX) {
2257db96d56Sopenharmony_ci        PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
2267db96d56Sopenharmony_ci        return -1;
2277db96d56Sopenharmony_ci    }
2287db96d56Sopenharmony_ci    if (s[--len] != quote) {
2297db96d56Sopenharmony_ci        /* Last quote char must match the first. */
2307db96d56Sopenharmony_ci        PyErr_BadInternalCall();
2317db96d56Sopenharmony_ci        return -1;
2327db96d56Sopenharmony_ci    }
2337db96d56Sopenharmony_ci    if (len >= 4 && s[0] == quote && s[1] == quote) {
2347db96d56Sopenharmony_ci        /* A triple quoted string. We've already skipped one quote at
2357db96d56Sopenharmony_ci           the start and one at the end of the string. Now skip the
2367db96d56Sopenharmony_ci           two at the start. */
2377db96d56Sopenharmony_ci        s += 2;
2387db96d56Sopenharmony_ci        len -= 2;
2397db96d56Sopenharmony_ci        /* And check that the last two match. */
2407db96d56Sopenharmony_ci        if (s[--len] != quote || s[--len] != quote) {
2417db96d56Sopenharmony_ci            PyErr_BadInternalCall();
2427db96d56Sopenharmony_ci            return -1;
2437db96d56Sopenharmony_ci        }
2447db96d56Sopenharmony_ci    }
2457db96d56Sopenharmony_ci
2467db96d56Sopenharmony_ci    if (fmode) {
2477db96d56Sopenharmony_ci        /* Just return the bytes. The caller will parse the resulting
2487db96d56Sopenharmony_ci           string. */
2497db96d56Sopenharmony_ci        *fstr = s;
2507db96d56Sopenharmony_ci        *fstrlen = len;
2517db96d56Sopenharmony_ci        return 0;
2527db96d56Sopenharmony_ci    }
2537db96d56Sopenharmony_ci
2547db96d56Sopenharmony_ci    /* Not an f-string. */
2557db96d56Sopenharmony_ci    /* Avoid invoking escape decoding routines if possible. */
2567db96d56Sopenharmony_ci    *rawmode = *rawmode || strchr(s, '\\') == NULL;
2577db96d56Sopenharmony_ci    if (*bytesmode) {
2587db96d56Sopenharmony_ci        /* Disallow non-ASCII characters. */
2597db96d56Sopenharmony_ci        const char *ch;
2607db96d56Sopenharmony_ci        for (ch = s; *ch; ch++) {
2617db96d56Sopenharmony_ci            if (Py_CHARMASK(*ch) >= 0x80) {
2627db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
2637db96d56Sopenharmony_ci                                   t,
2647db96d56Sopenharmony_ci                                   "bytes can only contain ASCII "
2657db96d56Sopenharmony_ci                                   "literal characters");
2667db96d56Sopenharmony_ci                return -1;
2677db96d56Sopenharmony_ci            }
2687db96d56Sopenharmony_ci        }
2697db96d56Sopenharmony_ci        if (*rawmode) {
2707db96d56Sopenharmony_ci            *result = PyBytes_FromStringAndSize(s, len);
2717db96d56Sopenharmony_ci        }
2727db96d56Sopenharmony_ci        else {
2737db96d56Sopenharmony_ci            *result = decode_bytes_with_escapes(p, s, len, t);
2747db96d56Sopenharmony_ci        }
2757db96d56Sopenharmony_ci    }
2767db96d56Sopenharmony_ci    else {
2777db96d56Sopenharmony_ci        if (*rawmode) {
2787db96d56Sopenharmony_ci            *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
2797db96d56Sopenharmony_ci        }
2807db96d56Sopenharmony_ci        else {
2817db96d56Sopenharmony_ci            *result = decode_unicode_with_escapes(p, s, len, t);
2827db96d56Sopenharmony_ci        }
2837db96d56Sopenharmony_ci    }
2847db96d56Sopenharmony_ci    return *result == NULL ? -1 : 0;
2857db96d56Sopenharmony_ci}
2867db96d56Sopenharmony_ci
2877db96d56Sopenharmony_ci
2887db96d56Sopenharmony_ci
2897db96d56Sopenharmony_ci// FSTRING STUFF
2907db96d56Sopenharmony_ci
2917db96d56Sopenharmony_ci/* Fix locations for the given node and its children.
2927db96d56Sopenharmony_ci
2937db96d56Sopenharmony_ci   `parent` is the enclosing node.
2947db96d56Sopenharmony_ci   `expr_start` is the starting position of the expression (pointing to the open brace).
2957db96d56Sopenharmony_ci   `n` is the node which locations are going to be fixed relative to parent.
2967db96d56Sopenharmony_ci   `expr_str` is the child node's string representation, including braces.
2977db96d56Sopenharmony_ci*/
2987db96d56Sopenharmony_cistatic bool
2997db96d56Sopenharmony_cifstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
3007db96d56Sopenharmony_ci{
3017db96d56Sopenharmony_ci    *p_lines = 0;
3027db96d56Sopenharmony_ci    *p_cols = 0;
3037db96d56Sopenharmony_ci    assert(expr_start != NULL && *expr_start == '{');
3047db96d56Sopenharmony_ci    if (parent && parent->bytes) {
3057db96d56Sopenharmony_ci        const char *parent_str = PyBytes_AsString(parent->bytes);
3067db96d56Sopenharmony_ci        if (!parent_str) {
3077db96d56Sopenharmony_ci            return false;
3087db96d56Sopenharmony_ci        }
3097db96d56Sopenharmony_ci        // The following is needed, in order to correctly shift the column
3107db96d56Sopenharmony_ci        // offset, in the case that (disregarding any whitespace) a newline
3117db96d56Sopenharmony_ci        // immediately follows the opening curly brace of the fstring expression.
3127db96d56Sopenharmony_ci        bool newline_after_brace = 1;
3137db96d56Sopenharmony_ci        const char *start = expr_start + 1;
3147db96d56Sopenharmony_ci        while (start && *start != '}' && *start != '\n') {
3157db96d56Sopenharmony_ci            if (*start != ' ' && *start != '\t' && *start != '\f') {
3167db96d56Sopenharmony_ci                newline_after_brace = 0;
3177db96d56Sopenharmony_ci                break;
3187db96d56Sopenharmony_ci            }
3197db96d56Sopenharmony_ci            start++;
3207db96d56Sopenharmony_ci        }
3217db96d56Sopenharmony_ci
3227db96d56Sopenharmony_ci        // Account for the characters from the last newline character to our
3237db96d56Sopenharmony_ci        // left until the beginning of expr_start.
3247db96d56Sopenharmony_ci        if (!newline_after_brace) {
3257db96d56Sopenharmony_ci            start = expr_start;
3267db96d56Sopenharmony_ci            while (start > parent_str && *start != '\n') {
3277db96d56Sopenharmony_ci                start--;
3287db96d56Sopenharmony_ci            }
3297db96d56Sopenharmony_ci            *p_cols += (int)(expr_start - start);
3307db96d56Sopenharmony_ci            if (*start == '\n') {
3317db96d56Sopenharmony_ci                *p_cols -= 1;
3327db96d56Sopenharmony_ci            }
3337db96d56Sopenharmony_ci        }
3347db96d56Sopenharmony_ci        /* adjust the start based on the number of newlines encountered
3357db96d56Sopenharmony_ci           before the f-string expression */
3367db96d56Sopenharmony_ci        for (const char *p = parent_str; p < expr_start; p++) {
3377db96d56Sopenharmony_ci            if (*p == '\n') {
3387db96d56Sopenharmony_ci                (*p_lines)++;
3397db96d56Sopenharmony_ci            }
3407db96d56Sopenharmony_ci        }
3417db96d56Sopenharmony_ci    }
3427db96d56Sopenharmony_ci    return true;
3437db96d56Sopenharmony_ci}
3447db96d56Sopenharmony_ci
3457db96d56Sopenharmony_ci
3467db96d56Sopenharmony_ci/* Compile this expression in to an expr_ty.  Add parens around the
3477db96d56Sopenharmony_ci   expression, in order to allow leading spaces in the expression. */
3487db96d56Sopenharmony_cistatic expr_ty
3497db96d56Sopenharmony_cifstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
3507db96d56Sopenharmony_ci                     Token *t)
3517db96d56Sopenharmony_ci{
3527db96d56Sopenharmony_ci    expr_ty expr = NULL;
3537db96d56Sopenharmony_ci    char *str;
3547db96d56Sopenharmony_ci    Py_ssize_t len;
3557db96d56Sopenharmony_ci    const char *s;
3567db96d56Sopenharmony_ci    expr_ty result = NULL;
3577db96d56Sopenharmony_ci
3587db96d56Sopenharmony_ci    assert(expr_end >= expr_start);
3597db96d56Sopenharmony_ci    assert(*(expr_start-1) == '{');
3607db96d56Sopenharmony_ci    assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
3617db96d56Sopenharmony_ci           *expr_end == '=');
3627db96d56Sopenharmony_ci
3637db96d56Sopenharmony_ci    /* If the substring is all whitespace, it's an error.  We need to catch this
3647db96d56Sopenharmony_ci       here, and not when we call PyParser_SimpleParseStringFlagsFilename,
3657db96d56Sopenharmony_ci       because turning the expression '' in to '()' would go from being invalid
3667db96d56Sopenharmony_ci       to valid. */
3677db96d56Sopenharmony_ci    for (s = expr_start; s != expr_end; s++) {
3687db96d56Sopenharmony_ci        char c = *s;
3697db96d56Sopenharmony_ci        /* The Python parser ignores only the following whitespace
3707db96d56Sopenharmony_ci           characters (\r already is converted to \n). */
3717db96d56Sopenharmony_ci        if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
3727db96d56Sopenharmony_ci            break;
3737db96d56Sopenharmony_ci        }
3747db96d56Sopenharmony_ci    }
3757db96d56Sopenharmony_ci
3767db96d56Sopenharmony_ci    if (s == expr_end) {
3777db96d56Sopenharmony_ci        if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') {
3787db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
3797db96d56Sopenharmony_ci            return NULL;
3807db96d56Sopenharmony_ci        }
3817db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
3827db96d56Sopenharmony_ci        return NULL;
3837db96d56Sopenharmony_ci    }
3847db96d56Sopenharmony_ci
3857db96d56Sopenharmony_ci    len = expr_end - expr_start;
3867db96d56Sopenharmony_ci    /* Allocate 3 extra bytes: open paren, close paren, null byte. */
3877db96d56Sopenharmony_ci    str = PyMem_Calloc(len + 3, sizeof(char));
3887db96d56Sopenharmony_ci    if (str == NULL) {
3897db96d56Sopenharmony_ci        PyErr_NoMemory();
3907db96d56Sopenharmony_ci        return NULL;
3917db96d56Sopenharmony_ci    }
3927db96d56Sopenharmony_ci
3937db96d56Sopenharmony_ci    // The call to fstring_find_expr_location is responsible for finding the column offset
3947db96d56Sopenharmony_ci    // the generated AST nodes need to be shifted to the right, which is equal to the number
3957db96d56Sopenharmony_ci    // of the f-string characters before the expression starts.
3967db96d56Sopenharmony_ci    memcpy(str+1, expr_start, len);
3977db96d56Sopenharmony_ci    int lines, cols;
3987db96d56Sopenharmony_ci    if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
3997db96d56Sopenharmony_ci        PyMem_Free(str);
4007db96d56Sopenharmony_ci        return NULL;
4017db96d56Sopenharmony_ci    }
4027db96d56Sopenharmony_ci
4037db96d56Sopenharmony_ci    // The parentheses are needed in order to allow for leading whitespace within
4047db96d56Sopenharmony_ci    // the f-string expression. This consequently gets parsed as a group (see the
4057db96d56Sopenharmony_ci    // group rule in python.gram).
4067db96d56Sopenharmony_ci    str[0] = '(';
4077db96d56Sopenharmony_ci    str[len+1] = ')';
4087db96d56Sopenharmony_ci
4097db96d56Sopenharmony_ci    struct tok_state* tok = _PyTokenizer_FromString(str, 1);
4107db96d56Sopenharmony_ci    if (tok == NULL) {
4117db96d56Sopenharmony_ci        PyMem_Free(str);
4127db96d56Sopenharmony_ci        return NULL;
4137db96d56Sopenharmony_ci    }
4147db96d56Sopenharmony_ci    Py_INCREF(p->tok->filename);
4157db96d56Sopenharmony_ci
4167db96d56Sopenharmony_ci    tok->filename = p->tok->filename;
4177db96d56Sopenharmony_ci    tok->lineno = t->lineno + lines - 1;
4187db96d56Sopenharmony_ci
4197db96d56Sopenharmony_ci    Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
4207db96d56Sopenharmony_ci                                     NULL, p->arena);
4217db96d56Sopenharmony_ci
4227db96d56Sopenharmony_ci    p2->starting_lineno = t->lineno + lines;
4237db96d56Sopenharmony_ci    p2->starting_col_offset = lines != 0 ? cols : t->col_offset + cols;
4247db96d56Sopenharmony_ci
4257db96d56Sopenharmony_ci    expr = _PyPegen_run_parser(p2);
4267db96d56Sopenharmony_ci
4277db96d56Sopenharmony_ci    if (expr == NULL) {
4287db96d56Sopenharmony_ci        goto exit;
4297db96d56Sopenharmony_ci    }
4307db96d56Sopenharmony_ci    result = expr;
4317db96d56Sopenharmony_ci
4327db96d56Sopenharmony_ciexit:
4337db96d56Sopenharmony_ci    PyMem_Free(str);
4347db96d56Sopenharmony_ci    _PyPegen_Parser_Free(p2);
4357db96d56Sopenharmony_ci    _PyTokenizer_Free(tok);
4367db96d56Sopenharmony_ci    return result;
4377db96d56Sopenharmony_ci}
4387db96d56Sopenharmony_ci
4397db96d56Sopenharmony_ci/* Return -1 on error.
4407db96d56Sopenharmony_ci
4417db96d56Sopenharmony_ci   Return 0 if we reached the end of the literal.
4427db96d56Sopenharmony_ci
4437db96d56Sopenharmony_ci   Return 1 if we haven't reached the end of the literal, but we want
4447db96d56Sopenharmony_ci   the caller to process the literal up to this point. Used for
4457db96d56Sopenharmony_ci   doubled braces.
4467db96d56Sopenharmony_ci*/
4477db96d56Sopenharmony_cistatic int
4487db96d56Sopenharmony_cifstring_find_literal(Parser *p, const char **str, const char *end, int raw,
4497db96d56Sopenharmony_ci                     PyObject **literal, int recurse_lvl, Token *t)
4507db96d56Sopenharmony_ci{
4517db96d56Sopenharmony_ci    /* Get any literal string. It ends when we hit an un-doubled left
4527db96d56Sopenharmony_ci       brace (which isn't part of a unicode name escape such as
4537db96d56Sopenharmony_ci       "\N{EULER CONSTANT}"), or the end of the string. */
4547db96d56Sopenharmony_ci
4557db96d56Sopenharmony_ci    const char *s = *str;
4567db96d56Sopenharmony_ci    const char *literal_start = s;
4577db96d56Sopenharmony_ci    int result = 0;
4587db96d56Sopenharmony_ci
4597db96d56Sopenharmony_ci    assert(*literal == NULL);
4607db96d56Sopenharmony_ci    while (s < end) {
4617db96d56Sopenharmony_ci        char ch = *s++;
4627db96d56Sopenharmony_ci        if (!raw && ch == '\\' && s < end) {
4637db96d56Sopenharmony_ci            ch = *s++;
4647db96d56Sopenharmony_ci            if (ch == 'N') {
4657db96d56Sopenharmony_ci                /* We need to look at and skip matching braces for "\N{name}"
4667db96d56Sopenharmony_ci                   sequences because otherwise we'll think the opening '{'
4677db96d56Sopenharmony_ci                   starts an expression, which is not the case with "\N".
4687db96d56Sopenharmony_ci                   Keep looking for either a matched '{' '}' pair, or the end
4697db96d56Sopenharmony_ci                   of the string. */
4707db96d56Sopenharmony_ci
4717db96d56Sopenharmony_ci                if (s < end && *s++ == '{') {
4727db96d56Sopenharmony_ci                    while (s < end && *s++ != '}') {
4737db96d56Sopenharmony_ci                    }
4747db96d56Sopenharmony_ci                    continue;
4757db96d56Sopenharmony_ci                }
4767db96d56Sopenharmony_ci
4777db96d56Sopenharmony_ci                /* This is an invalid "\N" sequence, since it's a "\N" not
4787db96d56Sopenharmony_ci                   followed by a "{".  Just keep parsing this literal.  This
4797db96d56Sopenharmony_ci                   error will be caught later by
4807db96d56Sopenharmony_ci                   decode_unicode_with_escapes(). */
4817db96d56Sopenharmony_ci                continue;
4827db96d56Sopenharmony_ci            }
4837db96d56Sopenharmony_ci            if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
4847db96d56Sopenharmony_ci                return -1;
4857db96d56Sopenharmony_ci            }
4867db96d56Sopenharmony_ci        }
4877db96d56Sopenharmony_ci        if (ch == '{' || ch == '}') {
4887db96d56Sopenharmony_ci            /* Check for doubled braces, but only at the top level. If
4897db96d56Sopenharmony_ci               we checked at every level, then f'{0:{3}}' would fail
4907db96d56Sopenharmony_ci               with the two closing braces. */
4917db96d56Sopenharmony_ci            if (recurse_lvl == 0) {
4927db96d56Sopenharmony_ci                if (s < end && *s == ch) {
4937db96d56Sopenharmony_ci                    /* We're going to tell the caller that the literal ends
4947db96d56Sopenharmony_ci                       here, but that they should continue scanning. But also
4957db96d56Sopenharmony_ci                       skip over the second brace when we resume scanning. */
4967db96d56Sopenharmony_ci                    *str = s + 1;
4977db96d56Sopenharmony_ci                    result = 1;
4987db96d56Sopenharmony_ci                    goto done;
4997db96d56Sopenharmony_ci                }
5007db96d56Sopenharmony_ci
5017db96d56Sopenharmony_ci                /* Where a single '{' is the start of a new expression, a
5027db96d56Sopenharmony_ci                   single '}' is not allowed. */
5037db96d56Sopenharmony_ci                if (ch == '}') {
5047db96d56Sopenharmony_ci                    *str = s - 1;
5057db96d56Sopenharmony_ci                    RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
5067db96d56Sopenharmony_ci                    return -1;
5077db96d56Sopenharmony_ci                }
5087db96d56Sopenharmony_ci            }
5097db96d56Sopenharmony_ci            /* We're either at a '{', which means we're starting another
5107db96d56Sopenharmony_ci               expression; or a '}', which means we're at the end of this
5117db96d56Sopenharmony_ci               f-string (for a nested format_spec). */
5127db96d56Sopenharmony_ci            s--;
5137db96d56Sopenharmony_ci            break;
5147db96d56Sopenharmony_ci        }
5157db96d56Sopenharmony_ci    }
5167db96d56Sopenharmony_ci    *str = s;
5177db96d56Sopenharmony_ci    assert(s <= end);
5187db96d56Sopenharmony_ci    assert(s == end || *s == '{' || *s == '}');
5197db96d56Sopenharmony_cidone:
5207db96d56Sopenharmony_ci    if (literal_start != s) {
5217db96d56Sopenharmony_ci        if (raw) {
5227db96d56Sopenharmony_ci            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
5237db96d56Sopenharmony_ci                                                    s - literal_start,
5247db96d56Sopenharmony_ci                                                    NULL, NULL);
5257db96d56Sopenharmony_ci        }
5267db96d56Sopenharmony_ci        else {
5277db96d56Sopenharmony_ci            *literal = decode_unicode_with_escapes(p, literal_start,
5287db96d56Sopenharmony_ci                                                   s - literal_start, t);
5297db96d56Sopenharmony_ci        }
5307db96d56Sopenharmony_ci        if (!*literal) {
5317db96d56Sopenharmony_ci            return -1;
5327db96d56Sopenharmony_ci        }
5337db96d56Sopenharmony_ci    }
5347db96d56Sopenharmony_ci    return result;
5357db96d56Sopenharmony_ci}
5367db96d56Sopenharmony_ci
5377db96d56Sopenharmony_ci/* Forward declaration because parsing is recursive. */
5387db96d56Sopenharmony_cistatic expr_ty
5397db96d56Sopenharmony_cifstring_parse(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
5407db96d56Sopenharmony_ci              Token *first_token, Token* t, Token *last_token);
5417db96d56Sopenharmony_ci
5427db96d56Sopenharmony_ci/* Parse the f-string at *str, ending at end.  We know *str starts an
5437db96d56Sopenharmony_ci   expression (so it must be a '{'). Returns the FormattedValue node, which
5447db96d56Sopenharmony_ci   includes the expression, conversion character, format_spec expression, and
5457db96d56Sopenharmony_ci   optionally the text of the expression (if = is used).
5467db96d56Sopenharmony_ci
5477db96d56Sopenharmony_ci   Note that I don't do a perfect job here: I don't make sure that a
5487db96d56Sopenharmony_ci   closing brace doesn't match an opening paren, for example. It
5497db96d56Sopenharmony_ci   doesn't need to error on all invalid expressions, just correctly
5507db96d56Sopenharmony_ci   find the end of all valid ones. Any errors inside the expression
5517db96d56Sopenharmony_ci   will be caught when we parse it later.
5527db96d56Sopenharmony_ci
5537db96d56Sopenharmony_ci   *expression is set to the expression.  For an '=' "debug" expression,
5547db96d56Sopenharmony_ci   *expr_text is set to the debug text (the original text of the expression,
5557db96d56Sopenharmony_ci   including the '=' and any whitespace around it, as a string object).  If
5567db96d56Sopenharmony_ci   not a debug expression, *expr_text set to NULL. */
5577db96d56Sopenharmony_cistatic int
5587db96d56Sopenharmony_cifstring_find_expr(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
5597db96d56Sopenharmony_ci                  PyObject **expr_text, expr_ty *expression, Token *first_token,
5607db96d56Sopenharmony_ci                  Token *t, Token *last_token)
5617db96d56Sopenharmony_ci{
5627db96d56Sopenharmony_ci    /* Return -1 on error, else 0. */
5637db96d56Sopenharmony_ci
5647db96d56Sopenharmony_ci    const char *expr_start;
5657db96d56Sopenharmony_ci    const char *expr_end;
5667db96d56Sopenharmony_ci    expr_ty simple_expression;
5677db96d56Sopenharmony_ci    expr_ty format_spec = NULL; /* Optional format specifier. */
5687db96d56Sopenharmony_ci    int conversion = -1; /* The conversion char.  Use default if not
5697db96d56Sopenharmony_ci                            specified, or !r if using = and no format
5707db96d56Sopenharmony_ci                            spec. */
5717db96d56Sopenharmony_ci
5727db96d56Sopenharmony_ci    /* 0 if we're not in a string, else the quote char we're trying to
5737db96d56Sopenharmony_ci       match (single or double quote). */
5747db96d56Sopenharmony_ci    char quote_char = 0;
5757db96d56Sopenharmony_ci
5767db96d56Sopenharmony_ci    /* If we're inside a string, 1=normal, 3=triple-quoted. */
5777db96d56Sopenharmony_ci    int string_type = 0;
5787db96d56Sopenharmony_ci
5797db96d56Sopenharmony_ci    /* Keep track of nesting level for braces/parens/brackets in
5807db96d56Sopenharmony_ci       expressions. */
5817db96d56Sopenharmony_ci    Py_ssize_t nested_depth = 0;
5827db96d56Sopenharmony_ci    char parenstack[MAXLEVEL];
5837db96d56Sopenharmony_ci
5847db96d56Sopenharmony_ci    *expr_text = NULL;
5857db96d56Sopenharmony_ci
5867db96d56Sopenharmony_ci    /* Can only nest one level deep. */
5877db96d56Sopenharmony_ci    if (recurse_lvl >= 2) {
5887db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
5897db96d56Sopenharmony_ci        goto error;
5907db96d56Sopenharmony_ci    }
5917db96d56Sopenharmony_ci
5927db96d56Sopenharmony_ci    /* The first char must be a left brace, or we wouldn't have gotten
5937db96d56Sopenharmony_ci       here. Skip over it. */
5947db96d56Sopenharmony_ci    assert(**str == '{');
5957db96d56Sopenharmony_ci    *str += 1;
5967db96d56Sopenharmony_ci
5977db96d56Sopenharmony_ci    expr_start = *str;
5987db96d56Sopenharmony_ci    for (; *str < end; (*str)++) {
5997db96d56Sopenharmony_ci        char ch;
6007db96d56Sopenharmony_ci
6017db96d56Sopenharmony_ci        /* Loop invariants. */
6027db96d56Sopenharmony_ci        assert(nested_depth >= 0);
6037db96d56Sopenharmony_ci        assert(*str >= expr_start && *str < end);
6047db96d56Sopenharmony_ci        if (quote_char) {
6057db96d56Sopenharmony_ci            assert(string_type == 1 || string_type == 3);
6067db96d56Sopenharmony_ci        } else {
6077db96d56Sopenharmony_ci            assert(string_type == 0);
6087db96d56Sopenharmony_ci        }
6097db96d56Sopenharmony_ci
6107db96d56Sopenharmony_ci        ch = **str;
6117db96d56Sopenharmony_ci        /* Nowhere inside an expression is a backslash allowed. */
6127db96d56Sopenharmony_ci        if (ch == '\\') {
6137db96d56Sopenharmony_ci            /* Error: can't include a backslash character, inside
6147db96d56Sopenharmony_ci               parens or strings or not. */
6157db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR(
6167db96d56Sopenharmony_ci                      "f-string expression part "
6177db96d56Sopenharmony_ci                      "cannot include a backslash");
6187db96d56Sopenharmony_ci            goto error;
6197db96d56Sopenharmony_ci        }
6207db96d56Sopenharmony_ci        if (quote_char) {
6217db96d56Sopenharmony_ci            /* We're inside a string. See if we're at the end. */
6227db96d56Sopenharmony_ci            /* This code needs to implement the same non-error logic
6237db96d56Sopenharmony_ci               as tok_get from tokenizer.c, at the letter_quote
6247db96d56Sopenharmony_ci               label. To actually share that code would be a
6257db96d56Sopenharmony_ci               nightmare. But, it's unlikely to change and is small,
6267db96d56Sopenharmony_ci               so duplicate it here. Note we don't need to catch all
6277db96d56Sopenharmony_ci               of the errors, since they'll be caught when parsing the
6287db96d56Sopenharmony_ci               expression. We just need to match the non-error
6297db96d56Sopenharmony_ci               cases. Thus we can ignore \n in single-quoted strings,
6307db96d56Sopenharmony_ci               for example. Or non-terminated strings. */
6317db96d56Sopenharmony_ci            if (ch == quote_char) {
6327db96d56Sopenharmony_ci                /* Does this match the string_type (single or triple
6337db96d56Sopenharmony_ci                   quoted)? */
6347db96d56Sopenharmony_ci                if (string_type == 3) {
6357db96d56Sopenharmony_ci                    if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
6367db96d56Sopenharmony_ci                        /* We're at the end of a triple quoted string. */
6377db96d56Sopenharmony_ci                        *str += 2;
6387db96d56Sopenharmony_ci                        string_type = 0;
6397db96d56Sopenharmony_ci                        quote_char = 0;
6407db96d56Sopenharmony_ci                        continue;
6417db96d56Sopenharmony_ci                    }
6427db96d56Sopenharmony_ci                } else {
6437db96d56Sopenharmony_ci                    /* We're at the end of a normal string. */
6447db96d56Sopenharmony_ci                    quote_char = 0;
6457db96d56Sopenharmony_ci                    string_type = 0;
6467db96d56Sopenharmony_ci                    continue;
6477db96d56Sopenharmony_ci                }
6487db96d56Sopenharmony_ci            }
6497db96d56Sopenharmony_ci        } else if (ch == '\'' || ch == '"') {
6507db96d56Sopenharmony_ci            /* Is this a triple quoted string? */
6517db96d56Sopenharmony_ci            if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
6527db96d56Sopenharmony_ci                string_type = 3;
6537db96d56Sopenharmony_ci                *str += 2;
6547db96d56Sopenharmony_ci            } else {
6557db96d56Sopenharmony_ci                /* Start of a normal string. */
6567db96d56Sopenharmony_ci                string_type = 1;
6577db96d56Sopenharmony_ci            }
6587db96d56Sopenharmony_ci            /* Start looking for the end of the string. */
6597db96d56Sopenharmony_ci            quote_char = ch;
6607db96d56Sopenharmony_ci        } else if (ch == '[' || ch == '{' || ch == '(') {
6617db96d56Sopenharmony_ci            if (nested_depth >= MAXLEVEL) {
6627db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
6637db96d56Sopenharmony_ci                goto error;
6647db96d56Sopenharmony_ci            }
6657db96d56Sopenharmony_ci            parenstack[nested_depth] = ch;
6667db96d56Sopenharmony_ci            nested_depth++;
6677db96d56Sopenharmony_ci        } else if (ch == '#') {
6687db96d56Sopenharmony_ci            /* Error: can't include a comment character, inside parens
6697db96d56Sopenharmony_ci               or not. */
6707db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
6717db96d56Sopenharmony_ci            goto error;
6727db96d56Sopenharmony_ci        } else if (nested_depth == 0 &&
6737db96d56Sopenharmony_ci                   (ch == '!' || ch == ':' || ch == '}' ||
6747db96d56Sopenharmony_ci                    ch == '=' || ch == '>' || ch == '<')) {
6757db96d56Sopenharmony_ci            /* See if there's a next character. */
6767db96d56Sopenharmony_ci            if (*str+1 < end) {
6777db96d56Sopenharmony_ci                char next = *(*str+1);
6787db96d56Sopenharmony_ci
6797db96d56Sopenharmony_ci                /* For "!=". since '=' is not an allowed conversion character,
6807db96d56Sopenharmony_ci                   nothing is lost in this test. */
6817db96d56Sopenharmony_ci                if ((ch == '!' && next == '=') ||   /* != */
6827db96d56Sopenharmony_ci                    (ch == '=' && next == '=') ||   /* == */
6837db96d56Sopenharmony_ci                    (ch == '<' && next == '=') ||   /* <= */
6847db96d56Sopenharmony_ci                    (ch == '>' && next == '=')      /* >= */
6857db96d56Sopenharmony_ci                    ) {
6867db96d56Sopenharmony_ci                    *str += 1;
6877db96d56Sopenharmony_ci                    continue;
6887db96d56Sopenharmony_ci                }
6897db96d56Sopenharmony_ci            }
6907db96d56Sopenharmony_ci            /* Don't get out of the loop for these, if they're single
6917db96d56Sopenharmony_ci               chars (not part of 2-char tokens). If by themselves, they
6927db96d56Sopenharmony_ci               don't end an expression (unlike say '!'). */
6937db96d56Sopenharmony_ci            if (ch == '>' || ch == '<') {
6947db96d56Sopenharmony_ci                continue;
6957db96d56Sopenharmony_ci            }
6967db96d56Sopenharmony_ci
6977db96d56Sopenharmony_ci            /* Normal way out of this loop. */
6987db96d56Sopenharmony_ci            break;
6997db96d56Sopenharmony_ci        } else if (ch == ']' || ch == '}' || ch == ')') {
7007db96d56Sopenharmony_ci            if (!nested_depth) {
7017db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
7027db96d56Sopenharmony_ci                goto error;
7037db96d56Sopenharmony_ci            }
7047db96d56Sopenharmony_ci            nested_depth--;
7057db96d56Sopenharmony_ci            int opening = (unsigned char)parenstack[nested_depth];
7067db96d56Sopenharmony_ci            if (!((opening == '(' && ch == ')') ||
7077db96d56Sopenharmony_ci                  (opening == '[' && ch == ']') ||
7087db96d56Sopenharmony_ci                  (opening == '{' && ch == '}')))
7097db96d56Sopenharmony_ci            {
7107db96d56Sopenharmony_ci                RAISE_SYNTAX_ERROR(
7117db96d56Sopenharmony_ci                          "f-string: closing parenthesis '%c' "
7127db96d56Sopenharmony_ci                          "does not match opening parenthesis '%c'",
7137db96d56Sopenharmony_ci                          ch, opening);
7147db96d56Sopenharmony_ci                goto error;
7157db96d56Sopenharmony_ci            }
7167db96d56Sopenharmony_ci        } else {
7177db96d56Sopenharmony_ci            /* Just consume this char and loop around. */
7187db96d56Sopenharmony_ci        }
7197db96d56Sopenharmony_ci    }
7207db96d56Sopenharmony_ci    expr_end = *str;
7217db96d56Sopenharmony_ci    /* If we leave the above loop in a string or with mismatched parens, we
7227db96d56Sopenharmony_ci       don't really care. We'll get a syntax error when compiling the
7237db96d56Sopenharmony_ci       expression. But, we can produce a better error message, so let's just
7247db96d56Sopenharmony_ci       do that.*/
7257db96d56Sopenharmony_ci    if (quote_char) {
7267db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: unterminated string");
7277db96d56Sopenharmony_ci        goto error;
7287db96d56Sopenharmony_ci    }
7297db96d56Sopenharmony_ci    if (nested_depth) {
7307db96d56Sopenharmony_ci        int opening = (unsigned char)parenstack[nested_depth - 1];
7317db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
7327db96d56Sopenharmony_ci        goto error;
7337db96d56Sopenharmony_ci    }
7347db96d56Sopenharmony_ci
7357db96d56Sopenharmony_ci    if (*str >= end) {
7367db96d56Sopenharmony_ci        goto unexpected_end_of_string;
7377db96d56Sopenharmony_ci    }
7387db96d56Sopenharmony_ci
7397db96d56Sopenharmony_ci    /* Compile the expression as soon as possible, so we show errors
7407db96d56Sopenharmony_ci       related to the expression before errors related to the
7417db96d56Sopenharmony_ci       conversion or format_spec. */
7427db96d56Sopenharmony_ci    simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
7437db96d56Sopenharmony_ci    if (!simple_expression) {
7447db96d56Sopenharmony_ci        goto error;
7457db96d56Sopenharmony_ci    }
7467db96d56Sopenharmony_ci
7477db96d56Sopenharmony_ci    /* Check for =, which puts the text value of the expression in
7487db96d56Sopenharmony_ci       expr_text. */
7497db96d56Sopenharmony_ci    if (**str == '=') {
7507db96d56Sopenharmony_ci        if (p->feature_version < 8) {
7517db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
7527db96d56Sopenharmony_ci                               "only supported in Python 3.8 and greater");
7537db96d56Sopenharmony_ci            goto error;
7547db96d56Sopenharmony_ci        }
7557db96d56Sopenharmony_ci        *str += 1;
7567db96d56Sopenharmony_ci
7577db96d56Sopenharmony_ci        /* Skip over ASCII whitespace.  No need to test for end of string
7587db96d56Sopenharmony_ci           here, since we know there's at least a trailing quote somewhere
7597db96d56Sopenharmony_ci           ahead. */
7607db96d56Sopenharmony_ci        while (Py_ISSPACE(**str)) {
7617db96d56Sopenharmony_ci            *str += 1;
7627db96d56Sopenharmony_ci        }
7637db96d56Sopenharmony_ci        if (*str >= end) {
7647db96d56Sopenharmony_ci            goto unexpected_end_of_string;
7657db96d56Sopenharmony_ci        }
7667db96d56Sopenharmony_ci        /* Set *expr_text to the text of the expression. */
7677db96d56Sopenharmony_ci        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
7687db96d56Sopenharmony_ci        if (!*expr_text) {
7697db96d56Sopenharmony_ci            goto error;
7707db96d56Sopenharmony_ci        }
7717db96d56Sopenharmony_ci    }
7727db96d56Sopenharmony_ci
7737db96d56Sopenharmony_ci    /* Check for a conversion char, if present. */
7747db96d56Sopenharmony_ci    if (**str == '!') {
7757db96d56Sopenharmony_ci        *str += 1;
7767db96d56Sopenharmony_ci        if (*str >= end) {
7777db96d56Sopenharmony_ci            goto unexpected_end_of_string;
7787db96d56Sopenharmony_ci        }
7797db96d56Sopenharmony_ci
7807db96d56Sopenharmony_ci        conversion = (unsigned char)**str;
7817db96d56Sopenharmony_ci        *str += 1;
7827db96d56Sopenharmony_ci
7837db96d56Sopenharmony_ci        /* Validate the conversion. */
7847db96d56Sopenharmony_ci        if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
7857db96d56Sopenharmony_ci            RAISE_SYNTAX_ERROR(
7867db96d56Sopenharmony_ci                      "f-string: invalid conversion character: "
7877db96d56Sopenharmony_ci                      "expected 's', 'r', or 'a'");
7887db96d56Sopenharmony_ci            goto error;
7897db96d56Sopenharmony_ci        }
7907db96d56Sopenharmony_ci
7917db96d56Sopenharmony_ci    }
7927db96d56Sopenharmony_ci
7937db96d56Sopenharmony_ci    /* Check for the format spec, if present. */
7947db96d56Sopenharmony_ci    if (*str >= end) {
7957db96d56Sopenharmony_ci        goto unexpected_end_of_string;
7967db96d56Sopenharmony_ci    }
7977db96d56Sopenharmony_ci    if (**str == ':') {
7987db96d56Sopenharmony_ci        *str += 1;
7997db96d56Sopenharmony_ci        if (*str >= end) {
8007db96d56Sopenharmony_ci            goto unexpected_end_of_string;
8017db96d56Sopenharmony_ci        }
8027db96d56Sopenharmony_ci
8037db96d56Sopenharmony_ci        /* Parse the format spec. */
8047db96d56Sopenharmony_ci        format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
8057db96d56Sopenharmony_ci                                    first_token, t, last_token);
8067db96d56Sopenharmony_ci        if (!format_spec) {
8077db96d56Sopenharmony_ci            goto error;
8087db96d56Sopenharmony_ci        }
8097db96d56Sopenharmony_ci    }
8107db96d56Sopenharmony_ci
8117db96d56Sopenharmony_ci    if (*str >= end || **str != '}') {
8127db96d56Sopenharmony_ci        goto unexpected_end_of_string;
8137db96d56Sopenharmony_ci    }
8147db96d56Sopenharmony_ci
8157db96d56Sopenharmony_ci    /* We're at a right brace. Consume it. */
8167db96d56Sopenharmony_ci    assert(*str < end);
8177db96d56Sopenharmony_ci    assert(**str == '}');
8187db96d56Sopenharmony_ci    *str += 1;
8197db96d56Sopenharmony_ci
8207db96d56Sopenharmony_ci    /* If we're in = mode (detected by non-NULL expr_text), and have no format
8217db96d56Sopenharmony_ci       spec and no explicit conversion, set the conversion to 'r'. */
8227db96d56Sopenharmony_ci    if (*expr_text && format_spec == NULL && conversion == -1) {
8237db96d56Sopenharmony_ci        conversion = 'r';
8247db96d56Sopenharmony_ci    }
8257db96d56Sopenharmony_ci
8267db96d56Sopenharmony_ci    /* And now create the FormattedValue node that represents this
8277db96d56Sopenharmony_ci       entire expression with the conversion and format spec. */
8287db96d56Sopenharmony_ci    //TODO: Fix this
8297db96d56Sopenharmony_ci    *expression = _PyAST_FormattedValue(simple_expression, conversion,
8307db96d56Sopenharmony_ci                                        format_spec, first_token->lineno,
8317db96d56Sopenharmony_ci                                        first_token->col_offset,
8327db96d56Sopenharmony_ci                                        last_token->end_lineno,
8337db96d56Sopenharmony_ci                                        last_token->end_col_offset, p->arena);
8347db96d56Sopenharmony_ci    if (!*expression) {
8357db96d56Sopenharmony_ci        goto error;
8367db96d56Sopenharmony_ci    }
8377db96d56Sopenharmony_ci
8387db96d56Sopenharmony_ci    return 0;
8397db96d56Sopenharmony_ci
8407db96d56Sopenharmony_ciunexpected_end_of_string:
8417db96d56Sopenharmony_ci    RAISE_SYNTAX_ERROR("f-string: expecting '}'");
8427db96d56Sopenharmony_ci    /* Falls through to error. */
8437db96d56Sopenharmony_ci
8447db96d56Sopenharmony_cierror:
8457db96d56Sopenharmony_ci    Py_XDECREF(*expr_text);
8467db96d56Sopenharmony_ci    return -1;
8477db96d56Sopenharmony_ci
8487db96d56Sopenharmony_ci}
8497db96d56Sopenharmony_ci
8507db96d56Sopenharmony_ci/* Return -1 on error.
8517db96d56Sopenharmony_ci
8527db96d56Sopenharmony_ci   Return 0 if we have a literal (possible zero length) and an
8537db96d56Sopenharmony_ci   expression (zero length if at the end of the string.
8547db96d56Sopenharmony_ci
8557db96d56Sopenharmony_ci   Return 1 if we have a literal, but no expression, and we want the
8567db96d56Sopenharmony_ci   caller to call us again. This is used to deal with doubled
8577db96d56Sopenharmony_ci   braces.
8587db96d56Sopenharmony_ci
8597db96d56Sopenharmony_ci   When called multiple times on the string 'a{{b{0}c', this function
8607db96d56Sopenharmony_ci   will return:
8617db96d56Sopenharmony_ci
8627db96d56Sopenharmony_ci   1. the literal 'a{' with no expression, and a return value
8637db96d56Sopenharmony_ci      of 1. Despite the fact that there's no expression, the return
8647db96d56Sopenharmony_ci      value of 1 means we're not finished yet.
8657db96d56Sopenharmony_ci
8667db96d56Sopenharmony_ci   2. the literal 'b' and the expression '0', with a return value of
8677db96d56Sopenharmony_ci      0. The fact that there's an expression means we're not finished.
8687db96d56Sopenharmony_ci
8697db96d56Sopenharmony_ci   3. literal 'c' with no expression and a return value of 0. The
8707db96d56Sopenharmony_ci      combination of the return value of 0 with no expression means
8717db96d56Sopenharmony_ci      we're finished.
8727db96d56Sopenharmony_ci*/
8737db96d56Sopenharmony_cistatic int
8747db96d56Sopenharmony_cifstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int raw,
8757db96d56Sopenharmony_ci                              int recurse_lvl, PyObject **literal,
8767db96d56Sopenharmony_ci                              PyObject **expr_text, expr_ty *expression,
8777db96d56Sopenharmony_ci                              Token *first_token, Token *t, Token *last_token)
8787db96d56Sopenharmony_ci{
8797db96d56Sopenharmony_ci    int result;
8807db96d56Sopenharmony_ci
8817db96d56Sopenharmony_ci    assert(*literal == NULL && *expression == NULL);
8827db96d56Sopenharmony_ci
8837db96d56Sopenharmony_ci    /* Get any literal string. */
8847db96d56Sopenharmony_ci    result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
8857db96d56Sopenharmony_ci    if (result < 0) {
8867db96d56Sopenharmony_ci        goto error;
8877db96d56Sopenharmony_ci    }
8887db96d56Sopenharmony_ci
8897db96d56Sopenharmony_ci    assert(result == 0 || result == 1);
8907db96d56Sopenharmony_ci
8917db96d56Sopenharmony_ci    if (result == 1) {
8927db96d56Sopenharmony_ci        /* We have a literal, but don't look at the expression. */
8937db96d56Sopenharmony_ci        return 1;
8947db96d56Sopenharmony_ci    }
8957db96d56Sopenharmony_ci
8967db96d56Sopenharmony_ci    if (*str >= end || **str == '}') {
8977db96d56Sopenharmony_ci        /* We're at the end of the string or the end of a nested
8987db96d56Sopenharmony_ci           f-string: no expression. The top-level error case where we
8997db96d56Sopenharmony_ci           expect to be at the end of the string but we're at a '}' is
9007db96d56Sopenharmony_ci           handled later. */
9017db96d56Sopenharmony_ci        return 0;
9027db96d56Sopenharmony_ci    }
9037db96d56Sopenharmony_ci
9047db96d56Sopenharmony_ci    /* We must now be the start of an expression, on a '{'. */
9057db96d56Sopenharmony_ci    assert(**str == '{');
9067db96d56Sopenharmony_ci
9077db96d56Sopenharmony_ci    if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
9087db96d56Sopenharmony_ci                          expression, first_token, t, last_token) < 0) {
9097db96d56Sopenharmony_ci        goto error;
9107db96d56Sopenharmony_ci    }
9117db96d56Sopenharmony_ci
9127db96d56Sopenharmony_ci    return 0;
9137db96d56Sopenharmony_ci
9147db96d56Sopenharmony_cierror:
9157db96d56Sopenharmony_ci    Py_CLEAR(*literal);
9167db96d56Sopenharmony_ci    return -1;
9177db96d56Sopenharmony_ci}
9187db96d56Sopenharmony_ci
9197db96d56Sopenharmony_ci#ifdef NDEBUG
9207db96d56Sopenharmony_ci#define ExprList_check_invariants(l)
9217db96d56Sopenharmony_ci#else
9227db96d56Sopenharmony_cistatic void
9237db96d56Sopenharmony_ciExprList_check_invariants(ExprList *l)
9247db96d56Sopenharmony_ci{
9257db96d56Sopenharmony_ci    /* Check our invariants. Make sure this object is "live", and
9267db96d56Sopenharmony_ci       hasn't been deallocated. */
9277db96d56Sopenharmony_ci    assert(l->size >= 0);
9287db96d56Sopenharmony_ci    assert(l->p != NULL);
9297db96d56Sopenharmony_ci    if (l->size <= EXPRLIST_N_CACHED) {
9307db96d56Sopenharmony_ci        assert(l->data == l->p);
9317db96d56Sopenharmony_ci    }
9327db96d56Sopenharmony_ci}
9337db96d56Sopenharmony_ci#endif
9347db96d56Sopenharmony_ci
9357db96d56Sopenharmony_cistatic void
9367db96d56Sopenharmony_ciExprList_Init(ExprList *l)
9377db96d56Sopenharmony_ci{
9387db96d56Sopenharmony_ci    l->allocated = EXPRLIST_N_CACHED;
9397db96d56Sopenharmony_ci    l->size = 0;
9407db96d56Sopenharmony_ci
9417db96d56Sopenharmony_ci    /* Until we start allocating dynamically, p points to data. */
9427db96d56Sopenharmony_ci    l->p = l->data;
9437db96d56Sopenharmony_ci
9447db96d56Sopenharmony_ci    ExprList_check_invariants(l);
9457db96d56Sopenharmony_ci}
9467db96d56Sopenharmony_ci
9477db96d56Sopenharmony_cistatic int
9487db96d56Sopenharmony_ciExprList_Append(ExprList *l, expr_ty exp)
9497db96d56Sopenharmony_ci{
9507db96d56Sopenharmony_ci    ExprList_check_invariants(l);
9517db96d56Sopenharmony_ci    if (l->size >= l->allocated) {
9527db96d56Sopenharmony_ci        /* We need to alloc (or realloc) the memory. */
9537db96d56Sopenharmony_ci        Py_ssize_t new_size = l->allocated * 2;
9547db96d56Sopenharmony_ci
9557db96d56Sopenharmony_ci        /* See if we've ever allocated anything dynamically. */
9567db96d56Sopenharmony_ci        if (l->p == l->data) {
9577db96d56Sopenharmony_ci            Py_ssize_t i;
9587db96d56Sopenharmony_ci            /* We're still using the cached data. Switch to
9597db96d56Sopenharmony_ci               alloc-ing. */
9607db96d56Sopenharmony_ci            l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
9617db96d56Sopenharmony_ci            if (!l->p) {
9627db96d56Sopenharmony_ci                return -1;
9637db96d56Sopenharmony_ci            }
9647db96d56Sopenharmony_ci            /* Copy the cached data into the new buffer. */
9657db96d56Sopenharmony_ci            for (i = 0; i < l->size; i++) {
9667db96d56Sopenharmony_ci                l->p[i] = l->data[i];
9677db96d56Sopenharmony_ci            }
9687db96d56Sopenharmony_ci        } else {
9697db96d56Sopenharmony_ci            /* Just realloc. */
9707db96d56Sopenharmony_ci            expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
9717db96d56Sopenharmony_ci            if (!tmp) {
9727db96d56Sopenharmony_ci                PyMem_Free(l->p);
9737db96d56Sopenharmony_ci                l->p = NULL;
9747db96d56Sopenharmony_ci                return -1;
9757db96d56Sopenharmony_ci            }
9767db96d56Sopenharmony_ci            l->p = tmp;
9777db96d56Sopenharmony_ci        }
9787db96d56Sopenharmony_ci
9797db96d56Sopenharmony_ci        l->allocated = new_size;
9807db96d56Sopenharmony_ci        assert(l->allocated == 2 * l->size);
9817db96d56Sopenharmony_ci    }
9827db96d56Sopenharmony_ci
9837db96d56Sopenharmony_ci    l->p[l->size++] = exp;
9847db96d56Sopenharmony_ci
9857db96d56Sopenharmony_ci    ExprList_check_invariants(l);
9867db96d56Sopenharmony_ci    return 0;
9877db96d56Sopenharmony_ci}
9887db96d56Sopenharmony_ci
9897db96d56Sopenharmony_cistatic void
9907db96d56Sopenharmony_ciExprList_Dealloc(ExprList *l)
9917db96d56Sopenharmony_ci{
9927db96d56Sopenharmony_ci    ExprList_check_invariants(l);
9937db96d56Sopenharmony_ci
9947db96d56Sopenharmony_ci    /* If there's been an error, or we've never dynamically allocated,
9957db96d56Sopenharmony_ci       do nothing. */
9967db96d56Sopenharmony_ci    if (!l->p || l->p == l->data) {
9977db96d56Sopenharmony_ci        /* Do nothing. */
9987db96d56Sopenharmony_ci    } else {
9997db96d56Sopenharmony_ci        /* We have dynamically allocated. Free the memory. */
10007db96d56Sopenharmony_ci        PyMem_Free(l->p);
10017db96d56Sopenharmony_ci    }
10027db96d56Sopenharmony_ci    l->p = NULL;
10037db96d56Sopenharmony_ci    l->size = -1;
10047db96d56Sopenharmony_ci}
10057db96d56Sopenharmony_ci
10067db96d56Sopenharmony_cistatic asdl_expr_seq *
10077db96d56Sopenharmony_ciExprList_Finish(ExprList *l, PyArena *arena)
10087db96d56Sopenharmony_ci{
10097db96d56Sopenharmony_ci    asdl_expr_seq *seq;
10107db96d56Sopenharmony_ci
10117db96d56Sopenharmony_ci    ExprList_check_invariants(l);
10127db96d56Sopenharmony_ci
10137db96d56Sopenharmony_ci    /* Allocate the asdl_seq and copy the expressions in to it. */
10147db96d56Sopenharmony_ci    seq = _Py_asdl_expr_seq_new(l->size, arena);
10157db96d56Sopenharmony_ci    if (seq) {
10167db96d56Sopenharmony_ci        Py_ssize_t i;
10177db96d56Sopenharmony_ci        for (i = 0; i < l->size; i++) {
10187db96d56Sopenharmony_ci            asdl_seq_SET(seq, i, l->p[i]);
10197db96d56Sopenharmony_ci        }
10207db96d56Sopenharmony_ci    }
10217db96d56Sopenharmony_ci    ExprList_Dealloc(l);
10227db96d56Sopenharmony_ci    return seq;
10237db96d56Sopenharmony_ci}
10247db96d56Sopenharmony_ci
10257db96d56Sopenharmony_ci#ifdef NDEBUG
10267db96d56Sopenharmony_ci#define FstringParser_check_invariants(state)
10277db96d56Sopenharmony_ci#else
10287db96d56Sopenharmony_cistatic void
10297db96d56Sopenharmony_ciFstringParser_check_invariants(FstringParser *state)
10307db96d56Sopenharmony_ci{
10317db96d56Sopenharmony_ci    if (state->last_str) {
10327db96d56Sopenharmony_ci        assert(PyUnicode_CheckExact(state->last_str));
10337db96d56Sopenharmony_ci    }
10347db96d56Sopenharmony_ci    ExprList_check_invariants(&state->expr_list);
10357db96d56Sopenharmony_ci}
10367db96d56Sopenharmony_ci#endif
10377db96d56Sopenharmony_ci
10387db96d56Sopenharmony_civoid
10397db96d56Sopenharmony_ci_PyPegen_FstringParser_Init(FstringParser *state)
10407db96d56Sopenharmony_ci{
10417db96d56Sopenharmony_ci    state->last_str = NULL;
10427db96d56Sopenharmony_ci    state->fmode = 0;
10437db96d56Sopenharmony_ci    ExprList_Init(&state->expr_list);
10447db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
10457db96d56Sopenharmony_ci}
10467db96d56Sopenharmony_ci
10477db96d56Sopenharmony_civoid
10487db96d56Sopenharmony_ci_PyPegen_FstringParser_Dealloc(FstringParser *state)
10497db96d56Sopenharmony_ci{
10507db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
10517db96d56Sopenharmony_ci
10527db96d56Sopenharmony_ci    Py_XDECREF(state->last_str);
10537db96d56Sopenharmony_ci    ExprList_Dealloc(&state->expr_list);
10547db96d56Sopenharmony_ci}
10557db96d56Sopenharmony_ci
10567db96d56Sopenharmony_ci/* Make a Constant node, but decref the PyUnicode object being added. */
10577db96d56Sopenharmony_cistatic expr_ty
10587db96d56Sopenharmony_cimake_str_node_and_del(Parser *p, PyObject **str, Token* first_token, Token *last_token)
10597db96d56Sopenharmony_ci{
10607db96d56Sopenharmony_ci    PyObject *s = *str;
10617db96d56Sopenharmony_ci    PyObject *kind = NULL;
10627db96d56Sopenharmony_ci    *str = NULL;
10637db96d56Sopenharmony_ci    assert(PyUnicode_CheckExact(s));
10647db96d56Sopenharmony_ci    if (_PyArena_AddPyObject(p->arena, s) < 0) {
10657db96d56Sopenharmony_ci        Py_DECREF(s);
10667db96d56Sopenharmony_ci        return NULL;
10677db96d56Sopenharmony_ci    }
10687db96d56Sopenharmony_ci    const char* the_str = PyBytes_AsString(first_token->bytes);
10697db96d56Sopenharmony_ci    if (the_str && the_str[0] == 'u') {
10707db96d56Sopenharmony_ci        kind = _PyPegen_new_identifier(p, "u");
10717db96d56Sopenharmony_ci    }
10727db96d56Sopenharmony_ci
10737db96d56Sopenharmony_ci    if (kind == NULL && PyErr_Occurred()) {
10747db96d56Sopenharmony_ci        return NULL;
10757db96d56Sopenharmony_ci    }
10767db96d56Sopenharmony_ci
10777db96d56Sopenharmony_ci    return _PyAST_Constant(s, kind, first_token->lineno, first_token->col_offset,
10787db96d56Sopenharmony_ci                           last_token->end_lineno, last_token->end_col_offset,
10797db96d56Sopenharmony_ci                           p->arena);
10807db96d56Sopenharmony_ci
10817db96d56Sopenharmony_ci}
10827db96d56Sopenharmony_ci
10837db96d56Sopenharmony_ci
10847db96d56Sopenharmony_ci/* Add a non-f-string (that is, a regular literal string). str is
10857db96d56Sopenharmony_ci   decref'd. */
10867db96d56Sopenharmony_ciint
10877db96d56Sopenharmony_ci_PyPegen_FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
10887db96d56Sopenharmony_ci{
10897db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
10907db96d56Sopenharmony_ci
10917db96d56Sopenharmony_ci    assert(PyUnicode_CheckExact(str));
10927db96d56Sopenharmony_ci
10937db96d56Sopenharmony_ci    if (PyUnicode_GET_LENGTH(str) == 0) {
10947db96d56Sopenharmony_ci        Py_DECREF(str);
10957db96d56Sopenharmony_ci        return 0;
10967db96d56Sopenharmony_ci    }
10977db96d56Sopenharmony_ci
10987db96d56Sopenharmony_ci    if (!state->last_str) {
10997db96d56Sopenharmony_ci        /* We didn't have a string before, so just remember this one. */
11007db96d56Sopenharmony_ci        state->last_str = str;
11017db96d56Sopenharmony_ci    } else {
11027db96d56Sopenharmony_ci        /* Concatenate this with the previous string. */
11037db96d56Sopenharmony_ci        PyUnicode_AppendAndDel(&state->last_str, str);
11047db96d56Sopenharmony_ci        if (!state->last_str) {
11057db96d56Sopenharmony_ci            return -1;
11067db96d56Sopenharmony_ci        }
11077db96d56Sopenharmony_ci    }
11087db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
11097db96d56Sopenharmony_ci    return 0;
11107db96d56Sopenharmony_ci}
11117db96d56Sopenharmony_ci
11127db96d56Sopenharmony_ci/* Parse an f-string. The f-string is in *str to end, with no
11137db96d56Sopenharmony_ci   'f' or quotes. */
11147db96d56Sopenharmony_ciint
11157db96d56Sopenharmony_ci_PyPegen_FstringParser_ConcatFstring(Parser *p, FstringParser *state, const char **str,
11167db96d56Sopenharmony_ci                            const char *end, int raw, int recurse_lvl,
11177db96d56Sopenharmony_ci                            Token *first_token, Token* t, Token *last_token)
11187db96d56Sopenharmony_ci{
11197db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
11207db96d56Sopenharmony_ci    state->fmode = 1;
11217db96d56Sopenharmony_ci
11227db96d56Sopenharmony_ci    /* Parse the f-string. */
11237db96d56Sopenharmony_ci    while (1) {
11247db96d56Sopenharmony_ci        PyObject *literal = NULL;
11257db96d56Sopenharmony_ci        PyObject *expr_text = NULL;
11267db96d56Sopenharmony_ci        expr_ty expression = NULL;
11277db96d56Sopenharmony_ci
11287db96d56Sopenharmony_ci        /* If there's a zero length literal in front of the
11297db96d56Sopenharmony_ci           expression, literal will be NULL. If we're at the end of
11307db96d56Sopenharmony_ci           the f-string, expression will be NULL (unless result == 1,
11317db96d56Sopenharmony_ci           see below). */
11327db96d56Sopenharmony_ci        int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
11337db96d56Sopenharmony_ci                                                   &literal, &expr_text,
11347db96d56Sopenharmony_ci                                                   &expression, first_token, t, last_token);
11357db96d56Sopenharmony_ci        if (result < 0) {
11367db96d56Sopenharmony_ci            return -1;
11377db96d56Sopenharmony_ci        }
11387db96d56Sopenharmony_ci
11397db96d56Sopenharmony_ci        /* Add the literal, if any. */
11407db96d56Sopenharmony_ci        if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
11417db96d56Sopenharmony_ci            Py_XDECREF(expr_text);
11427db96d56Sopenharmony_ci            return -1;
11437db96d56Sopenharmony_ci        }
11447db96d56Sopenharmony_ci        /* Add the expr_text, if any. */
11457db96d56Sopenharmony_ci        if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
11467db96d56Sopenharmony_ci            return -1;
11477db96d56Sopenharmony_ci        }
11487db96d56Sopenharmony_ci
11497db96d56Sopenharmony_ci        /* We've dealt with the literal and expr_text, their ownership has
11507db96d56Sopenharmony_ci           been transferred to the state object.  Don't look at them again. */
11517db96d56Sopenharmony_ci
11527db96d56Sopenharmony_ci        /* See if we should just loop around to get the next literal
11537db96d56Sopenharmony_ci           and expression, while ignoring the expression this
11547db96d56Sopenharmony_ci           time. This is used for un-doubling braces, as an
11557db96d56Sopenharmony_ci           optimization. */
11567db96d56Sopenharmony_ci        if (result == 1) {
11577db96d56Sopenharmony_ci            continue;
11587db96d56Sopenharmony_ci        }
11597db96d56Sopenharmony_ci
11607db96d56Sopenharmony_ci        if (!expression) {
11617db96d56Sopenharmony_ci            /* We're done with this f-string. */
11627db96d56Sopenharmony_ci            break;
11637db96d56Sopenharmony_ci        }
11647db96d56Sopenharmony_ci
11657db96d56Sopenharmony_ci        /* We know we have an expression. Convert any existing string
11667db96d56Sopenharmony_ci           to a Constant node. */
11677db96d56Sopenharmony_ci        if (state->last_str) {
11687db96d56Sopenharmony_ci            /* Convert the existing last_str literal to a Constant node. */
11697db96d56Sopenharmony_ci            expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
11707db96d56Sopenharmony_ci            if (!last_str || ExprList_Append(&state->expr_list, last_str) < 0) {
11717db96d56Sopenharmony_ci                return -1;
11727db96d56Sopenharmony_ci            }
11737db96d56Sopenharmony_ci        }
11747db96d56Sopenharmony_ci
11757db96d56Sopenharmony_ci        if (ExprList_Append(&state->expr_list, expression) < 0) {
11767db96d56Sopenharmony_ci            return -1;
11777db96d56Sopenharmony_ci        }
11787db96d56Sopenharmony_ci    }
11797db96d56Sopenharmony_ci
11807db96d56Sopenharmony_ci    /* If recurse_lvl is zero, then we must be at the end of the
11817db96d56Sopenharmony_ci       string. Otherwise, we must be at a right brace. */
11827db96d56Sopenharmony_ci
11837db96d56Sopenharmony_ci    if (recurse_lvl == 0 && *str < end-1) {
11847db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
11857db96d56Sopenharmony_ci        return -1;
11867db96d56Sopenharmony_ci    }
11877db96d56Sopenharmony_ci    if (recurse_lvl != 0 && **str != '}') {
11887db96d56Sopenharmony_ci        RAISE_SYNTAX_ERROR("f-string: expecting '}'");
11897db96d56Sopenharmony_ci        return -1;
11907db96d56Sopenharmony_ci    }
11917db96d56Sopenharmony_ci
11927db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
11937db96d56Sopenharmony_ci    return 0;
11947db96d56Sopenharmony_ci}
11957db96d56Sopenharmony_ci
11967db96d56Sopenharmony_ci/* Convert the partial state reflected in last_str and expr_list to an
11977db96d56Sopenharmony_ci   expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
11987db96d56Sopenharmony_ciexpr_ty
11997db96d56Sopenharmony_ci_PyPegen_FstringParser_Finish(Parser *p, FstringParser *state, Token* first_token,
12007db96d56Sopenharmony_ci                     Token *last_token)
12017db96d56Sopenharmony_ci{
12027db96d56Sopenharmony_ci    asdl_expr_seq *seq;
12037db96d56Sopenharmony_ci
12047db96d56Sopenharmony_ci    FstringParser_check_invariants(state);
12057db96d56Sopenharmony_ci
12067db96d56Sopenharmony_ci    /* If we're just a constant string with no expressions, return
12077db96d56Sopenharmony_ci       that. */
12087db96d56Sopenharmony_ci    if (!state->fmode) {
12097db96d56Sopenharmony_ci        assert(!state->expr_list.size);
12107db96d56Sopenharmony_ci        if (!state->last_str) {
12117db96d56Sopenharmony_ci            /* Create a zero length string. */
12127db96d56Sopenharmony_ci            state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
12137db96d56Sopenharmony_ci            if (!state->last_str) {
12147db96d56Sopenharmony_ci                goto error;
12157db96d56Sopenharmony_ci            }
12167db96d56Sopenharmony_ci        }
12177db96d56Sopenharmony_ci        return make_str_node_and_del(p, &state->last_str, first_token, last_token);
12187db96d56Sopenharmony_ci    }
12197db96d56Sopenharmony_ci
12207db96d56Sopenharmony_ci    /* Create a Constant node out of last_str, if needed. It will be the
12217db96d56Sopenharmony_ci       last node in our expression list. */
12227db96d56Sopenharmony_ci    if (state->last_str) {
12237db96d56Sopenharmony_ci        expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
12247db96d56Sopenharmony_ci        if (!str || ExprList_Append(&state->expr_list, str) < 0) {
12257db96d56Sopenharmony_ci            goto error;
12267db96d56Sopenharmony_ci        }
12277db96d56Sopenharmony_ci    }
12287db96d56Sopenharmony_ci    /* This has already been freed. */
12297db96d56Sopenharmony_ci    assert(state->last_str == NULL);
12307db96d56Sopenharmony_ci
12317db96d56Sopenharmony_ci    seq = ExprList_Finish(&state->expr_list, p->arena);
12327db96d56Sopenharmony_ci    if (!seq) {
12337db96d56Sopenharmony_ci        goto error;
12347db96d56Sopenharmony_ci    }
12357db96d56Sopenharmony_ci
12367db96d56Sopenharmony_ci    return _PyAST_JoinedStr(seq, first_token->lineno, first_token->col_offset,
12377db96d56Sopenharmony_ci                            last_token->end_lineno, last_token->end_col_offset,
12387db96d56Sopenharmony_ci                            p->arena);
12397db96d56Sopenharmony_ci
12407db96d56Sopenharmony_cierror:
12417db96d56Sopenharmony_ci    _PyPegen_FstringParser_Dealloc(state);
12427db96d56Sopenharmony_ci    return NULL;
12437db96d56Sopenharmony_ci}
12447db96d56Sopenharmony_ci
12457db96d56Sopenharmony_ci/* Given an f-string (with no 'f' or quotes) that's in *str and ends
12467db96d56Sopenharmony_ci   at end, parse it into an expr_ty.  Return NULL on error.  Adjust
12477db96d56Sopenharmony_ci   str to point past the parsed portion. */
12487db96d56Sopenharmony_cistatic expr_ty
12497db96d56Sopenharmony_cifstring_parse(Parser *p, const char **str, const char *end, int raw,
12507db96d56Sopenharmony_ci              int recurse_lvl, Token *first_token, Token* t, Token *last_token)
12517db96d56Sopenharmony_ci{
12527db96d56Sopenharmony_ci    FstringParser state;
12537db96d56Sopenharmony_ci
12547db96d56Sopenharmony_ci    _PyPegen_FstringParser_Init(&state);
12557db96d56Sopenharmony_ci    if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
12567db96d56Sopenharmony_ci                                    first_token, t, last_token) < 0) {
12577db96d56Sopenharmony_ci        _PyPegen_FstringParser_Dealloc(&state);
12587db96d56Sopenharmony_ci        return NULL;
12597db96d56Sopenharmony_ci    }
12607db96d56Sopenharmony_ci
12617db96d56Sopenharmony_ci    return _PyPegen_FstringParser_Finish(p, &state, t, t);
12627db96d56Sopenharmony_ci}
1263