17db96d56Sopenharmony_ci#include <stdbool.h> 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ci#include <Python.h> 47db96d56Sopenharmony_ci 57db96d56Sopenharmony_ci#include "tokenizer.h" 67db96d56Sopenharmony_ci#include "pegen.h" 77db96d56Sopenharmony_ci#include "string_parser.h" 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_ci//// STRING HANDLING FUNCTIONS //// 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_cistatic int 127db96d56Sopenharmony_ciwarn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t) 137db96d56Sopenharmony_ci{ 147db96d56Sopenharmony_ci unsigned char c = *first_invalid_escape; 157db96d56Sopenharmony_ci int octal = ('4' <= c && c <= '7'); 167db96d56Sopenharmony_ci PyObject *msg = 177db96d56Sopenharmony_ci octal 187db96d56Sopenharmony_ci ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", 197db96d56Sopenharmony_ci first_invalid_escape) 207db96d56Sopenharmony_ci : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); 217db96d56Sopenharmony_ci if (msg == NULL) { 227db96d56Sopenharmony_ci return -1; 237db96d56Sopenharmony_ci } 247db96d56Sopenharmony_ci if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename, 257db96d56Sopenharmony_ci t->lineno, NULL, NULL) < 0) { 267db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) { 277db96d56Sopenharmony_ci /* Replace the DeprecationWarning exception with a SyntaxError 287db96d56Sopenharmony_ci to get a more accurate error report */ 297db96d56Sopenharmony_ci PyErr_Clear(); 307db96d56Sopenharmony_ci 317db96d56Sopenharmony_ci /* This is needed, in order for the SyntaxError to point to the token t, 327db96d56Sopenharmony_ci since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the 337db96d56Sopenharmony_ci error location, if p->known_err_token is not set. */ 347db96d56Sopenharmony_ci p->known_err_token = t; 357db96d56Sopenharmony_ci if (octal) { 367db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", 377db96d56Sopenharmony_ci first_invalid_escape); 387db96d56Sopenharmony_ci } 397db96d56Sopenharmony_ci else { 407db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); 417db96d56Sopenharmony_ci } 427db96d56Sopenharmony_ci } 437db96d56Sopenharmony_ci Py_DECREF(msg); 447db96d56Sopenharmony_ci return -1; 457db96d56Sopenharmony_ci } 467db96d56Sopenharmony_ci Py_DECREF(msg); 477db96d56Sopenharmony_ci return 0; 487db96d56Sopenharmony_ci} 497db96d56Sopenharmony_ci 507db96d56Sopenharmony_cistatic PyObject * 517db96d56Sopenharmony_cidecode_utf8(const char **sPtr, const char *end) 527db96d56Sopenharmony_ci{ 537db96d56Sopenharmony_ci const char *s; 547db96d56Sopenharmony_ci const char *t; 557db96d56Sopenharmony_ci t = s = *sPtr; 567db96d56Sopenharmony_ci while (s < end && (*s & 0x80)) { 577db96d56Sopenharmony_ci s++; 587db96d56Sopenharmony_ci } 597db96d56Sopenharmony_ci *sPtr = s; 607db96d56Sopenharmony_ci return PyUnicode_DecodeUTF8(t, s - t, NULL); 617db96d56Sopenharmony_ci} 627db96d56Sopenharmony_ci 637db96d56Sopenharmony_cistatic PyObject * 647db96d56Sopenharmony_cidecode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) 657db96d56Sopenharmony_ci{ 667db96d56Sopenharmony_ci PyObject *v; 677db96d56Sopenharmony_ci PyObject *u; 687db96d56Sopenharmony_ci char *buf; 697db96d56Sopenharmony_ci char *p; 707db96d56Sopenharmony_ci const char *end; 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci /* check for integer overflow */ 737db96d56Sopenharmony_ci if (len > SIZE_MAX / 6) { 747db96d56Sopenharmony_ci return NULL; 757db96d56Sopenharmony_ci } 767db96d56Sopenharmony_ci /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 777db96d56Sopenharmony_ci "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ 787db96d56Sopenharmony_ci u = PyBytes_FromStringAndSize((char *)NULL, len * 6); 797db96d56Sopenharmony_ci if (u == NULL) { 807db96d56Sopenharmony_ci return NULL; 817db96d56Sopenharmony_ci } 827db96d56Sopenharmony_ci p = buf = PyBytes_AsString(u); 837db96d56Sopenharmony_ci if (p == NULL) { 847db96d56Sopenharmony_ci return NULL; 857db96d56Sopenharmony_ci } 867db96d56Sopenharmony_ci end = s + len; 877db96d56Sopenharmony_ci while (s < end) { 887db96d56Sopenharmony_ci if (*s == '\\') { 897db96d56Sopenharmony_ci *p++ = *s++; 907db96d56Sopenharmony_ci if (s >= end || *s & 0x80) { 917db96d56Sopenharmony_ci strcpy(p, "u005c"); 927db96d56Sopenharmony_ci p += 5; 937db96d56Sopenharmony_ci if (s >= end) { 947db96d56Sopenharmony_ci break; 957db96d56Sopenharmony_ci } 967db96d56Sopenharmony_ci } 977db96d56Sopenharmony_ci } 987db96d56Sopenharmony_ci if (*s & 0x80) { 997db96d56Sopenharmony_ci PyObject *w; 1007db96d56Sopenharmony_ci int kind; 1017db96d56Sopenharmony_ci const void *data; 1027db96d56Sopenharmony_ci Py_ssize_t w_len; 1037db96d56Sopenharmony_ci Py_ssize_t i; 1047db96d56Sopenharmony_ci w = decode_utf8(&s, end); 1057db96d56Sopenharmony_ci if (w == NULL) { 1067db96d56Sopenharmony_ci Py_DECREF(u); 1077db96d56Sopenharmony_ci return NULL; 1087db96d56Sopenharmony_ci } 1097db96d56Sopenharmony_ci kind = PyUnicode_KIND(w); 1107db96d56Sopenharmony_ci data = PyUnicode_DATA(w); 1117db96d56Sopenharmony_ci w_len = PyUnicode_GET_LENGTH(w); 1127db96d56Sopenharmony_ci for (i = 0; i < w_len; i++) { 1137db96d56Sopenharmony_ci Py_UCS4 chr = PyUnicode_READ(kind, data, i); 1147db96d56Sopenharmony_ci sprintf(p, "\\U%08x", chr); 1157db96d56Sopenharmony_ci p += 10; 1167db96d56Sopenharmony_ci } 1177db96d56Sopenharmony_ci /* Should be impossible to overflow */ 1187db96d56Sopenharmony_ci assert(p - buf <= PyBytes_GET_SIZE(u)); 1197db96d56Sopenharmony_ci Py_DECREF(w); 1207db96d56Sopenharmony_ci } 1217db96d56Sopenharmony_ci else { 1227db96d56Sopenharmony_ci *p++ = *s++; 1237db96d56Sopenharmony_ci } 1247db96d56Sopenharmony_ci } 1257db96d56Sopenharmony_ci len = p - buf; 1267db96d56Sopenharmony_ci s = buf; 1277db96d56Sopenharmony_ci 1287db96d56Sopenharmony_ci const char *first_invalid_escape; 1297db96d56Sopenharmony_ci v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape); 1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_ci if (v != NULL && first_invalid_escape != NULL) { 1327db96d56Sopenharmony_ci if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) { 1337db96d56Sopenharmony_ci /* We have not decref u before because first_invalid_escape points 1347db96d56Sopenharmony_ci inside u. */ 1357db96d56Sopenharmony_ci Py_XDECREF(u); 1367db96d56Sopenharmony_ci Py_DECREF(v); 1377db96d56Sopenharmony_ci return NULL; 1387db96d56Sopenharmony_ci } 1397db96d56Sopenharmony_ci } 1407db96d56Sopenharmony_ci Py_XDECREF(u); 1417db96d56Sopenharmony_ci return v; 1427db96d56Sopenharmony_ci} 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_cistatic PyObject * 1457db96d56Sopenharmony_cidecode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) 1467db96d56Sopenharmony_ci{ 1477db96d56Sopenharmony_ci const char *first_invalid_escape; 1487db96d56Sopenharmony_ci PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); 1497db96d56Sopenharmony_ci if (result == NULL) { 1507db96d56Sopenharmony_ci return NULL; 1517db96d56Sopenharmony_ci } 1527db96d56Sopenharmony_ci 1537db96d56Sopenharmony_ci if (first_invalid_escape != NULL) { 1547db96d56Sopenharmony_ci if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) { 1557db96d56Sopenharmony_ci Py_DECREF(result); 1567db96d56Sopenharmony_ci return NULL; 1577db96d56Sopenharmony_ci } 1587db96d56Sopenharmony_ci } 1597db96d56Sopenharmony_ci return result; 1607db96d56Sopenharmony_ci} 1617db96d56Sopenharmony_ci 1627db96d56Sopenharmony_ci/* s must include the bracketing quote characters, and r, b, u, 1637db96d56Sopenharmony_ci &/or f prefixes (if any), and embedded escape sequences (if any). 1647db96d56Sopenharmony_ci _PyPegen_parsestr parses it, and sets *result to decoded Python string object. 1657db96d56Sopenharmony_ci If the string is an f-string, set *fstr and *fstrlen to the unparsed 1667db96d56Sopenharmony_ci string object. Return 0 if no errors occurred. */ 1677db96d56Sopenharmony_ciint 1687db96d56Sopenharmony_ci_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, 1697db96d56Sopenharmony_ci const char **fstr, Py_ssize_t *fstrlen, Token *t) 1707db96d56Sopenharmony_ci{ 1717db96d56Sopenharmony_ci const char *s = PyBytes_AsString(t->bytes); 1727db96d56Sopenharmony_ci if (s == NULL) { 1737db96d56Sopenharmony_ci return -1; 1747db96d56Sopenharmony_ci } 1757db96d56Sopenharmony_ci 1767db96d56Sopenharmony_ci size_t len; 1777db96d56Sopenharmony_ci int quote = Py_CHARMASK(*s); 1787db96d56Sopenharmony_ci int fmode = 0; 1797db96d56Sopenharmony_ci *bytesmode = 0; 1807db96d56Sopenharmony_ci *rawmode = 0; 1817db96d56Sopenharmony_ci *result = NULL; 1827db96d56Sopenharmony_ci *fstr = NULL; 1837db96d56Sopenharmony_ci if (Py_ISALPHA(quote)) { 1847db96d56Sopenharmony_ci while (!*bytesmode || !*rawmode) { 1857db96d56Sopenharmony_ci if (quote == 'b' || quote == 'B') { 1867db96d56Sopenharmony_ci quote =(unsigned char)*++s; 1877db96d56Sopenharmony_ci *bytesmode = 1; 1887db96d56Sopenharmony_ci } 1897db96d56Sopenharmony_ci else if (quote == 'u' || quote == 'U') { 1907db96d56Sopenharmony_ci quote = (unsigned char)*++s; 1917db96d56Sopenharmony_ci } 1927db96d56Sopenharmony_ci else if (quote == 'r' || quote == 'R') { 1937db96d56Sopenharmony_ci quote = (unsigned char)*++s; 1947db96d56Sopenharmony_ci *rawmode = 1; 1957db96d56Sopenharmony_ci } 1967db96d56Sopenharmony_ci else if (quote == 'f' || quote == 'F') { 1977db96d56Sopenharmony_ci quote = (unsigned char)*++s; 1987db96d56Sopenharmony_ci fmode = 1; 1997db96d56Sopenharmony_ci } 2007db96d56Sopenharmony_ci else { 2017db96d56Sopenharmony_ci break; 2027db96d56Sopenharmony_ci } 2037db96d56Sopenharmony_ci } 2047db96d56Sopenharmony_ci } 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ci /* fstrings are only allowed in Python 3.6 and greater */ 2077db96d56Sopenharmony_ci if (fmode && p->feature_version < 6) { 2087db96d56Sopenharmony_ci p->error_indicator = 1; 2097db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater"); 2107db96d56Sopenharmony_ci return -1; 2117db96d56Sopenharmony_ci } 2127db96d56Sopenharmony_ci 2137db96d56Sopenharmony_ci if (fmode && *bytesmode) { 2147db96d56Sopenharmony_ci PyErr_BadInternalCall(); 2157db96d56Sopenharmony_ci return -1; 2167db96d56Sopenharmony_ci } 2177db96d56Sopenharmony_ci if (quote != '\'' && quote != '\"') { 2187db96d56Sopenharmony_ci PyErr_BadInternalCall(); 2197db96d56Sopenharmony_ci return -1; 2207db96d56Sopenharmony_ci } 2217db96d56Sopenharmony_ci /* Skip the leading quote char. */ 2227db96d56Sopenharmony_ci s++; 2237db96d56Sopenharmony_ci len = strlen(s); 2247db96d56Sopenharmony_ci if (len > INT_MAX) { 2257db96d56Sopenharmony_ci PyErr_SetString(PyExc_OverflowError, "string to parse is too long"); 2267db96d56Sopenharmony_ci return -1; 2277db96d56Sopenharmony_ci } 2287db96d56Sopenharmony_ci if (s[--len] != quote) { 2297db96d56Sopenharmony_ci /* Last quote char must match the first. */ 2307db96d56Sopenharmony_ci PyErr_BadInternalCall(); 2317db96d56Sopenharmony_ci return -1; 2327db96d56Sopenharmony_ci } 2337db96d56Sopenharmony_ci if (len >= 4 && s[0] == quote && s[1] == quote) { 2347db96d56Sopenharmony_ci /* A triple quoted string. We've already skipped one quote at 2357db96d56Sopenharmony_ci the start and one at the end of the string. Now skip the 2367db96d56Sopenharmony_ci two at the start. */ 2377db96d56Sopenharmony_ci s += 2; 2387db96d56Sopenharmony_ci len -= 2; 2397db96d56Sopenharmony_ci /* And check that the last two match. */ 2407db96d56Sopenharmony_ci if (s[--len] != quote || s[--len] != quote) { 2417db96d56Sopenharmony_ci PyErr_BadInternalCall(); 2427db96d56Sopenharmony_ci return -1; 2437db96d56Sopenharmony_ci } 2447db96d56Sopenharmony_ci } 2457db96d56Sopenharmony_ci 2467db96d56Sopenharmony_ci if (fmode) { 2477db96d56Sopenharmony_ci /* Just return the bytes. The caller will parse the resulting 2487db96d56Sopenharmony_ci string. */ 2497db96d56Sopenharmony_ci *fstr = s; 2507db96d56Sopenharmony_ci *fstrlen = len; 2517db96d56Sopenharmony_ci return 0; 2527db96d56Sopenharmony_ci } 2537db96d56Sopenharmony_ci 2547db96d56Sopenharmony_ci /* Not an f-string. */ 2557db96d56Sopenharmony_ci /* Avoid invoking escape decoding routines if possible. */ 2567db96d56Sopenharmony_ci *rawmode = *rawmode || strchr(s, '\\') == NULL; 2577db96d56Sopenharmony_ci if (*bytesmode) { 2587db96d56Sopenharmony_ci /* Disallow non-ASCII characters. */ 2597db96d56Sopenharmony_ci const char *ch; 2607db96d56Sopenharmony_ci for (ch = s; *ch; ch++) { 2617db96d56Sopenharmony_ci if (Py_CHARMASK(*ch) >= 0x80) { 2627db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR_KNOWN_LOCATION( 2637db96d56Sopenharmony_ci t, 2647db96d56Sopenharmony_ci "bytes can only contain ASCII " 2657db96d56Sopenharmony_ci "literal characters"); 2667db96d56Sopenharmony_ci return -1; 2677db96d56Sopenharmony_ci } 2687db96d56Sopenharmony_ci } 2697db96d56Sopenharmony_ci if (*rawmode) { 2707db96d56Sopenharmony_ci *result = PyBytes_FromStringAndSize(s, len); 2717db96d56Sopenharmony_ci } 2727db96d56Sopenharmony_ci else { 2737db96d56Sopenharmony_ci *result = decode_bytes_with_escapes(p, s, len, t); 2747db96d56Sopenharmony_ci } 2757db96d56Sopenharmony_ci } 2767db96d56Sopenharmony_ci else { 2777db96d56Sopenharmony_ci if (*rawmode) { 2787db96d56Sopenharmony_ci *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); 2797db96d56Sopenharmony_ci } 2807db96d56Sopenharmony_ci else { 2817db96d56Sopenharmony_ci *result = decode_unicode_with_escapes(p, s, len, t); 2827db96d56Sopenharmony_ci } 2837db96d56Sopenharmony_ci } 2847db96d56Sopenharmony_ci return *result == NULL ? -1 : 0; 2857db96d56Sopenharmony_ci} 2867db96d56Sopenharmony_ci 2877db96d56Sopenharmony_ci 2887db96d56Sopenharmony_ci 2897db96d56Sopenharmony_ci// FSTRING STUFF 2907db96d56Sopenharmony_ci 2917db96d56Sopenharmony_ci/* Fix locations for the given node and its children. 2927db96d56Sopenharmony_ci 2937db96d56Sopenharmony_ci `parent` is the enclosing node. 2947db96d56Sopenharmony_ci `expr_start` is the starting position of the expression (pointing to the open brace). 2957db96d56Sopenharmony_ci `n` is the node which locations are going to be fixed relative to parent. 2967db96d56Sopenharmony_ci `expr_str` is the child node's string representation, including braces. 2977db96d56Sopenharmony_ci*/ 2987db96d56Sopenharmony_cistatic bool 2997db96d56Sopenharmony_cifstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols) 3007db96d56Sopenharmony_ci{ 3017db96d56Sopenharmony_ci *p_lines = 0; 3027db96d56Sopenharmony_ci *p_cols = 0; 3037db96d56Sopenharmony_ci assert(expr_start != NULL && *expr_start == '{'); 3047db96d56Sopenharmony_ci if (parent && parent->bytes) { 3057db96d56Sopenharmony_ci const char *parent_str = PyBytes_AsString(parent->bytes); 3067db96d56Sopenharmony_ci if (!parent_str) { 3077db96d56Sopenharmony_ci return false; 3087db96d56Sopenharmony_ci } 3097db96d56Sopenharmony_ci // The following is needed, in order to correctly shift the column 3107db96d56Sopenharmony_ci // offset, in the case that (disregarding any whitespace) a newline 3117db96d56Sopenharmony_ci // immediately follows the opening curly brace of the fstring expression. 3127db96d56Sopenharmony_ci bool newline_after_brace = 1; 3137db96d56Sopenharmony_ci const char *start = expr_start + 1; 3147db96d56Sopenharmony_ci while (start && *start != '}' && *start != '\n') { 3157db96d56Sopenharmony_ci if (*start != ' ' && *start != '\t' && *start != '\f') { 3167db96d56Sopenharmony_ci newline_after_brace = 0; 3177db96d56Sopenharmony_ci break; 3187db96d56Sopenharmony_ci } 3197db96d56Sopenharmony_ci start++; 3207db96d56Sopenharmony_ci } 3217db96d56Sopenharmony_ci 3227db96d56Sopenharmony_ci // Account for the characters from the last newline character to our 3237db96d56Sopenharmony_ci // left until the beginning of expr_start. 3247db96d56Sopenharmony_ci if (!newline_after_brace) { 3257db96d56Sopenharmony_ci start = expr_start; 3267db96d56Sopenharmony_ci while (start > parent_str && *start != '\n') { 3277db96d56Sopenharmony_ci start--; 3287db96d56Sopenharmony_ci } 3297db96d56Sopenharmony_ci *p_cols += (int)(expr_start - start); 3307db96d56Sopenharmony_ci if (*start == '\n') { 3317db96d56Sopenharmony_ci *p_cols -= 1; 3327db96d56Sopenharmony_ci } 3337db96d56Sopenharmony_ci } 3347db96d56Sopenharmony_ci /* adjust the start based on the number of newlines encountered 3357db96d56Sopenharmony_ci before the f-string expression */ 3367db96d56Sopenharmony_ci for (const char *p = parent_str; p < expr_start; p++) { 3377db96d56Sopenharmony_ci if (*p == '\n') { 3387db96d56Sopenharmony_ci (*p_lines)++; 3397db96d56Sopenharmony_ci } 3407db96d56Sopenharmony_ci } 3417db96d56Sopenharmony_ci } 3427db96d56Sopenharmony_ci return true; 3437db96d56Sopenharmony_ci} 3447db96d56Sopenharmony_ci 3457db96d56Sopenharmony_ci 3467db96d56Sopenharmony_ci/* Compile this expression in to an expr_ty. Add parens around the 3477db96d56Sopenharmony_ci expression, in order to allow leading spaces in the expression. */ 3487db96d56Sopenharmony_cistatic expr_ty 3497db96d56Sopenharmony_cifstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, 3507db96d56Sopenharmony_ci Token *t) 3517db96d56Sopenharmony_ci{ 3527db96d56Sopenharmony_ci expr_ty expr = NULL; 3537db96d56Sopenharmony_ci char *str; 3547db96d56Sopenharmony_ci Py_ssize_t len; 3557db96d56Sopenharmony_ci const char *s; 3567db96d56Sopenharmony_ci expr_ty result = NULL; 3577db96d56Sopenharmony_ci 3587db96d56Sopenharmony_ci assert(expr_end >= expr_start); 3597db96d56Sopenharmony_ci assert(*(expr_start-1) == '{'); 3607db96d56Sopenharmony_ci assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' || 3617db96d56Sopenharmony_ci *expr_end == '='); 3627db96d56Sopenharmony_ci 3637db96d56Sopenharmony_ci /* If the substring is all whitespace, it's an error. We need to catch this 3647db96d56Sopenharmony_ci here, and not when we call PyParser_SimpleParseStringFlagsFilename, 3657db96d56Sopenharmony_ci because turning the expression '' in to '()' would go from being invalid 3667db96d56Sopenharmony_ci to valid. */ 3677db96d56Sopenharmony_ci for (s = expr_start; s != expr_end; s++) { 3687db96d56Sopenharmony_ci char c = *s; 3697db96d56Sopenharmony_ci /* The Python parser ignores only the following whitespace 3707db96d56Sopenharmony_ci characters (\r already is converted to \n). */ 3717db96d56Sopenharmony_ci if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) { 3727db96d56Sopenharmony_ci break; 3737db96d56Sopenharmony_ci } 3747db96d56Sopenharmony_ci } 3757db96d56Sopenharmony_ci 3767db96d56Sopenharmony_ci if (s == expr_end) { 3777db96d56Sopenharmony_ci if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') { 3787db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end); 3797db96d56Sopenharmony_ci return NULL; 3807db96d56Sopenharmony_ci } 3817db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: empty expression not allowed"); 3827db96d56Sopenharmony_ci return NULL; 3837db96d56Sopenharmony_ci } 3847db96d56Sopenharmony_ci 3857db96d56Sopenharmony_ci len = expr_end - expr_start; 3867db96d56Sopenharmony_ci /* Allocate 3 extra bytes: open paren, close paren, null byte. */ 3877db96d56Sopenharmony_ci str = PyMem_Calloc(len + 3, sizeof(char)); 3887db96d56Sopenharmony_ci if (str == NULL) { 3897db96d56Sopenharmony_ci PyErr_NoMemory(); 3907db96d56Sopenharmony_ci return NULL; 3917db96d56Sopenharmony_ci } 3927db96d56Sopenharmony_ci 3937db96d56Sopenharmony_ci // The call to fstring_find_expr_location is responsible for finding the column offset 3947db96d56Sopenharmony_ci // the generated AST nodes need to be shifted to the right, which is equal to the number 3957db96d56Sopenharmony_ci // of the f-string characters before the expression starts. 3967db96d56Sopenharmony_ci memcpy(str+1, expr_start, len); 3977db96d56Sopenharmony_ci int lines, cols; 3987db96d56Sopenharmony_ci if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) { 3997db96d56Sopenharmony_ci PyMem_Free(str); 4007db96d56Sopenharmony_ci return NULL; 4017db96d56Sopenharmony_ci } 4027db96d56Sopenharmony_ci 4037db96d56Sopenharmony_ci // The parentheses are needed in order to allow for leading whitespace within 4047db96d56Sopenharmony_ci // the f-string expression. This consequently gets parsed as a group (see the 4057db96d56Sopenharmony_ci // group rule in python.gram). 4067db96d56Sopenharmony_ci str[0] = '('; 4077db96d56Sopenharmony_ci str[len+1] = ')'; 4087db96d56Sopenharmony_ci 4097db96d56Sopenharmony_ci struct tok_state* tok = _PyTokenizer_FromString(str, 1); 4107db96d56Sopenharmony_ci if (tok == NULL) { 4117db96d56Sopenharmony_ci PyMem_Free(str); 4127db96d56Sopenharmony_ci return NULL; 4137db96d56Sopenharmony_ci } 4147db96d56Sopenharmony_ci Py_INCREF(p->tok->filename); 4157db96d56Sopenharmony_ci 4167db96d56Sopenharmony_ci tok->filename = p->tok->filename; 4177db96d56Sopenharmony_ci tok->lineno = t->lineno + lines - 1; 4187db96d56Sopenharmony_ci 4197db96d56Sopenharmony_ci Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version, 4207db96d56Sopenharmony_ci NULL, p->arena); 4217db96d56Sopenharmony_ci 4227db96d56Sopenharmony_ci p2->starting_lineno = t->lineno + lines; 4237db96d56Sopenharmony_ci p2->starting_col_offset = lines != 0 ? cols : t->col_offset + cols; 4247db96d56Sopenharmony_ci 4257db96d56Sopenharmony_ci expr = _PyPegen_run_parser(p2); 4267db96d56Sopenharmony_ci 4277db96d56Sopenharmony_ci if (expr == NULL) { 4287db96d56Sopenharmony_ci goto exit; 4297db96d56Sopenharmony_ci } 4307db96d56Sopenharmony_ci result = expr; 4317db96d56Sopenharmony_ci 4327db96d56Sopenharmony_ciexit: 4337db96d56Sopenharmony_ci PyMem_Free(str); 4347db96d56Sopenharmony_ci _PyPegen_Parser_Free(p2); 4357db96d56Sopenharmony_ci _PyTokenizer_Free(tok); 4367db96d56Sopenharmony_ci return result; 4377db96d56Sopenharmony_ci} 4387db96d56Sopenharmony_ci 4397db96d56Sopenharmony_ci/* Return -1 on error. 4407db96d56Sopenharmony_ci 4417db96d56Sopenharmony_ci Return 0 if we reached the end of the literal. 4427db96d56Sopenharmony_ci 4437db96d56Sopenharmony_ci Return 1 if we haven't reached the end of the literal, but we want 4447db96d56Sopenharmony_ci the caller to process the literal up to this point. Used for 4457db96d56Sopenharmony_ci doubled braces. 4467db96d56Sopenharmony_ci*/ 4477db96d56Sopenharmony_cistatic int 4487db96d56Sopenharmony_cifstring_find_literal(Parser *p, const char **str, const char *end, int raw, 4497db96d56Sopenharmony_ci PyObject **literal, int recurse_lvl, Token *t) 4507db96d56Sopenharmony_ci{ 4517db96d56Sopenharmony_ci /* Get any literal string. It ends when we hit an un-doubled left 4527db96d56Sopenharmony_ci brace (which isn't part of a unicode name escape such as 4537db96d56Sopenharmony_ci "\N{EULER CONSTANT}"), or the end of the string. */ 4547db96d56Sopenharmony_ci 4557db96d56Sopenharmony_ci const char *s = *str; 4567db96d56Sopenharmony_ci const char *literal_start = s; 4577db96d56Sopenharmony_ci int result = 0; 4587db96d56Sopenharmony_ci 4597db96d56Sopenharmony_ci assert(*literal == NULL); 4607db96d56Sopenharmony_ci while (s < end) { 4617db96d56Sopenharmony_ci char ch = *s++; 4627db96d56Sopenharmony_ci if (!raw && ch == '\\' && s < end) { 4637db96d56Sopenharmony_ci ch = *s++; 4647db96d56Sopenharmony_ci if (ch == 'N') { 4657db96d56Sopenharmony_ci /* We need to look at and skip matching braces for "\N{name}" 4667db96d56Sopenharmony_ci sequences because otherwise we'll think the opening '{' 4677db96d56Sopenharmony_ci starts an expression, which is not the case with "\N". 4687db96d56Sopenharmony_ci Keep looking for either a matched '{' '}' pair, or the end 4697db96d56Sopenharmony_ci of the string. */ 4707db96d56Sopenharmony_ci 4717db96d56Sopenharmony_ci if (s < end && *s++ == '{') { 4727db96d56Sopenharmony_ci while (s < end && *s++ != '}') { 4737db96d56Sopenharmony_ci } 4747db96d56Sopenharmony_ci continue; 4757db96d56Sopenharmony_ci } 4767db96d56Sopenharmony_ci 4777db96d56Sopenharmony_ci /* This is an invalid "\N" sequence, since it's a "\N" not 4787db96d56Sopenharmony_ci followed by a "{". Just keep parsing this literal. This 4797db96d56Sopenharmony_ci error will be caught later by 4807db96d56Sopenharmony_ci decode_unicode_with_escapes(). */ 4817db96d56Sopenharmony_ci continue; 4827db96d56Sopenharmony_ci } 4837db96d56Sopenharmony_ci if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) { 4847db96d56Sopenharmony_ci return -1; 4857db96d56Sopenharmony_ci } 4867db96d56Sopenharmony_ci } 4877db96d56Sopenharmony_ci if (ch == '{' || ch == '}') { 4887db96d56Sopenharmony_ci /* Check for doubled braces, but only at the top level. If 4897db96d56Sopenharmony_ci we checked at every level, then f'{0:{3}}' would fail 4907db96d56Sopenharmony_ci with the two closing braces. */ 4917db96d56Sopenharmony_ci if (recurse_lvl == 0) { 4927db96d56Sopenharmony_ci if (s < end && *s == ch) { 4937db96d56Sopenharmony_ci /* We're going to tell the caller that the literal ends 4947db96d56Sopenharmony_ci here, but that they should continue scanning. But also 4957db96d56Sopenharmony_ci skip over the second brace when we resume scanning. */ 4967db96d56Sopenharmony_ci *str = s + 1; 4977db96d56Sopenharmony_ci result = 1; 4987db96d56Sopenharmony_ci goto done; 4997db96d56Sopenharmony_ci } 5007db96d56Sopenharmony_ci 5017db96d56Sopenharmony_ci /* Where a single '{' is the start of a new expression, a 5027db96d56Sopenharmony_ci single '}' is not allowed. */ 5037db96d56Sopenharmony_ci if (ch == '}') { 5047db96d56Sopenharmony_ci *str = s - 1; 5057db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed"); 5067db96d56Sopenharmony_ci return -1; 5077db96d56Sopenharmony_ci } 5087db96d56Sopenharmony_ci } 5097db96d56Sopenharmony_ci /* We're either at a '{', which means we're starting another 5107db96d56Sopenharmony_ci expression; or a '}', which means we're at the end of this 5117db96d56Sopenharmony_ci f-string (for a nested format_spec). */ 5127db96d56Sopenharmony_ci s--; 5137db96d56Sopenharmony_ci break; 5147db96d56Sopenharmony_ci } 5157db96d56Sopenharmony_ci } 5167db96d56Sopenharmony_ci *str = s; 5177db96d56Sopenharmony_ci assert(s <= end); 5187db96d56Sopenharmony_ci assert(s == end || *s == '{' || *s == '}'); 5197db96d56Sopenharmony_cidone: 5207db96d56Sopenharmony_ci if (literal_start != s) { 5217db96d56Sopenharmony_ci if (raw) { 5227db96d56Sopenharmony_ci *literal = PyUnicode_DecodeUTF8Stateful(literal_start, 5237db96d56Sopenharmony_ci s - literal_start, 5247db96d56Sopenharmony_ci NULL, NULL); 5257db96d56Sopenharmony_ci } 5267db96d56Sopenharmony_ci else { 5277db96d56Sopenharmony_ci *literal = decode_unicode_with_escapes(p, literal_start, 5287db96d56Sopenharmony_ci s - literal_start, t); 5297db96d56Sopenharmony_ci } 5307db96d56Sopenharmony_ci if (!*literal) { 5317db96d56Sopenharmony_ci return -1; 5327db96d56Sopenharmony_ci } 5337db96d56Sopenharmony_ci } 5347db96d56Sopenharmony_ci return result; 5357db96d56Sopenharmony_ci} 5367db96d56Sopenharmony_ci 5377db96d56Sopenharmony_ci/* Forward declaration because parsing is recursive. */ 5387db96d56Sopenharmony_cistatic expr_ty 5397db96d56Sopenharmony_cifstring_parse(Parser *p, const char **str, const char *end, int raw, int recurse_lvl, 5407db96d56Sopenharmony_ci Token *first_token, Token* t, Token *last_token); 5417db96d56Sopenharmony_ci 5427db96d56Sopenharmony_ci/* Parse the f-string at *str, ending at end. We know *str starts an 5437db96d56Sopenharmony_ci expression (so it must be a '{'). Returns the FormattedValue node, which 5447db96d56Sopenharmony_ci includes the expression, conversion character, format_spec expression, and 5457db96d56Sopenharmony_ci optionally the text of the expression (if = is used). 5467db96d56Sopenharmony_ci 5477db96d56Sopenharmony_ci Note that I don't do a perfect job here: I don't make sure that a 5487db96d56Sopenharmony_ci closing brace doesn't match an opening paren, for example. It 5497db96d56Sopenharmony_ci doesn't need to error on all invalid expressions, just correctly 5507db96d56Sopenharmony_ci find the end of all valid ones. Any errors inside the expression 5517db96d56Sopenharmony_ci will be caught when we parse it later. 5527db96d56Sopenharmony_ci 5537db96d56Sopenharmony_ci *expression is set to the expression. For an '=' "debug" expression, 5547db96d56Sopenharmony_ci *expr_text is set to the debug text (the original text of the expression, 5557db96d56Sopenharmony_ci including the '=' and any whitespace around it, as a string object). If 5567db96d56Sopenharmony_ci not a debug expression, *expr_text set to NULL. */ 5577db96d56Sopenharmony_cistatic int 5587db96d56Sopenharmony_cifstring_find_expr(Parser *p, const char **str, const char *end, int raw, int recurse_lvl, 5597db96d56Sopenharmony_ci PyObject **expr_text, expr_ty *expression, Token *first_token, 5607db96d56Sopenharmony_ci Token *t, Token *last_token) 5617db96d56Sopenharmony_ci{ 5627db96d56Sopenharmony_ci /* Return -1 on error, else 0. */ 5637db96d56Sopenharmony_ci 5647db96d56Sopenharmony_ci const char *expr_start; 5657db96d56Sopenharmony_ci const char *expr_end; 5667db96d56Sopenharmony_ci expr_ty simple_expression; 5677db96d56Sopenharmony_ci expr_ty format_spec = NULL; /* Optional format specifier. */ 5687db96d56Sopenharmony_ci int conversion = -1; /* The conversion char. Use default if not 5697db96d56Sopenharmony_ci specified, or !r if using = and no format 5707db96d56Sopenharmony_ci spec. */ 5717db96d56Sopenharmony_ci 5727db96d56Sopenharmony_ci /* 0 if we're not in a string, else the quote char we're trying to 5737db96d56Sopenharmony_ci match (single or double quote). */ 5747db96d56Sopenharmony_ci char quote_char = 0; 5757db96d56Sopenharmony_ci 5767db96d56Sopenharmony_ci /* If we're inside a string, 1=normal, 3=triple-quoted. */ 5777db96d56Sopenharmony_ci int string_type = 0; 5787db96d56Sopenharmony_ci 5797db96d56Sopenharmony_ci /* Keep track of nesting level for braces/parens/brackets in 5807db96d56Sopenharmony_ci expressions. */ 5817db96d56Sopenharmony_ci Py_ssize_t nested_depth = 0; 5827db96d56Sopenharmony_ci char parenstack[MAXLEVEL]; 5837db96d56Sopenharmony_ci 5847db96d56Sopenharmony_ci *expr_text = NULL; 5857db96d56Sopenharmony_ci 5867db96d56Sopenharmony_ci /* Can only nest one level deep. */ 5877db96d56Sopenharmony_ci if (recurse_lvl >= 2) { 5887db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply"); 5897db96d56Sopenharmony_ci goto error; 5907db96d56Sopenharmony_ci } 5917db96d56Sopenharmony_ci 5927db96d56Sopenharmony_ci /* The first char must be a left brace, or we wouldn't have gotten 5937db96d56Sopenharmony_ci here. Skip over it. */ 5947db96d56Sopenharmony_ci assert(**str == '{'); 5957db96d56Sopenharmony_ci *str += 1; 5967db96d56Sopenharmony_ci 5977db96d56Sopenharmony_ci expr_start = *str; 5987db96d56Sopenharmony_ci for (; *str < end; (*str)++) { 5997db96d56Sopenharmony_ci char ch; 6007db96d56Sopenharmony_ci 6017db96d56Sopenharmony_ci /* Loop invariants. */ 6027db96d56Sopenharmony_ci assert(nested_depth >= 0); 6037db96d56Sopenharmony_ci assert(*str >= expr_start && *str < end); 6047db96d56Sopenharmony_ci if (quote_char) { 6057db96d56Sopenharmony_ci assert(string_type == 1 || string_type == 3); 6067db96d56Sopenharmony_ci } else { 6077db96d56Sopenharmony_ci assert(string_type == 0); 6087db96d56Sopenharmony_ci } 6097db96d56Sopenharmony_ci 6107db96d56Sopenharmony_ci ch = **str; 6117db96d56Sopenharmony_ci /* Nowhere inside an expression is a backslash allowed. */ 6127db96d56Sopenharmony_ci if (ch == '\\') { 6137db96d56Sopenharmony_ci /* Error: can't include a backslash character, inside 6147db96d56Sopenharmony_ci parens or strings or not. */ 6157db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR( 6167db96d56Sopenharmony_ci "f-string expression part " 6177db96d56Sopenharmony_ci "cannot include a backslash"); 6187db96d56Sopenharmony_ci goto error; 6197db96d56Sopenharmony_ci } 6207db96d56Sopenharmony_ci if (quote_char) { 6217db96d56Sopenharmony_ci /* We're inside a string. See if we're at the end. */ 6227db96d56Sopenharmony_ci /* This code needs to implement the same non-error logic 6237db96d56Sopenharmony_ci as tok_get from tokenizer.c, at the letter_quote 6247db96d56Sopenharmony_ci label. To actually share that code would be a 6257db96d56Sopenharmony_ci nightmare. But, it's unlikely to change and is small, 6267db96d56Sopenharmony_ci so duplicate it here. Note we don't need to catch all 6277db96d56Sopenharmony_ci of the errors, since they'll be caught when parsing the 6287db96d56Sopenharmony_ci expression. We just need to match the non-error 6297db96d56Sopenharmony_ci cases. Thus we can ignore \n in single-quoted strings, 6307db96d56Sopenharmony_ci for example. Or non-terminated strings. */ 6317db96d56Sopenharmony_ci if (ch == quote_char) { 6327db96d56Sopenharmony_ci /* Does this match the string_type (single or triple 6337db96d56Sopenharmony_ci quoted)? */ 6347db96d56Sopenharmony_ci if (string_type == 3) { 6357db96d56Sopenharmony_ci if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) { 6367db96d56Sopenharmony_ci /* We're at the end of a triple quoted string. */ 6377db96d56Sopenharmony_ci *str += 2; 6387db96d56Sopenharmony_ci string_type = 0; 6397db96d56Sopenharmony_ci quote_char = 0; 6407db96d56Sopenharmony_ci continue; 6417db96d56Sopenharmony_ci } 6427db96d56Sopenharmony_ci } else { 6437db96d56Sopenharmony_ci /* We're at the end of a normal string. */ 6447db96d56Sopenharmony_ci quote_char = 0; 6457db96d56Sopenharmony_ci string_type = 0; 6467db96d56Sopenharmony_ci continue; 6477db96d56Sopenharmony_ci } 6487db96d56Sopenharmony_ci } 6497db96d56Sopenharmony_ci } else if (ch == '\'' || ch == '"') { 6507db96d56Sopenharmony_ci /* Is this a triple quoted string? */ 6517db96d56Sopenharmony_ci if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) { 6527db96d56Sopenharmony_ci string_type = 3; 6537db96d56Sopenharmony_ci *str += 2; 6547db96d56Sopenharmony_ci } else { 6557db96d56Sopenharmony_ci /* Start of a normal string. */ 6567db96d56Sopenharmony_ci string_type = 1; 6577db96d56Sopenharmony_ci } 6587db96d56Sopenharmony_ci /* Start looking for the end of the string. */ 6597db96d56Sopenharmony_ci quote_char = ch; 6607db96d56Sopenharmony_ci } else if (ch == '[' || ch == '{' || ch == '(') { 6617db96d56Sopenharmony_ci if (nested_depth >= MAXLEVEL) { 6627db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis"); 6637db96d56Sopenharmony_ci goto error; 6647db96d56Sopenharmony_ci } 6657db96d56Sopenharmony_ci parenstack[nested_depth] = ch; 6667db96d56Sopenharmony_ci nested_depth++; 6677db96d56Sopenharmony_ci } else if (ch == '#') { 6687db96d56Sopenharmony_ci /* Error: can't include a comment character, inside parens 6697db96d56Sopenharmony_ci or not. */ 6707db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'"); 6717db96d56Sopenharmony_ci goto error; 6727db96d56Sopenharmony_ci } else if (nested_depth == 0 && 6737db96d56Sopenharmony_ci (ch == '!' || ch == ':' || ch == '}' || 6747db96d56Sopenharmony_ci ch == '=' || ch == '>' || ch == '<')) { 6757db96d56Sopenharmony_ci /* See if there's a next character. */ 6767db96d56Sopenharmony_ci if (*str+1 < end) { 6777db96d56Sopenharmony_ci char next = *(*str+1); 6787db96d56Sopenharmony_ci 6797db96d56Sopenharmony_ci /* For "!=". since '=' is not an allowed conversion character, 6807db96d56Sopenharmony_ci nothing is lost in this test. */ 6817db96d56Sopenharmony_ci if ((ch == '!' && next == '=') || /* != */ 6827db96d56Sopenharmony_ci (ch == '=' && next == '=') || /* == */ 6837db96d56Sopenharmony_ci (ch == '<' && next == '=') || /* <= */ 6847db96d56Sopenharmony_ci (ch == '>' && next == '=') /* >= */ 6857db96d56Sopenharmony_ci ) { 6867db96d56Sopenharmony_ci *str += 1; 6877db96d56Sopenharmony_ci continue; 6887db96d56Sopenharmony_ci } 6897db96d56Sopenharmony_ci } 6907db96d56Sopenharmony_ci /* Don't get out of the loop for these, if they're single 6917db96d56Sopenharmony_ci chars (not part of 2-char tokens). If by themselves, they 6927db96d56Sopenharmony_ci don't end an expression (unlike say '!'). */ 6937db96d56Sopenharmony_ci if (ch == '>' || ch == '<') { 6947db96d56Sopenharmony_ci continue; 6957db96d56Sopenharmony_ci } 6967db96d56Sopenharmony_ci 6977db96d56Sopenharmony_ci /* Normal way out of this loop. */ 6987db96d56Sopenharmony_ci break; 6997db96d56Sopenharmony_ci } else if (ch == ']' || ch == '}' || ch == ')') { 7007db96d56Sopenharmony_ci if (!nested_depth) { 7017db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch); 7027db96d56Sopenharmony_ci goto error; 7037db96d56Sopenharmony_ci } 7047db96d56Sopenharmony_ci nested_depth--; 7057db96d56Sopenharmony_ci int opening = (unsigned char)parenstack[nested_depth]; 7067db96d56Sopenharmony_ci if (!((opening == '(' && ch == ')') || 7077db96d56Sopenharmony_ci (opening == '[' && ch == ']') || 7087db96d56Sopenharmony_ci (opening == '{' && ch == '}'))) 7097db96d56Sopenharmony_ci { 7107db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR( 7117db96d56Sopenharmony_ci "f-string: closing parenthesis '%c' " 7127db96d56Sopenharmony_ci "does not match opening parenthesis '%c'", 7137db96d56Sopenharmony_ci ch, opening); 7147db96d56Sopenharmony_ci goto error; 7157db96d56Sopenharmony_ci } 7167db96d56Sopenharmony_ci } else { 7177db96d56Sopenharmony_ci /* Just consume this char and loop around. */ 7187db96d56Sopenharmony_ci } 7197db96d56Sopenharmony_ci } 7207db96d56Sopenharmony_ci expr_end = *str; 7217db96d56Sopenharmony_ci /* If we leave the above loop in a string or with mismatched parens, we 7227db96d56Sopenharmony_ci don't really care. We'll get a syntax error when compiling the 7237db96d56Sopenharmony_ci expression. But, we can produce a better error message, so let's just 7247db96d56Sopenharmony_ci do that.*/ 7257db96d56Sopenharmony_ci if (quote_char) { 7267db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: unterminated string"); 7277db96d56Sopenharmony_ci goto error; 7287db96d56Sopenharmony_ci } 7297db96d56Sopenharmony_ci if (nested_depth) { 7307db96d56Sopenharmony_ci int opening = (unsigned char)parenstack[nested_depth - 1]; 7317db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening); 7327db96d56Sopenharmony_ci goto error; 7337db96d56Sopenharmony_ci } 7347db96d56Sopenharmony_ci 7357db96d56Sopenharmony_ci if (*str >= end) { 7367db96d56Sopenharmony_ci goto unexpected_end_of_string; 7377db96d56Sopenharmony_ci } 7387db96d56Sopenharmony_ci 7397db96d56Sopenharmony_ci /* Compile the expression as soon as possible, so we show errors 7407db96d56Sopenharmony_ci related to the expression before errors related to the 7417db96d56Sopenharmony_ci conversion or format_spec. */ 7427db96d56Sopenharmony_ci simple_expression = fstring_compile_expr(p, expr_start, expr_end, t); 7437db96d56Sopenharmony_ci if (!simple_expression) { 7447db96d56Sopenharmony_ci goto error; 7457db96d56Sopenharmony_ci } 7467db96d56Sopenharmony_ci 7477db96d56Sopenharmony_ci /* Check for =, which puts the text value of the expression in 7487db96d56Sopenharmony_ci expr_text. */ 7497db96d56Sopenharmony_ci if (**str == '=') { 7507db96d56Sopenharmony_ci if (p->feature_version < 8) { 7517db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: self documenting expressions are " 7527db96d56Sopenharmony_ci "only supported in Python 3.8 and greater"); 7537db96d56Sopenharmony_ci goto error; 7547db96d56Sopenharmony_ci } 7557db96d56Sopenharmony_ci *str += 1; 7567db96d56Sopenharmony_ci 7577db96d56Sopenharmony_ci /* Skip over ASCII whitespace. No need to test for end of string 7587db96d56Sopenharmony_ci here, since we know there's at least a trailing quote somewhere 7597db96d56Sopenharmony_ci ahead. */ 7607db96d56Sopenharmony_ci while (Py_ISSPACE(**str)) { 7617db96d56Sopenharmony_ci *str += 1; 7627db96d56Sopenharmony_ci } 7637db96d56Sopenharmony_ci if (*str >= end) { 7647db96d56Sopenharmony_ci goto unexpected_end_of_string; 7657db96d56Sopenharmony_ci } 7667db96d56Sopenharmony_ci /* Set *expr_text to the text of the expression. */ 7677db96d56Sopenharmony_ci *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start); 7687db96d56Sopenharmony_ci if (!*expr_text) { 7697db96d56Sopenharmony_ci goto error; 7707db96d56Sopenharmony_ci } 7717db96d56Sopenharmony_ci } 7727db96d56Sopenharmony_ci 7737db96d56Sopenharmony_ci /* Check for a conversion char, if present. */ 7747db96d56Sopenharmony_ci if (**str == '!') { 7757db96d56Sopenharmony_ci *str += 1; 7767db96d56Sopenharmony_ci if (*str >= end) { 7777db96d56Sopenharmony_ci goto unexpected_end_of_string; 7787db96d56Sopenharmony_ci } 7797db96d56Sopenharmony_ci 7807db96d56Sopenharmony_ci conversion = (unsigned char)**str; 7817db96d56Sopenharmony_ci *str += 1; 7827db96d56Sopenharmony_ci 7837db96d56Sopenharmony_ci /* Validate the conversion. */ 7847db96d56Sopenharmony_ci if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) { 7857db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR( 7867db96d56Sopenharmony_ci "f-string: invalid conversion character: " 7877db96d56Sopenharmony_ci "expected 's', 'r', or 'a'"); 7887db96d56Sopenharmony_ci goto error; 7897db96d56Sopenharmony_ci } 7907db96d56Sopenharmony_ci 7917db96d56Sopenharmony_ci } 7927db96d56Sopenharmony_ci 7937db96d56Sopenharmony_ci /* Check for the format spec, if present. */ 7947db96d56Sopenharmony_ci if (*str >= end) { 7957db96d56Sopenharmony_ci goto unexpected_end_of_string; 7967db96d56Sopenharmony_ci } 7977db96d56Sopenharmony_ci if (**str == ':') { 7987db96d56Sopenharmony_ci *str += 1; 7997db96d56Sopenharmony_ci if (*str >= end) { 8007db96d56Sopenharmony_ci goto unexpected_end_of_string; 8017db96d56Sopenharmony_ci } 8027db96d56Sopenharmony_ci 8037db96d56Sopenharmony_ci /* Parse the format spec. */ 8047db96d56Sopenharmony_ci format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1, 8057db96d56Sopenharmony_ci first_token, t, last_token); 8067db96d56Sopenharmony_ci if (!format_spec) { 8077db96d56Sopenharmony_ci goto error; 8087db96d56Sopenharmony_ci } 8097db96d56Sopenharmony_ci } 8107db96d56Sopenharmony_ci 8117db96d56Sopenharmony_ci if (*str >= end || **str != '}') { 8127db96d56Sopenharmony_ci goto unexpected_end_of_string; 8137db96d56Sopenharmony_ci } 8147db96d56Sopenharmony_ci 8157db96d56Sopenharmony_ci /* We're at a right brace. Consume it. */ 8167db96d56Sopenharmony_ci assert(*str < end); 8177db96d56Sopenharmony_ci assert(**str == '}'); 8187db96d56Sopenharmony_ci *str += 1; 8197db96d56Sopenharmony_ci 8207db96d56Sopenharmony_ci /* If we're in = mode (detected by non-NULL expr_text), and have no format 8217db96d56Sopenharmony_ci spec and no explicit conversion, set the conversion to 'r'. */ 8227db96d56Sopenharmony_ci if (*expr_text && format_spec == NULL && conversion == -1) { 8237db96d56Sopenharmony_ci conversion = 'r'; 8247db96d56Sopenharmony_ci } 8257db96d56Sopenharmony_ci 8267db96d56Sopenharmony_ci /* And now create the FormattedValue node that represents this 8277db96d56Sopenharmony_ci entire expression with the conversion and format spec. */ 8287db96d56Sopenharmony_ci //TODO: Fix this 8297db96d56Sopenharmony_ci *expression = _PyAST_FormattedValue(simple_expression, conversion, 8307db96d56Sopenharmony_ci format_spec, first_token->lineno, 8317db96d56Sopenharmony_ci first_token->col_offset, 8327db96d56Sopenharmony_ci last_token->end_lineno, 8337db96d56Sopenharmony_ci last_token->end_col_offset, p->arena); 8347db96d56Sopenharmony_ci if (!*expression) { 8357db96d56Sopenharmony_ci goto error; 8367db96d56Sopenharmony_ci } 8377db96d56Sopenharmony_ci 8387db96d56Sopenharmony_ci return 0; 8397db96d56Sopenharmony_ci 8407db96d56Sopenharmony_ciunexpected_end_of_string: 8417db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: expecting '}'"); 8427db96d56Sopenharmony_ci /* Falls through to error. */ 8437db96d56Sopenharmony_ci 8447db96d56Sopenharmony_cierror: 8457db96d56Sopenharmony_ci Py_XDECREF(*expr_text); 8467db96d56Sopenharmony_ci return -1; 8477db96d56Sopenharmony_ci 8487db96d56Sopenharmony_ci} 8497db96d56Sopenharmony_ci 8507db96d56Sopenharmony_ci/* Return -1 on error. 8517db96d56Sopenharmony_ci 8527db96d56Sopenharmony_ci Return 0 if we have a literal (possible zero length) and an 8537db96d56Sopenharmony_ci expression (zero length if at the end of the string. 8547db96d56Sopenharmony_ci 8557db96d56Sopenharmony_ci Return 1 if we have a literal, but no expression, and we want the 8567db96d56Sopenharmony_ci caller to call us again. This is used to deal with doubled 8577db96d56Sopenharmony_ci braces. 8587db96d56Sopenharmony_ci 8597db96d56Sopenharmony_ci When called multiple times on the string 'a{{b{0}c', this function 8607db96d56Sopenharmony_ci will return: 8617db96d56Sopenharmony_ci 8627db96d56Sopenharmony_ci 1. the literal 'a{' with no expression, and a return value 8637db96d56Sopenharmony_ci of 1. Despite the fact that there's no expression, the return 8647db96d56Sopenharmony_ci value of 1 means we're not finished yet. 8657db96d56Sopenharmony_ci 8667db96d56Sopenharmony_ci 2. the literal 'b' and the expression '0', with a return value of 8677db96d56Sopenharmony_ci 0. The fact that there's an expression means we're not finished. 8687db96d56Sopenharmony_ci 8697db96d56Sopenharmony_ci 3. literal 'c' with no expression and a return value of 0. The 8707db96d56Sopenharmony_ci combination of the return value of 0 with no expression means 8717db96d56Sopenharmony_ci we're finished. 8727db96d56Sopenharmony_ci*/ 8737db96d56Sopenharmony_cistatic int 8747db96d56Sopenharmony_cifstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int raw, 8757db96d56Sopenharmony_ci int recurse_lvl, PyObject **literal, 8767db96d56Sopenharmony_ci PyObject **expr_text, expr_ty *expression, 8777db96d56Sopenharmony_ci Token *first_token, Token *t, Token *last_token) 8787db96d56Sopenharmony_ci{ 8797db96d56Sopenharmony_ci int result; 8807db96d56Sopenharmony_ci 8817db96d56Sopenharmony_ci assert(*literal == NULL && *expression == NULL); 8827db96d56Sopenharmony_ci 8837db96d56Sopenharmony_ci /* Get any literal string. */ 8847db96d56Sopenharmony_ci result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t); 8857db96d56Sopenharmony_ci if (result < 0) { 8867db96d56Sopenharmony_ci goto error; 8877db96d56Sopenharmony_ci } 8887db96d56Sopenharmony_ci 8897db96d56Sopenharmony_ci assert(result == 0 || result == 1); 8907db96d56Sopenharmony_ci 8917db96d56Sopenharmony_ci if (result == 1) { 8927db96d56Sopenharmony_ci /* We have a literal, but don't look at the expression. */ 8937db96d56Sopenharmony_ci return 1; 8947db96d56Sopenharmony_ci } 8957db96d56Sopenharmony_ci 8967db96d56Sopenharmony_ci if (*str >= end || **str == '}') { 8977db96d56Sopenharmony_ci /* We're at the end of the string or the end of a nested 8987db96d56Sopenharmony_ci f-string: no expression. The top-level error case where we 8997db96d56Sopenharmony_ci expect to be at the end of the string but we're at a '}' is 9007db96d56Sopenharmony_ci handled later. */ 9017db96d56Sopenharmony_ci return 0; 9027db96d56Sopenharmony_ci } 9037db96d56Sopenharmony_ci 9047db96d56Sopenharmony_ci /* We must now be the start of an expression, on a '{'. */ 9057db96d56Sopenharmony_ci assert(**str == '{'); 9067db96d56Sopenharmony_ci 9077db96d56Sopenharmony_ci if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text, 9087db96d56Sopenharmony_ci expression, first_token, t, last_token) < 0) { 9097db96d56Sopenharmony_ci goto error; 9107db96d56Sopenharmony_ci } 9117db96d56Sopenharmony_ci 9127db96d56Sopenharmony_ci return 0; 9137db96d56Sopenharmony_ci 9147db96d56Sopenharmony_cierror: 9157db96d56Sopenharmony_ci Py_CLEAR(*literal); 9167db96d56Sopenharmony_ci return -1; 9177db96d56Sopenharmony_ci} 9187db96d56Sopenharmony_ci 9197db96d56Sopenharmony_ci#ifdef NDEBUG 9207db96d56Sopenharmony_ci#define ExprList_check_invariants(l) 9217db96d56Sopenharmony_ci#else 9227db96d56Sopenharmony_cistatic void 9237db96d56Sopenharmony_ciExprList_check_invariants(ExprList *l) 9247db96d56Sopenharmony_ci{ 9257db96d56Sopenharmony_ci /* Check our invariants. Make sure this object is "live", and 9267db96d56Sopenharmony_ci hasn't been deallocated. */ 9277db96d56Sopenharmony_ci assert(l->size >= 0); 9287db96d56Sopenharmony_ci assert(l->p != NULL); 9297db96d56Sopenharmony_ci if (l->size <= EXPRLIST_N_CACHED) { 9307db96d56Sopenharmony_ci assert(l->data == l->p); 9317db96d56Sopenharmony_ci } 9327db96d56Sopenharmony_ci} 9337db96d56Sopenharmony_ci#endif 9347db96d56Sopenharmony_ci 9357db96d56Sopenharmony_cistatic void 9367db96d56Sopenharmony_ciExprList_Init(ExprList *l) 9377db96d56Sopenharmony_ci{ 9387db96d56Sopenharmony_ci l->allocated = EXPRLIST_N_CACHED; 9397db96d56Sopenharmony_ci l->size = 0; 9407db96d56Sopenharmony_ci 9417db96d56Sopenharmony_ci /* Until we start allocating dynamically, p points to data. */ 9427db96d56Sopenharmony_ci l->p = l->data; 9437db96d56Sopenharmony_ci 9447db96d56Sopenharmony_ci ExprList_check_invariants(l); 9457db96d56Sopenharmony_ci} 9467db96d56Sopenharmony_ci 9477db96d56Sopenharmony_cistatic int 9487db96d56Sopenharmony_ciExprList_Append(ExprList *l, expr_ty exp) 9497db96d56Sopenharmony_ci{ 9507db96d56Sopenharmony_ci ExprList_check_invariants(l); 9517db96d56Sopenharmony_ci if (l->size >= l->allocated) { 9527db96d56Sopenharmony_ci /* We need to alloc (or realloc) the memory. */ 9537db96d56Sopenharmony_ci Py_ssize_t new_size = l->allocated * 2; 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_ci /* See if we've ever allocated anything dynamically. */ 9567db96d56Sopenharmony_ci if (l->p == l->data) { 9577db96d56Sopenharmony_ci Py_ssize_t i; 9587db96d56Sopenharmony_ci /* We're still using the cached data. Switch to 9597db96d56Sopenharmony_ci alloc-ing. */ 9607db96d56Sopenharmony_ci l->p = PyMem_Malloc(sizeof(expr_ty) * new_size); 9617db96d56Sopenharmony_ci if (!l->p) { 9627db96d56Sopenharmony_ci return -1; 9637db96d56Sopenharmony_ci } 9647db96d56Sopenharmony_ci /* Copy the cached data into the new buffer. */ 9657db96d56Sopenharmony_ci for (i = 0; i < l->size; i++) { 9667db96d56Sopenharmony_ci l->p[i] = l->data[i]; 9677db96d56Sopenharmony_ci } 9687db96d56Sopenharmony_ci } else { 9697db96d56Sopenharmony_ci /* Just realloc. */ 9707db96d56Sopenharmony_ci expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size); 9717db96d56Sopenharmony_ci if (!tmp) { 9727db96d56Sopenharmony_ci PyMem_Free(l->p); 9737db96d56Sopenharmony_ci l->p = NULL; 9747db96d56Sopenharmony_ci return -1; 9757db96d56Sopenharmony_ci } 9767db96d56Sopenharmony_ci l->p = tmp; 9777db96d56Sopenharmony_ci } 9787db96d56Sopenharmony_ci 9797db96d56Sopenharmony_ci l->allocated = new_size; 9807db96d56Sopenharmony_ci assert(l->allocated == 2 * l->size); 9817db96d56Sopenharmony_ci } 9827db96d56Sopenharmony_ci 9837db96d56Sopenharmony_ci l->p[l->size++] = exp; 9847db96d56Sopenharmony_ci 9857db96d56Sopenharmony_ci ExprList_check_invariants(l); 9867db96d56Sopenharmony_ci return 0; 9877db96d56Sopenharmony_ci} 9887db96d56Sopenharmony_ci 9897db96d56Sopenharmony_cistatic void 9907db96d56Sopenharmony_ciExprList_Dealloc(ExprList *l) 9917db96d56Sopenharmony_ci{ 9927db96d56Sopenharmony_ci ExprList_check_invariants(l); 9937db96d56Sopenharmony_ci 9947db96d56Sopenharmony_ci /* If there's been an error, or we've never dynamically allocated, 9957db96d56Sopenharmony_ci do nothing. */ 9967db96d56Sopenharmony_ci if (!l->p || l->p == l->data) { 9977db96d56Sopenharmony_ci /* Do nothing. */ 9987db96d56Sopenharmony_ci } else { 9997db96d56Sopenharmony_ci /* We have dynamically allocated. Free the memory. */ 10007db96d56Sopenharmony_ci PyMem_Free(l->p); 10017db96d56Sopenharmony_ci } 10027db96d56Sopenharmony_ci l->p = NULL; 10037db96d56Sopenharmony_ci l->size = -1; 10047db96d56Sopenharmony_ci} 10057db96d56Sopenharmony_ci 10067db96d56Sopenharmony_cistatic asdl_expr_seq * 10077db96d56Sopenharmony_ciExprList_Finish(ExprList *l, PyArena *arena) 10087db96d56Sopenharmony_ci{ 10097db96d56Sopenharmony_ci asdl_expr_seq *seq; 10107db96d56Sopenharmony_ci 10117db96d56Sopenharmony_ci ExprList_check_invariants(l); 10127db96d56Sopenharmony_ci 10137db96d56Sopenharmony_ci /* Allocate the asdl_seq and copy the expressions in to it. */ 10147db96d56Sopenharmony_ci seq = _Py_asdl_expr_seq_new(l->size, arena); 10157db96d56Sopenharmony_ci if (seq) { 10167db96d56Sopenharmony_ci Py_ssize_t i; 10177db96d56Sopenharmony_ci for (i = 0; i < l->size; i++) { 10187db96d56Sopenharmony_ci asdl_seq_SET(seq, i, l->p[i]); 10197db96d56Sopenharmony_ci } 10207db96d56Sopenharmony_ci } 10217db96d56Sopenharmony_ci ExprList_Dealloc(l); 10227db96d56Sopenharmony_ci return seq; 10237db96d56Sopenharmony_ci} 10247db96d56Sopenharmony_ci 10257db96d56Sopenharmony_ci#ifdef NDEBUG 10267db96d56Sopenharmony_ci#define FstringParser_check_invariants(state) 10277db96d56Sopenharmony_ci#else 10287db96d56Sopenharmony_cistatic void 10297db96d56Sopenharmony_ciFstringParser_check_invariants(FstringParser *state) 10307db96d56Sopenharmony_ci{ 10317db96d56Sopenharmony_ci if (state->last_str) { 10327db96d56Sopenharmony_ci assert(PyUnicode_CheckExact(state->last_str)); 10337db96d56Sopenharmony_ci } 10347db96d56Sopenharmony_ci ExprList_check_invariants(&state->expr_list); 10357db96d56Sopenharmony_ci} 10367db96d56Sopenharmony_ci#endif 10377db96d56Sopenharmony_ci 10387db96d56Sopenharmony_civoid 10397db96d56Sopenharmony_ci_PyPegen_FstringParser_Init(FstringParser *state) 10407db96d56Sopenharmony_ci{ 10417db96d56Sopenharmony_ci state->last_str = NULL; 10427db96d56Sopenharmony_ci state->fmode = 0; 10437db96d56Sopenharmony_ci ExprList_Init(&state->expr_list); 10447db96d56Sopenharmony_ci FstringParser_check_invariants(state); 10457db96d56Sopenharmony_ci} 10467db96d56Sopenharmony_ci 10477db96d56Sopenharmony_civoid 10487db96d56Sopenharmony_ci_PyPegen_FstringParser_Dealloc(FstringParser *state) 10497db96d56Sopenharmony_ci{ 10507db96d56Sopenharmony_ci FstringParser_check_invariants(state); 10517db96d56Sopenharmony_ci 10527db96d56Sopenharmony_ci Py_XDECREF(state->last_str); 10537db96d56Sopenharmony_ci ExprList_Dealloc(&state->expr_list); 10547db96d56Sopenharmony_ci} 10557db96d56Sopenharmony_ci 10567db96d56Sopenharmony_ci/* Make a Constant node, but decref the PyUnicode object being added. */ 10577db96d56Sopenharmony_cistatic expr_ty 10587db96d56Sopenharmony_cimake_str_node_and_del(Parser *p, PyObject **str, Token* first_token, Token *last_token) 10597db96d56Sopenharmony_ci{ 10607db96d56Sopenharmony_ci PyObject *s = *str; 10617db96d56Sopenharmony_ci PyObject *kind = NULL; 10627db96d56Sopenharmony_ci *str = NULL; 10637db96d56Sopenharmony_ci assert(PyUnicode_CheckExact(s)); 10647db96d56Sopenharmony_ci if (_PyArena_AddPyObject(p->arena, s) < 0) { 10657db96d56Sopenharmony_ci Py_DECREF(s); 10667db96d56Sopenharmony_ci return NULL; 10677db96d56Sopenharmony_ci } 10687db96d56Sopenharmony_ci const char* the_str = PyBytes_AsString(first_token->bytes); 10697db96d56Sopenharmony_ci if (the_str && the_str[0] == 'u') { 10707db96d56Sopenharmony_ci kind = _PyPegen_new_identifier(p, "u"); 10717db96d56Sopenharmony_ci } 10727db96d56Sopenharmony_ci 10737db96d56Sopenharmony_ci if (kind == NULL && PyErr_Occurred()) { 10747db96d56Sopenharmony_ci return NULL; 10757db96d56Sopenharmony_ci } 10767db96d56Sopenharmony_ci 10777db96d56Sopenharmony_ci return _PyAST_Constant(s, kind, first_token->lineno, first_token->col_offset, 10787db96d56Sopenharmony_ci last_token->end_lineno, last_token->end_col_offset, 10797db96d56Sopenharmony_ci p->arena); 10807db96d56Sopenharmony_ci 10817db96d56Sopenharmony_ci} 10827db96d56Sopenharmony_ci 10837db96d56Sopenharmony_ci 10847db96d56Sopenharmony_ci/* Add a non-f-string (that is, a regular literal string). str is 10857db96d56Sopenharmony_ci decref'd. */ 10867db96d56Sopenharmony_ciint 10877db96d56Sopenharmony_ci_PyPegen_FstringParser_ConcatAndDel(FstringParser *state, PyObject *str) 10887db96d56Sopenharmony_ci{ 10897db96d56Sopenharmony_ci FstringParser_check_invariants(state); 10907db96d56Sopenharmony_ci 10917db96d56Sopenharmony_ci assert(PyUnicode_CheckExact(str)); 10927db96d56Sopenharmony_ci 10937db96d56Sopenharmony_ci if (PyUnicode_GET_LENGTH(str) == 0) { 10947db96d56Sopenharmony_ci Py_DECREF(str); 10957db96d56Sopenharmony_ci return 0; 10967db96d56Sopenharmony_ci } 10977db96d56Sopenharmony_ci 10987db96d56Sopenharmony_ci if (!state->last_str) { 10997db96d56Sopenharmony_ci /* We didn't have a string before, so just remember this one. */ 11007db96d56Sopenharmony_ci state->last_str = str; 11017db96d56Sopenharmony_ci } else { 11027db96d56Sopenharmony_ci /* Concatenate this with the previous string. */ 11037db96d56Sopenharmony_ci PyUnicode_AppendAndDel(&state->last_str, str); 11047db96d56Sopenharmony_ci if (!state->last_str) { 11057db96d56Sopenharmony_ci return -1; 11067db96d56Sopenharmony_ci } 11077db96d56Sopenharmony_ci } 11087db96d56Sopenharmony_ci FstringParser_check_invariants(state); 11097db96d56Sopenharmony_ci return 0; 11107db96d56Sopenharmony_ci} 11117db96d56Sopenharmony_ci 11127db96d56Sopenharmony_ci/* Parse an f-string. The f-string is in *str to end, with no 11137db96d56Sopenharmony_ci 'f' or quotes. */ 11147db96d56Sopenharmony_ciint 11157db96d56Sopenharmony_ci_PyPegen_FstringParser_ConcatFstring(Parser *p, FstringParser *state, const char **str, 11167db96d56Sopenharmony_ci const char *end, int raw, int recurse_lvl, 11177db96d56Sopenharmony_ci Token *first_token, Token* t, Token *last_token) 11187db96d56Sopenharmony_ci{ 11197db96d56Sopenharmony_ci FstringParser_check_invariants(state); 11207db96d56Sopenharmony_ci state->fmode = 1; 11217db96d56Sopenharmony_ci 11227db96d56Sopenharmony_ci /* Parse the f-string. */ 11237db96d56Sopenharmony_ci while (1) { 11247db96d56Sopenharmony_ci PyObject *literal = NULL; 11257db96d56Sopenharmony_ci PyObject *expr_text = NULL; 11267db96d56Sopenharmony_ci expr_ty expression = NULL; 11277db96d56Sopenharmony_ci 11287db96d56Sopenharmony_ci /* If there's a zero length literal in front of the 11297db96d56Sopenharmony_ci expression, literal will be NULL. If we're at the end of 11307db96d56Sopenharmony_ci the f-string, expression will be NULL (unless result == 1, 11317db96d56Sopenharmony_ci see below). */ 11327db96d56Sopenharmony_ci int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl, 11337db96d56Sopenharmony_ci &literal, &expr_text, 11347db96d56Sopenharmony_ci &expression, first_token, t, last_token); 11357db96d56Sopenharmony_ci if (result < 0) { 11367db96d56Sopenharmony_ci return -1; 11377db96d56Sopenharmony_ci } 11387db96d56Sopenharmony_ci 11397db96d56Sopenharmony_ci /* Add the literal, if any. */ 11407db96d56Sopenharmony_ci if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) { 11417db96d56Sopenharmony_ci Py_XDECREF(expr_text); 11427db96d56Sopenharmony_ci return -1; 11437db96d56Sopenharmony_ci } 11447db96d56Sopenharmony_ci /* Add the expr_text, if any. */ 11457db96d56Sopenharmony_ci if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) { 11467db96d56Sopenharmony_ci return -1; 11477db96d56Sopenharmony_ci } 11487db96d56Sopenharmony_ci 11497db96d56Sopenharmony_ci /* We've dealt with the literal and expr_text, their ownership has 11507db96d56Sopenharmony_ci been transferred to the state object. Don't look at them again. */ 11517db96d56Sopenharmony_ci 11527db96d56Sopenharmony_ci /* See if we should just loop around to get the next literal 11537db96d56Sopenharmony_ci and expression, while ignoring the expression this 11547db96d56Sopenharmony_ci time. This is used for un-doubling braces, as an 11557db96d56Sopenharmony_ci optimization. */ 11567db96d56Sopenharmony_ci if (result == 1) { 11577db96d56Sopenharmony_ci continue; 11587db96d56Sopenharmony_ci } 11597db96d56Sopenharmony_ci 11607db96d56Sopenharmony_ci if (!expression) { 11617db96d56Sopenharmony_ci /* We're done with this f-string. */ 11627db96d56Sopenharmony_ci break; 11637db96d56Sopenharmony_ci } 11647db96d56Sopenharmony_ci 11657db96d56Sopenharmony_ci /* We know we have an expression. Convert any existing string 11667db96d56Sopenharmony_ci to a Constant node. */ 11677db96d56Sopenharmony_ci if (state->last_str) { 11687db96d56Sopenharmony_ci /* Convert the existing last_str literal to a Constant node. */ 11697db96d56Sopenharmony_ci expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token); 11707db96d56Sopenharmony_ci if (!last_str || ExprList_Append(&state->expr_list, last_str) < 0) { 11717db96d56Sopenharmony_ci return -1; 11727db96d56Sopenharmony_ci } 11737db96d56Sopenharmony_ci } 11747db96d56Sopenharmony_ci 11757db96d56Sopenharmony_ci if (ExprList_Append(&state->expr_list, expression) < 0) { 11767db96d56Sopenharmony_ci return -1; 11777db96d56Sopenharmony_ci } 11787db96d56Sopenharmony_ci } 11797db96d56Sopenharmony_ci 11807db96d56Sopenharmony_ci /* If recurse_lvl is zero, then we must be at the end of the 11817db96d56Sopenharmony_ci string. Otherwise, we must be at a right brace. */ 11827db96d56Sopenharmony_ci 11837db96d56Sopenharmony_ci if (recurse_lvl == 0 && *str < end-1) { 11847db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: unexpected end of string"); 11857db96d56Sopenharmony_ci return -1; 11867db96d56Sopenharmony_ci } 11877db96d56Sopenharmony_ci if (recurse_lvl != 0 && **str != '}') { 11887db96d56Sopenharmony_ci RAISE_SYNTAX_ERROR("f-string: expecting '}'"); 11897db96d56Sopenharmony_ci return -1; 11907db96d56Sopenharmony_ci } 11917db96d56Sopenharmony_ci 11927db96d56Sopenharmony_ci FstringParser_check_invariants(state); 11937db96d56Sopenharmony_ci return 0; 11947db96d56Sopenharmony_ci} 11957db96d56Sopenharmony_ci 11967db96d56Sopenharmony_ci/* Convert the partial state reflected in last_str and expr_list to an 11977db96d56Sopenharmony_ci expr_ty. The expr_ty can be a Constant, or a JoinedStr. */ 11987db96d56Sopenharmony_ciexpr_ty 11997db96d56Sopenharmony_ci_PyPegen_FstringParser_Finish(Parser *p, FstringParser *state, Token* first_token, 12007db96d56Sopenharmony_ci Token *last_token) 12017db96d56Sopenharmony_ci{ 12027db96d56Sopenharmony_ci asdl_expr_seq *seq; 12037db96d56Sopenharmony_ci 12047db96d56Sopenharmony_ci FstringParser_check_invariants(state); 12057db96d56Sopenharmony_ci 12067db96d56Sopenharmony_ci /* If we're just a constant string with no expressions, return 12077db96d56Sopenharmony_ci that. */ 12087db96d56Sopenharmony_ci if (!state->fmode) { 12097db96d56Sopenharmony_ci assert(!state->expr_list.size); 12107db96d56Sopenharmony_ci if (!state->last_str) { 12117db96d56Sopenharmony_ci /* Create a zero length string. */ 12127db96d56Sopenharmony_ci state->last_str = PyUnicode_FromStringAndSize(NULL, 0); 12137db96d56Sopenharmony_ci if (!state->last_str) { 12147db96d56Sopenharmony_ci goto error; 12157db96d56Sopenharmony_ci } 12167db96d56Sopenharmony_ci } 12177db96d56Sopenharmony_ci return make_str_node_and_del(p, &state->last_str, first_token, last_token); 12187db96d56Sopenharmony_ci } 12197db96d56Sopenharmony_ci 12207db96d56Sopenharmony_ci /* Create a Constant node out of last_str, if needed. It will be the 12217db96d56Sopenharmony_ci last node in our expression list. */ 12227db96d56Sopenharmony_ci if (state->last_str) { 12237db96d56Sopenharmony_ci expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token); 12247db96d56Sopenharmony_ci if (!str || ExprList_Append(&state->expr_list, str) < 0) { 12257db96d56Sopenharmony_ci goto error; 12267db96d56Sopenharmony_ci } 12277db96d56Sopenharmony_ci } 12287db96d56Sopenharmony_ci /* This has already been freed. */ 12297db96d56Sopenharmony_ci assert(state->last_str == NULL); 12307db96d56Sopenharmony_ci 12317db96d56Sopenharmony_ci seq = ExprList_Finish(&state->expr_list, p->arena); 12327db96d56Sopenharmony_ci if (!seq) { 12337db96d56Sopenharmony_ci goto error; 12347db96d56Sopenharmony_ci } 12357db96d56Sopenharmony_ci 12367db96d56Sopenharmony_ci return _PyAST_JoinedStr(seq, first_token->lineno, first_token->col_offset, 12377db96d56Sopenharmony_ci last_token->end_lineno, last_token->end_col_offset, 12387db96d56Sopenharmony_ci p->arena); 12397db96d56Sopenharmony_ci 12407db96d56Sopenharmony_cierror: 12417db96d56Sopenharmony_ci _PyPegen_FstringParser_Dealloc(state); 12427db96d56Sopenharmony_ci return NULL; 12437db96d56Sopenharmony_ci} 12447db96d56Sopenharmony_ci 12457db96d56Sopenharmony_ci/* Given an f-string (with no 'f' or quotes) that's in *str and ends 12467db96d56Sopenharmony_ci at end, parse it into an expr_ty. Return NULL on error. Adjust 12477db96d56Sopenharmony_ci str to point past the parsed portion. */ 12487db96d56Sopenharmony_cistatic expr_ty 12497db96d56Sopenharmony_cifstring_parse(Parser *p, const char **str, const char *end, int raw, 12507db96d56Sopenharmony_ci int recurse_lvl, Token *first_token, Token* t, Token *last_token) 12517db96d56Sopenharmony_ci{ 12527db96d56Sopenharmony_ci FstringParser state; 12537db96d56Sopenharmony_ci 12547db96d56Sopenharmony_ci _PyPegen_FstringParser_Init(&state); 12557db96d56Sopenharmony_ci if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl, 12567db96d56Sopenharmony_ci first_token, t, last_token) < 0) { 12577db96d56Sopenharmony_ci _PyPegen_FstringParser_Dealloc(&state); 12587db96d56Sopenharmony_ci return NULL; 12597db96d56Sopenharmony_ci } 12607db96d56Sopenharmony_ci 12617db96d56Sopenharmony_ci return _PyPegen_FstringParser_Finish(p, &state, t, t); 12627db96d56Sopenharmony_ci} 1263