tokenizer.c - OpenGrok cross reference for /third

Lines Matching refs:tok
41 static int tok_nextc(struct tok_state *tok);
42 static void tok_backup(struct tok_state *tok, int c);
43 static int syntaxerror(struct tok_state *tok, const char *format, ...);
54     struct tok_state *tok = (struct tok_state *)PyMem_Malloc(
56     if (tok == NULL)
58     tok->buf = tok->cur = tok->inp = NULL;
59     tok->fp_interactive = 0;
60     tok->interactive_src_start = NULL;
61     tok->interactive_src_end = NULL;
62     tok->start = NULL;
63     tok->end = NULL;
64     tok->done = E_OK;
65     tok->fp = NULL;
66     tok->input = NULL;
67     tok->tabsize = TABSIZE;
68     tok->indent = 0;
69     tok->indstack[0] = 0;
70     tok->atbol = 1;
71     tok->pendin = 0;
72     tok->prompt = tok->nextprompt = NULL;
73     tok->lineno = 0;
74     tok->level = 0;
75     tok->altindstack[0] = 0;
76     tok->decoding_state = STATE_INIT;
77     tok->decoding_erred = 0;
78     tok->enc = NULL;
79     tok->encoding = NULL;
80     tok->cont_line = 0;
81     tok->filename = NULL;
82     tok->decoding_readline = NULL;
83     tok->decoding_buffer = NULL;
84     tok->type_comments = 0;
85     tok->async_hacks = 0;
86     tok->async_def = 0;
87     tok->async_def_indent = 0;
88     tok->async_def_nl = 0;
89     tok->interactive_underflow = IUNDERFLOW_NORMAL;
90     tok->str = NULL;
91     tok->report_warnings = 1;
92     return tok;
96 new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
100         tok->done = E_NOMEM;
109 error_ret(struct tok_state *tok) /* XXX */
111     tok->decoding_erred = 1;
112     if (tok->fp != NULL && tok->buf != NULL) /* see _PyTokenizer_Free */
113         PyMem_Free(tok->buf);
114     tok->buf = tok->cur = tok->inp = NULL;
115     tok->start = NULL;
116     tok->end = NULL;
117     tok->done = E_DECODE;
154 get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
183                 char* r = new_string(begin, t - begin, tok);
190                     r = new_string(q, strlen(q), tok);
208 check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
212     if (tok->cont_line) {
214         tok->decoding_state = STATE_NORMAL;
217     if (!get_coding_spec(line, &cs, size, tok)) {
228                 tok->decoding_state = STATE_NORMAL;
234     tok->decoding_state = STATE_NORMAL;
235     if (tok->encoding == NULL) {
236         assert(tok->decoding_readline == NULL);
237         if (strcmp(cs, "utf-8") != 0 && !set_readline(tok, cs)) {
238             error_ret(tok);
243         tok->encoding = cs;
245         if (strcmp(tok->encoding, cs) != 0) {
246             error_ret(tok);
265           struct tok_state *tok)
268     ch1 = get_char(tok);
269     tok->decoding_state = STATE_SEEK_CODING;
273         ch2 = get_char(tok);
275             unget_char(ch2, tok);
276             unget_char(ch1, tok);
279         ch3 = get_char(tok);
281             unget_char(ch3, tok);
282             unget_char(ch2, tok);
283             unget_char(ch1, tok);
287         unget_char(ch1, tok);
290     if (tok->encoding != NULL)
291         PyMem_Free(tok->encoding);
292     tok->encoding = new_string("utf-8", 5, tok);
293     if (!tok->encoding)
300 tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
301     assert(tok->fp_interactive);
307     Py_ssize_t current_size = tok->interactive_src_end - tok->interactive_src_start;
313     char* new_str = tok->interactive_src_start;
317         if (tok->interactive_src_start) {
318             PyMem_Free(tok->interactive_src_start);
320         tok->interactive_src_start = NULL;
321         tok->interactive_src_end = NULL;
322         tok->done = E_NOMEM;
331     tok->interactive_src_start = new_str;
332     tok->interactive_src_end = new_str + current_size + line_size;
340    On entry, tok->decoding_buffer will be one of:
341      1) NULL: need to call tok->decoding_readline to get a new line
342      2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
343        stored the result in tok->decoding_buffer
346        by tok->decoding_readline.  tok->decoding_buffer has the overflow.
353 tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
355     Py_ssize_t cur = tok->cur - tok->buf;
356     Py_ssize_t oldsize = tok->inp - tok->buf;
358     if (newsize > tok->end - tok->buf) {
359         char *newbuf = tok->buf;
360         Py_ssize_t start = tok->start == NULL ? -1 : tok->start - tok->buf;
361         Py_ssize_t line_start = tok->start == NULL ? -1 : tok->line_start - tok->buf;
362         Py_ssize_t multi_line_start = tok->multi_line_start - tok->buf;
365             tok->done = E_NOMEM;
368         tok->buf = newbuf;
369         tok->cur = tok->buf + cur;
370         tok->inp = tok->buf + oldsize;
371         tok->end = tok->buf + newsize;
372         tok->start = start < 0 ? NULL : tok->buf + start;
373         tok->line_start = line_start < 0 ? NULL : tok->buf + line_start;
374         tok->multi_line_start = multi_line_start < 0 ? NULL : tok->buf + multi_line_start;
385 tok_readline_recode(struct tok_state *tok) {
389     line = tok->decoding_buffer;
391         line = PyObject_CallNoArgs(tok->decoding_readline);
393             error_ret(tok);
398         tok->decoding_buffer = NULL;
402         error_ret(tok);
409     if (!tok_reserve_buf(tok, buffer_size)) {
412     memcpy(tok->inp, buf, buflen);
413     tok->inp += buflen;
414     *tok->inp = '\0';
415     if (tok->fp_interactive &&
416         tok_concatenate_interactive_new_line(tok, buf) == -1) {
431    ENC is usually identical to the future value of tok->encoding,
437 fp_setreadl(struct tok_state *tok, const char* enc)
443     fd = fileno(tok->fp);
445      * position of tok->fp.  If tok->fp was opened in text mode on Windows,
449     pos = ftell(tok->fp);
472     Py_XSETREF(tok->decoding_readline, readline);
487 static int fp_getc(struct tok_state *tok) {
488     return getc(tok->fp);
493 static void fp_ungetc(int c, struct tok_state *tok) {
494     ungetc(c, tok->fp);
560 ensure_utf8(char *line, struct tok_state *tok)
577                      badchar, tok->filename, tok->lineno);
586 buf_getc(struct tok_state *tok) {
587     return Py_CHARMASK(*tok->str++);
593 buf_ungetc(int c, struct tok_state *tok) {
594     tok->str--;
595     assert(Py_CHARMASK(*tok->str) == c);        /* tok->cur may point to read-only segment */
602 buf_setreadl(struct tok_state *tok, const char* enc) {
603     tok->enc = enc;
623 translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
630         tok->done = E_NOMEM;
673 decode_str(const char *input, int single, struct tok_state *tok)
680     tok->input = str = translate_newlines(input, single, tok);
683     tok->enc = NULL;
684     tok->str = str;
685     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
686         return error_ret(tok);
687     str = tok->str;             /* string after BOM if any */
689     if (tok->enc != NULL) {
690         utf8 = translate_into_utf8(str, tok->enc);
692             return error_ret(tok);
704     tok->enc = NULL;
708         if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) {
711         if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) {
713                                    tok, buf_setreadl))
717     if (tok->enc != NULL) {
719         utf8 = translate_into_utf8(str, tok->enc);
721             return error_ret(tok);
724     assert(tok->decoding_buffer == NULL);
725     tok->decoding_buffer = utf8; /* CAUTION */
734     struct tok_state *tok = tok_new();
737     if (tok == NULL)
739     decoded = decode_str(str, exec_input, tok);
741         _PyTokenizer_Free(tok);
745     tok->buf = tok->cur = tok->inp = decoded;
746     tok->end = decoded;
747     return tok;
755     struct tok_state *tok = tok_new();
757     if (tok == NULL)
759     tok->input = translated = translate_newlines(str, exec_input, tok);
761         _PyTokenizer_Free(tok);
764     tok->decoding_state = STATE_NORMAL;
765     tok->enc = NULL;
766     tok->str = translated;
767     tok->encoding = new_string("utf-8", 5, tok);
768     if (!tok->encoding) {
769         _PyTokenizer_Free(tok);
773     tok->buf = tok->cur = tok->inp = translated;
774     tok->end = translated;
775     return tok;
784     struct tok_state *tok = tok_new();
785     if (tok == NULL)
787     if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
788         _PyTokenizer_Free(tok);
791     tok->cur = tok->inp = tok->buf;
792     tok->end = tok->buf + BUFSIZ;
793     tok->fp = fp;
794     tok->prompt = ps1;
795     tok->nextprompt = ps2;
799         tok->encoding = new_string(enc, strlen(enc), tok);
800         if (!tok->encoding) {
801             _PyTokenizer_Free(tok);
804         tok->decoding_state = STATE_NORMAL;
806     return tok;
812 _PyTokenizer_Free(struct tok_state *tok)
814     if (tok->encoding != NULL) {
815         PyMem_Free(tok->encoding);
817     Py_XDECREF(tok->decoding_readline);
818     Py_XDECREF(tok->decoding_buffer);
819     Py_XDECREF(tok->filename);
820     if (tok->fp != NULL && tok->buf != NULL) {
821         PyMem_Free(tok->buf);
823     if (tok->input) {
824         PyMem_Free(tok->input);
826     if (tok->interactive_src_start != NULL) {
827         PyMem_Free(tok->interactive_src_start);
829     PyMem_Free(tok);
833 tok_readline_raw(struct tok_state *tok)
836         if (!tok_reserve_buf(tok, BUFSIZ)) {
839         int n_chars = (int)(tok->end - tok->inp);
841         char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
845         if (tok->fp_interactive &&
846             tok_concatenate_interactive_new_line(tok, line) == -1) {
849         tok->inp += line_size;
850         if (tok->inp == tok->buf) {
853     } while (tok->inp[-1] != '\n');
858 tok_underflow_string(struct tok_state *tok) {
859     char *end = strchr(tok->inp, '\n');
864         end = strchr(tok->inp, '\0');
865         if (end == tok->inp) {
866             tok->done = E_EOF;
870     if (tok->start == NULL) {
871         tok->buf = tok->cur;
873     tok->line_start = tok->cur;
874     tok->lineno++;
875     tok->inp = end;
880 tok_underflow_interactive(struct tok_state *tok) {
881     if (tok->interactive_underflow == IUNDERFLOW_STOP) {
882         tok->done = E_INTERACT_STOP;
885     char *newtok = PyOS_Readline(tok->fp ? tok->fp : stdin, stdout, tok->prompt);
887         char *translated = translate_newlines(newtok, 0, tok);
894     if (tok->encoding && newtok && *newtok) {
898         PyObject *u = translate_into_utf8(newtok, tok->encoding);
901             tok->done = E_DECODE;
909             tok->done = E_NOMEM;
915     if (tok->fp_interactive &&
916         tok_concatenate_interactive_new_line(tok, newtok) == -1) {
920     if (tok->nextprompt != NULL) {
921         tok->prompt = tok->nextprompt;
924         tok->done = E_INTR;
928         tok->done = E_EOF;
930     else if (tok->start != NULL) {
931         Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
933         tok->lineno++;
934         if (!tok_reserve_buf(tok, size + 1)) {
935             PyMem_Free(tok->buf);
936             tok->buf = NULL;
940         memcpy(tok->cur, newtok, size + 1);
942         tok->inp += size;
943         tok->multi_line_start = tok->buf + cur_multi_line_start;
946         tok->lineno++;
947         PyMem_Free(tok->buf);
948         tok->buf = newtok;
949         tok->cur = tok->buf;
950         tok->line_start = tok->buf;
951         tok->inp = strchr(tok->buf, '\0');
952         tok->end = tok->inp + 1;
954     if (tok->done != E_OK) {
955         if (tok->prompt != NULL) {
964 tok_underflow_file(struct tok_state *tok) {
965     if (tok->start == NULL) {
966         tok->cur = tok->inp = tok->buf;
968     if (tok->decoding_state == STATE_INIT) {
972         if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) {
973             error_ret(tok);
976         assert(tok->decoding_state != STATE_INIT);
979     if (tok->decoding_readline != NULL) {
981         if (!tok_readline_recode(tok)) {
987         if (!tok_readline_raw(tok)) {
991     if (tok->inp == tok->cur) {
992         tok->done = E_EOF;
995     if (tok->inp[-1] != '\n') {
996         assert(tok->inp + 1 < tok->end);
998         *tok->inp++ = '\n';
999         *tok->inp = '\0';
1002     tok->lineno++;
1003     if (tok->decoding_state != STATE_NORMAL) {
1004         if (tok->lineno > 2) {
1005             tok->decoding_state = STATE_NORMAL;
1007         else if (!check_coding_spec(tok->cur, strlen(tok->cur),
1008                                     tok, fp_setreadl))
1015     if (!tok->encoding && !ensure_utf8(tok->cur, tok)) {
1016         error_ret(tok);
1019     assert(tok->done == E_OK);
1020     return tok->done == E_OK;
1052 /* Get next char, updating state; error code goes into tok->done */
1055 tok_nextc(struct tok_state *tok)
1059         if (tok->cur != tok->inp) {
1060             return Py_CHARMASK(*tok->cur++); /* Fast path */
1062         if (tok->done != E_OK) {
1065         if (tok->fp == NULL) {
1066             rc = tok_underflow_string(tok);
1068         else if (tok->prompt != NULL) {
1069             rc = tok_underflow_interactive(tok);
1072             rc = tok_underflow_file(tok);
1076             fprintf(stderr, "line[%d] = ", tok->lineno);
1077             print_escape(stderr, tok->cur, tok->inp - tok->cur);
1078             fprintf(stderr, "  tok->done = %d\n", tok->done);
1082             tok->cur = tok->inp;
1085         tok->line_start = tok->cur;
1087         if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
1088             syntaxerror(tok, "source code cannot contain null bytes");
1089             tok->cur = tok->inp;
1099 tok_backup(struct tok_state *tok, int c)
1102         if (--tok->cur < tok->buf) {
1105         if ((int)(unsigned char)*tok->cur != c) {
1112 _syntaxerror_range(struct tok_state *tok, const char *format,
1122     errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
1135     Py_ssize_t line_len = strcspn(tok->line_start, "\n");
1136     if (line_len != tok->cur - tok->line_start) {
1138         errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
1145     args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno,
1146                          col_offset, errtext, tok->lineno, end_col_offset);
1154     tok->done = E_ERROR;
1159 syntaxerror(struct tok_state *tok, const char *format, ...)
1167     int ret = _syntaxerror_range(tok, format, -1, -1, vargs);
1173 syntaxerror_known_range(struct tok_state *tok,
1183     int ret = _syntaxerror_range(tok, format, col_offset, end_col_offset, vargs);
1191 indenterror(struct tok_state *tok)
1193     tok->done = E_TABSPACE;
1194     tok->cur = tok->inp;
1199 parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...)
1201     if (!tok->report_warnings) {
1218     if (PyErr_WarnExplicitObject(category, errmsg, tok->filename,
1219                                  tok->lineno, NULL, NULL) < 0) {
1224             syntaxerror(tok, "%U", errmsg);
1233     tok->done = E_ERROR;
1238 lookahead(struct tok_state *tok, const char *test)
1243         int c = tok_nextc(tok);
1252         tok_backup(tok, c);
1254             tok_backup(tok, *--s);
1261 verify_end_of_number(struct tok_state *tok, int c, const char *kind)
1275         r = lookahead(tok, "nd");
1278         r = lookahead(tok, "lse");
1281         r = lookahead(tok, "or");
1284         int c2 = tok_nextc(tok);
1288         tok_backup(tok, c2);
1291         r = lookahead(tok, "r");
1294         r = lookahead(tok, "ot");
1297         tok_backup(tok, c);
1298         if (parser_warn(tok, PyExc_SyntaxWarning,
1303         tok_nextc(tok);
1307         tok_backup(tok, c);
1308         syntaxerror(tok, "invalid %s literal", kind);
1318 verify_identifier(struct tok_state *tok)
1321     if (tok->decoding_erred)
1323     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
1326             tok->done = E_DECODE;
1329             tok->done = E_ERROR;
1336         tok->done = E_ERROR;
1349                 tok->done = E_ERROR;
1352             tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
1359             syntaxerror(tok, "invalid character '%c' (U+%s)", ch, hex);
1362             syntaxerror(tok, "invalid non-printable character U+%s", hex);
1371 tok_decimal_tail(struct tok_state *tok)
1377             c = tok_nextc(tok);
1382         c = tok_nextc(tok);
1384             tok_backup(tok, c);
1385             syntaxerror(tok, "invalid decimal literal");
1395 tok_continuation_line(struct tok_state *tok) {
1396     int c = tok_nextc(tok);
1398         tok->done = E_LINECONT;
1401     c = tok_nextc(tok);
1403         tok->done = E_EOF;
1404         tok->cur = tok->inp;
1407         tok_backup(tok, c);
1413 tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
1420     tok->start = NULL;
1424     if (tok->atbol) {
1427         tok->atbol = 0;
1430             c = tok_nextc(tok);
1435                 col = (col / tok->tabsize + 1) * tok->tabsize;
1447                 if ((c = tok_continuation_line(tok)) == -1) {
1455         tok_backup(tok, c);
1462             if (col == 0 && c == '\n' && tok->prompt != NULL) {
1465             else if (tok->prompt != NULL && tok->lineno == 1) {
1477         if (!blankline && tok->level == 0) {
1480             if (col == tok->indstack[tok->indent]) {
1482                 if (altcol != tok->altindstack[tok->indent]) {
1483                     return indenterror(tok);
1486             else if (col > tok->indstack[tok->indent]) {
1488                 if (tok->indent+1 >= MAXINDENT) {
1489                     tok->done = E_TOODEEP;
1490                     tok->cur = tok->inp;
1493                 if (altcol <= tok->altindstack[tok->indent]) {
1494                     return indenterror(tok);
1496                 tok->pendin++;
1497                 tok->indstack[++tok->indent] = col;
1498                 tok->altindstack[tok->indent] = altcol;
1500             else /* col < tok->indstack[tok->indent] */ {
1502                 while (tok->indent > 0 &&
1503                     col < tok->indstack[tok->indent]) {
1504                     tok->pendin--;
1505                     tok->indent--;
1507                 if (col != tok->indstack[tok->indent]) {
1508                     tok->done = E_DEDENT;
1509                     tok->cur = tok->inp;
1512                 if (altcol != tok->altindstack[tok->indent]) {
1513                     return indenterror(tok);
1519     tok->start = tok->cur;
1522     if (tok->pendin != 0) {
1523         if (tok->pendin < 0) {
1524             tok->pendin++;
1528             tok->pendin--;
1534     c = tok_nextc(tok);
1535     tok_backup(tok, c);
1537     if (tok->async_def
1545         && tok->level == 0
1548         && tok->async_def_nl
1551         && tok->async_def_indent >= tok->indent)
1553         tok->async_def = 0;
1554         tok->async_def_indent = 0;
1555         tok->async_def_nl = 0;
1559     tok->start = NULL;
1562         c = tok_nextc(tok);
1566     tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
1573             c = tok_nextc(tok);
1576         if (tok->type_comments) {
1577             p = tok->start;
1579             while (*prefix && p < tok->cur) {
1597                 tok_backup(tok, c);  /* don't eat the newline or EOF */
1604                     tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
1605                     && !(tok->cur > ignore_end
1610                     *p_end = tok->cur;
1614                         tok_nextc(tok);
1615                         tok->atbol = 1;
1620                     *p_end = tok->cur;
1627     if (tok->done == E_INTERACT_STOP) {
1633         if (tok->level) {
1636         return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
1663             c = tok_nextc(tok);
1672             c = tok_nextc(tok);
1674         tok_backup(tok, c);
1675         if (nonascii && !verify_identifier(tok)) {
1679         *p_start = tok->start;
1680         *p_end = tok->cur;
1683         if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
1692             if (!tok->async_hacks || tok->async_def) {
1694                 if (memcmp(tok->start, "async", 5) == 0) {
1697                 if (memcmp(tok->start, "await", 5) == 0) {
1701             else if (memcmp(tok->start, "async", 5) == 0) {
1710                 memcpy(&ahead_tok, tok, sizeof(ahead_tok));
1720                     tok->async_def_indent = tok->indent;
1721                     tok->async_def = 1;
1732         tok->atbol = 1;
1733         if (blankline || tok->level > 0) {
1736         *p_start = tok->start;
1737         *p_end = tok->cur - 1; /* Leave '\n' out of the string */
1738         tok->cont_line = 0;
1739         if (tok->async_def) {
1742             tok->async_def_nl = 1;
1749         c = tok_nextc(tok);
1753             c = tok_nextc(tok);
1755                 *p_start = tok->start;
1756                 *p_end = tok->cur;
1760                 tok_backup(tok, c);
1762             tok_backup(tok, '.');
1765             tok_backup(tok, c);
1767         *p_start = tok->start;
1768         *p_end = tok->cur;
1776             c = tok_nextc(tok);
1779                 c = tok_nextc(tok);
1782                         c = tok_nextc(tok);
1785                         tok_backup(tok, c);
1786                         return syntaxerror(tok, "invalid hexadecimal literal");
1789                         c = tok_nextc(tok);
1792                 if (!verify_end_of_number(tok, c, "hexadecimal")) {
1798                 c = tok_nextc(tok);
1801                         c = tok_nextc(tok);
1805                             return syntaxerror(tok,
1809                             tok_backup(tok, c);
1810                             return syntaxerror(tok, "invalid octal literal");
1814                         c = tok_nextc(tok);
1818                     return syntaxerror(tok,
1821                 if (!verify_end_of_number(tok, c, "octal")) {
1827                 c = tok_nextc(tok);
1830                         c = tok_nextc(tok);
1834                             return syntaxerror(tok,
1838                             tok_backup(tok, c);
1839                             return syntaxerror(tok, "invalid binary literal");
1843                         c = tok_nextc(tok);
1847                     return syntaxerror(tok,
1850                 if (!verify_end_of_number(tok, c, "binary")) {
1860                         c = tok_nextc(tok);
1862                             tok_backup(tok, c);
1863                             return syntaxerror(tok, "invalid decimal literal");
1869                     c = tok_nextc(tok);
1871                 char* zeros_end = tok->cur;
1874                     c = tok_decimal_tail(tok);
1880                     c = tok_nextc(tok);
1891                     tok_backup(tok, c);
1893                             tok, (int)(tok->start + 1 - tok->line_start),
1894                             (int)(zeros_end - tok->line_start),
1899                 if (!verify_end_of_number(tok, c, "decimal")) {
1906             c = tok_decimal_tail(tok);
1913                     c = tok_nextc(tok);
1917                         c = tok_decimal_tail(tok);
1928                     c = tok_nextc(tok);
1930                         c = tok_nextc(tok);
1932                             tok_backup(tok, c);
1933                             return syntaxerror(tok, "invalid decimal literal");
1936                         tok_backup(tok, c);
1937                         if (!verify_end_of_number(tok, e, "decimal")) {
1940                         tok_backup(tok, e);
1941                         *p_start = tok->start;
1942                         *p_end = tok->cur;
1945                     c = tok_decimal_tail(tok);
1953                     c = tok_nextc(tok);
1954                     if (!verify_end_of_number(tok, c, "imaginary")) {
1958                 else if (!verify_end_of_number(tok, c, "decimal")) {
1963         tok_backup(tok, c);
1964         *p_start = tok->start;
1965         *p_end = tok->cur;
1980         tok->first_lineno = tok->lineno;
1981         tok->multi_line_start = tok->line_start;
1984         c = tok_nextc(tok);
1986             c = tok_nextc(tok);
1995             tok_backup(tok, c);
2000             c = tok_nextc(tok);
2001             if (tok->done == E_ERROR) {
2004             if (tok->done == E_DECODE) {
2008                 assert(tok->multi_line_start != NULL);
2012                 tok->cur = (char *)tok->start;
2013                 tok->cur++;
2014                 tok->line_start = tok->multi_line_start;
2015                 int start = tok->lineno;
2016                 tok->lineno = tok->first_lineno;
2018                     syntaxerror(tok, "unterminated triple-quoted string literal"
2021                         tok->done = E_EOFS;
2026                     syntaxerror(tok, "unterminated string literal (detected at"
2029                         tok->done = E_EOLS;
2040                     tok_nextc(tok);  /* skip escaped char */
2045         *p_start = tok->start;
2046         *p_end = tok->cur;
2052         if ((c = tok_continuation_line(tok)) == -1) {
2055         tok->cont_line = 1;
2061         int c2 = tok_nextc(tok);
2064             int c3 = tok_nextc(tok);
2070                 tok_backup(tok, c3);
2072             *p_start = tok->start;
2073             *p_end = tok->cur;
2076         tok_backup(tok, c2);
2084         if (tok->level >= MAXLEVEL) {
2085             return syntaxerror(tok, "too many nested parentheses");
2087         tok->parenstack[tok->level] = c;
2088         tok->parenlinenostack[tok->level] = tok->lineno;
2089         tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
2090         tok->level++;
2095         if (!tok->level) {
2096             return syntaxerror(tok, "unmatched '%c'", c);
2098         tok->level--;
2099         int opening = tok->parenstack[tok->level];
2104             if (tok->parenlinenostack[tok->level] != tok->lineno) {
2105                 return syntaxerror(tok,
2108                         c, opening, tok->parenlinenostack[tok->level]);
2111                 return syntaxerror(tok,
2123         return syntaxerror(tok, "invalid non-printable character U+%s", hex);
2127     *p_start = tok->start;
2128     *p_end = tok->cur;
2133 _PyTokenizer_Get(struct tok_state *tok,
2136     int result = tok_get(tok, p_start, p_end);
2137     if (tok->decoding_erred) {
2139         tok->done = E_DECODE;
2190     struct tok_state *tok;
2200     tok = _PyTokenizer_FromFile(fp, NULL, NULL, NULL);
2201     if (tok == NULL) {
2207         tok->filename = filename;
2210         tok->filename = PyUnicode_FromString("<string>");
2211         if (tok->filename == NULL) {
2213             _PyTokenizer_Free(tok);
2219     tok->report_warnings = 0;
2220     while (tok->lineno < 2 && tok->done == E_OK) {
2221         _PyTokenizer_Get(tok, &p_start, &p_end);
2224     if (tok->encoding) {
2225         encoding = (char *)PyMem_Malloc(strlen(tok->encoding) + 1);
2227             strcpy(encoding, tok->encoding);
2230     _PyTokenizer_Free(tok);