17db96d56Sopenharmony_ci#ifndef Py_TOKENIZER_H 27db96d56Sopenharmony_ci#define Py_TOKENIZER_H 37db96d56Sopenharmony_ci#ifdef __cplusplus 47db96d56Sopenharmony_ciextern "C" { 57db96d56Sopenharmony_ci#endif 67db96d56Sopenharmony_ci 77db96d56Sopenharmony_ci#include "object.h" 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_ci/* Tokenizer interface */ 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci#include "token.h" /* For token types */ 127db96d56Sopenharmony_ci 137db96d56Sopenharmony_ci#define MAXINDENT 100 /* Max indentation level */ 147db96d56Sopenharmony_ci#define MAXLEVEL 200 /* Max parentheses level */ 157db96d56Sopenharmony_ci 167db96d56Sopenharmony_cienum decoding_state { 177db96d56Sopenharmony_ci STATE_INIT, 187db96d56Sopenharmony_ci STATE_SEEK_CODING, 197db96d56Sopenharmony_ci STATE_NORMAL 207db96d56Sopenharmony_ci}; 217db96d56Sopenharmony_ci 227db96d56Sopenharmony_cienum interactive_underflow_t { 237db96d56Sopenharmony_ci /* Normal mode of operation: return a new token when asked in interactive mode */ 247db96d56Sopenharmony_ci IUNDERFLOW_NORMAL, 257db96d56Sopenharmony_ci /* Forcefully return ENDMARKER when asked for a new token in interactive mode. This 267db96d56Sopenharmony_ci * can be used to prevent the tokenizer to prompt the user for new tokens */ 277db96d56Sopenharmony_ci IUNDERFLOW_STOP, 287db96d56Sopenharmony_ci}; 297db96d56Sopenharmony_ci 307db96d56Sopenharmony_ci/* Tokenizer state */ 317db96d56Sopenharmony_cistruct tok_state { 327db96d56Sopenharmony_ci /* Input state; buf <= cur <= inp <= end */ 337db96d56Sopenharmony_ci /* NB an entire line is held in the buffer */ 347db96d56Sopenharmony_ci char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 357db96d56Sopenharmony_ci char *cur; /* Next character in buffer */ 367db96d56Sopenharmony_ci char *inp; /* End of data in buffer */ 377db96d56Sopenharmony_ci int fp_interactive; /* If the file descriptor is interactive */ 387db96d56Sopenharmony_ci char *interactive_src_start; /* The start of the source parsed so far in interactive mode */ 397db96d56Sopenharmony_ci char *interactive_src_end; /* The end of the source parsed so far in interactive mode */ 407db96d56Sopenharmony_ci const char *end; /* End of input buffer if buf != NULL */ 417db96d56Sopenharmony_ci const char *start; /* Start of current token if not NULL */ 427db96d56Sopenharmony_ci int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 437db96d56Sopenharmony_ci /* NB If done != E_OK, cur must be == inp!!! */ 447db96d56Sopenharmony_ci FILE *fp; /* Rest of input; NULL if tokenizing a string */ 457db96d56Sopenharmony_ci int tabsize; /* Tab spacing */ 467db96d56Sopenharmony_ci int indent; /* Current indentation index */ 477db96d56Sopenharmony_ci int indstack[MAXINDENT]; /* Stack of indents */ 487db96d56Sopenharmony_ci int atbol; /* Nonzero if at begin of new line */ 497db96d56Sopenharmony_ci int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 507db96d56Sopenharmony_ci const char *prompt, *nextprompt; /* For interactive prompting */ 517db96d56Sopenharmony_ci int lineno; /* Current line number */ 527db96d56Sopenharmony_ci int first_lineno; /* First line of a single line or multi line string 537db96d56Sopenharmony_ci expression (cf. issue 16806) */ 547db96d56Sopenharmony_ci int level; /* () [] {} Parentheses nesting level */ 557db96d56Sopenharmony_ci /* Used to allow free continuations inside them */ 567db96d56Sopenharmony_ci char parenstack[MAXLEVEL]; 577db96d56Sopenharmony_ci int parenlinenostack[MAXLEVEL]; 587db96d56Sopenharmony_ci int parencolstack[MAXLEVEL]; 597db96d56Sopenharmony_ci PyObject *filename; 607db96d56Sopenharmony_ci /* Stuff for checking on different tab sizes */ 617db96d56Sopenharmony_ci int altindstack[MAXINDENT]; /* Stack of alternate indents */ 627db96d56Sopenharmony_ci /* Stuff for PEP 0263 */ 637db96d56Sopenharmony_ci enum decoding_state decoding_state; 647db96d56Sopenharmony_ci int decoding_erred; /* whether erred in decoding */ 657db96d56Sopenharmony_ci char *encoding; /* Source encoding. */ 667db96d56Sopenharmony_ci int cont_line; /* whether we are in a continuation line. */ 677db96d56Sopenharmony_ci const char* line_start; /* pointer to start of current line */ 687db96d56Sopenharmony_ci const char* multi_line_start; /* pointer to start of first line of 697db96d56Sopenharmony_ci a single line or multi line string 707db96d56Sopenharmony_ci expression (cf. issue 16806) */ 717db96d56Sopenharmony_ci PyObject *decoding_readline; /* open(...).readline */ 727db96d56Sopenharmony_ci PyObject *decoding_buffer; 737db96d56Sopenharmony_ci const char* enc; /* Encoding for the current str. */ 747db96d56Sopenharmony_ci char* str; /* Source string being tokenized (if tokenizing from a string)*/ 757db96d56Sopenharmony_ci char* input; /* Tokenizer's newline translated copy of the string. */ 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci int type_comments; /* Whether to look for type comments */ 787db96d56Sopenharmony_ci 797db96d56Sopenharmony_ci /* async/await related fields (still needed depending on feature_version) */ 807db96d56Sopenharmony_ci int async_hacks; /* =1 if async/await aren't always keywords */ 817db96d56Sopenharmony_ci int async_def; /* =1 if tokens are inside an 'async def' body. */ 827db96d56Sopenharmony_ci int async_def_indent; /* Indentation level of the outermost 'async def'. */ 837db96d56Sopenharmony_ci int async_def_nl; /* =1 if the outermost 'async def' had at least one 847db96d56Sopenharmony_ci NEWLINE token after it. */ 857db96d56Sopenharmony_ci /* How to proceed when asked for a new token in interactive mode */ 867db96d56Sopenharmony_ci enum interactive_underflow_t interactive_underflow; 877db96d56Sopenharmony_ci int report_warnings; 887db96d56Sopenharmony_ci}; 897db96d56Sopenharmony_ci 907db96d56Sopenharmony_ciextern struct tok_state *_PyTokenizer_FromString(const char *, int); 917db96d56Sopenharmony_ciextern struct tok_state *_PyTokenizer_FromUTF8(const char *, int); 927db96d56Sopenharmony_ciextern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*, 937db96d56Sopenharmony_ci const char *, const char *); 947db96d56Sopenharmony_ciextern void _PyTokenizer_Free(struct tok_state *); 957db96d56Sopenharmony_ciextern int _PyTokenizer_Get(struct tok_state *, const char **, const char **); 967db96d56Sopenharmony_ci 977db96d56Sopenharmony_ci#define tok_dump _Py_tok_dump 987db96d56Sopenharmony_ci 997db96d56Sopenharmony_ci#ifdef __cplusplus 1007db96d56Sopenharmony_ci} 1017db96d56Sopenharmony_ci#endif 1027db96d56Sopenharmony_ci#endif /* !Py_TOKENIZER_H */ 103