1e31aef6aSopenharmony_ci"""Implements a Jinja / Python combination lexer. The ``Lexer`` class 2e31aef6aSopenharmony_ciis used to do some preprocessing. It filters out invalid operators like 3e31aef6aSopenharmony_cithe bitshift operators we don't allow in templates. It separates 4e31aef6aSopenharmony_citemplate code and python code in expressions. 5e31aef6aSopenharmony_ci""" 6e31aef6aSopenharmony_ciimport re 7e31aef6aSopenharmony_ciimport typing as t 8e31aef6aSopenharmony_cifrom ast import literal_eval 9e31aef6aSopenharmony_cifrom collections import deque 10e31aef6aSopenharmony_cifrom sys import intern 11e31aef6aSopenharmony_ci 12e31aef6aSopenharmony_cifrom ._identifier import pattern as name_re 13e31aef6aSopenharmony_cifrom .exceptions import TemplateSyntaxError 14e31aef6aSopenharmony_cifrom .utils import LRUCache 15e31aef6aSopenharmony_ci 16e31aef6aSopenharmony_ciif t.TYPE_CHECKING: 17e31aef6aSopenharmony_ci import typing_extensions as te 18e31aef6aSopenharmony_ci from .environment import Environment 19e31aef6aSopenharmony_ci 20e31aef6aSopenharmony_ci# cache for the lexers. Exists in order to be able to have multiple 21e31aef6aSopenharmony_ci# environments with the same lexer 22e31aef6aSopenharmony_ci_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50) # type: ignore 23e31aef6aSopenharmony_ci 24e31aef6aSopenharmony_ci# static regular expressions 25e31aef6aSopenharmony_ciwhitespace_re = re.compile(r"\s+") 26e31aef6aSopenharmony_cinewline_re = re.compile(r"(\r\n|\r|\n)") 27e31aef6aSopenharmony_cistring_re = re.compile( 28e31aef6aSopenharmony_ci r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S 29e31aef6aSopenharmony_ci) 30e31aef6aSopenharmony_ciinteger_re = re.compile( 31e31aef6aSopenharmony_ci r""" 32e31aef6aSopenharmony_ci ( 33e31aef6aSopenharmony_ci 0b(_?[0-1])+ # binary 34e31aef6aSopenharmony_ci | 35e31aef6aSopenharmony_ci 0o(_?[0-7])+ # octal 36e31aef6aSopenharmony_ci | 37e31aef6aSopenharmony_ci 0x(_?[\da-f])+ # hex 38e31aef6aSopenharmony_ci | 39e31aef6aSopenharmony_ci [1-9](_?\d)* # decimal 40e31aef6aSopenharmony_ci | 41e31aef6aSopenharmony_ci 0(_?0)* # decimal zero 42e31aef6aSopenharmony_ci ) 43e31aef6aSopenharmony_ci """, 44e31aef6aSopenharmony_ci re.IGNORECASE | re.VERBOSE, 45e31aef6aSopenharmony_ci) 46e31aef6aSopenharmony_cifloat_re = re.compile( 47e31aef6aSopenharmony_ci r""" 48e31aef6aSopenharmony_ci (?<!\.) # doesn't start with a . 49e31aef6aSopenharmony_ci (\d+_)*\d+ # digits, possibly _ separated 50e31aef6aSopenharmony_ci ( 51e31aef6aSopenharmony_ci (\.(\d+_)*\d+)? # optional fractional part 52e31aef6aSopenharmony_ci e[+\-]?(\d+_)*\d+ # exponent part 53e31aef6aSopenharmony_ci | 54e31aef6aSopenharmony_ci \.(\d+_)*\d+ # required fractional part 55e31aef6aSopenharmony_ci ) 56e31aef6aSopenharmony_ci """, 57e31aef6aSopenharmony_ci re.IGNORECASE | re.VERBOSE, 58e31aef6aSopenharmony_ci) 59e31aef6aSopenharmony_ci 60e31aef6aSopenharmony_ci# internal the tokens and keep references to them 61e31aef6aSopenharmony_ciTOKEN_ADD = intern("add") 62e31aef6aSopenharmony_ciTOKEN_ASSIGN = intern("assign") 63e31aef6aSopenharmony_ciTOKEN_COLON = intern("colon") 64e31aef6aSopenharmony_ciTOKEN_COMMA = intern("comma") 65e31aef6aSopenharmony_ciTOKEN_DIV = intern("div") 66e31aef6aSopenharmony_ciTOKEN_DOT = intern("dot") 67e31aef6aSopenharmony_ciTOKEN_EQ = intern("eq") 68e31aef6aSopenharmony_ciTOKEN_FLOORDIV = intern("floordiv") 69e31aef6aSopenharmony_ciTOKEN_GT = intern("gt") 70e31aef6aSopenharmony_ciTOKEN_GTEQ = intern("gteq") 71e31aef6aSopenharmony_ciTOKEN_LBRACE = intern("lbrace") 72e31aef6aSopenharmony_ciTOKEN_LBRACKET = intern("lbracket") 73e31aef6aSopenharmony_ciTOKEN_LPAREN = intern("lparen") 74e31aef6aSopenharmony_ciTOKEN_LT = intern("lt") 75e31aef6aSopenharmony_ciTOKEN_LTEQ = intern("lteq") 76e31aef6aSopenharmony_ciTOKEN_MOD = intern("mod") 77e31aef6aSopenharmony_ciTOKEN_MUL = intern("mul") 78e31aef6aSopenharmony_ciTOKEN_NE = intern("ne") 79e31aef6aSopenharmony_ciTOKEN_PIPE = intern("pipe") 80e31aef6aSopenharmony_ciTOKEN_POW = intern("pow") 81e31aef6aSopenharmony_ciTOKEN_RBRACE = intern("rbrace") 82e31aef6aSopenharmony_ciTOKEN_RBRACKET = intern("rbracket") 83e31aef6aSopenharmony_ciTOKEN_RPAREN = intern("rparen") 84e31aef6aSopenharmony_ciTOKEN_SEMICOLON = intern("semicolon") 85e31aef6aSopenharmony_ciTOKEN_SUB = intern("sub") 86e31aef6aSopenharmony_ciTOKEN_TILDE = intern("tilde") 87e31aef6aSopenharmony_ciTOKEN_WHITESPACE = intern("whitespace") 88e31aef6aSopenharmony_ciTOKEN_FLOAT = intern("float") 89e31aef6aSopenharmony_ciTOKEN_INTEGER = intern("integer") 90e31aef6aSopenharmony_ciTOKEN_NAME = intern("name") 91e31aef6aSopenharmony_ciTOKEN_STRING = intern("string") 92e31aef6aSopenharmony_ciTOKEN_OPERATOR = intern("operator") 93e31aef6aSopenharmony_ciTOKEN_BLOCK_BEGIN = intern("block_begin") 94e31aef6aSopenharmony_ciTOKEN_BLOCK_END = intern("block_end") 95e31aef6aSopenharmony_ciTOKEN_VARIABLE_BEGIN = intern("variable_begin") 96e31aef6aSopenharmony_ciTOKEN_VARIABLE_END = intern("variable_end") 97e31aef6aSopenharmony_ciTOKEN_RAW_BEGIN = intern("raw_begin") 98e31aef6aSopenharmony_ciTOKEN_RAW_END = intern("raw_end") 99e31aef6aSopenharmony_ciTOKEN_COMMENT_BEGIN = intern("comment_begin") 100e31aef6aSopenharmony_ciTOKEN_COMMENT_END = intern("comment_end") 101e31aef6aSopenharmony_ciTOKEN_COMMENT = intern("comment") 102e31aef6aSopenharmony_ciTOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin") 103e31aef6aSopenharmony_ciTOKEN_LINESTATEMENT_END = intern("linestatement_end") 104e31aef6aSopenharmony_ciTOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin") 105e31aef6aSopenharmony_ciTOKEN_LINECOMMENT_END = intern("linecomment_end") 106e31aef6aSopenharmony_ciTOKEN_LINECOMMENT = intern("linecomment") 107e31aef6aSopenharmony_ciTOKEN_DATA = intern("data") 108e31aef6aSopenharmony_ciTOKEN_INITIAL = intern("initial") 109e31aef6aSopenharmony_ciTOKEN_EOF = intern("eof") 110e31aef6aSopenharmony_ci 111e31aef6aSopenharmony_ci# bind operators to token types 112e31aef6aSopenharmony_cioperators = { 113e31aef6aSopenharmony_ci "+": TOKEN_ADD, 114e31aef6aSopenharmony_ci "-": TOKEN_SUB, 115e31aef6aSopenharmony_ci "/": TOKEN_DIV, 116e31aef6aSopenharmony_ci "//": TOKEN_FLOORDIV, 117e31aef6aSopenharmony_ci "*": TOKEN_MUL, 118e31aef6aSopenharmony_ci "%": TOKEN_MOD, 119e31aef6aSopenharmony_ci "**": TOKEN_POW, 120e31aef6aSopenharmony_ci "~": TOKEN_TILDE, 121e31aef6aSopenharmony_ci "[": TOKEN_LBRACKET, 122e31aef6aSopenharmony_ci "]": TOKEN_RBRACKET, 123e31aef6aSopenharmony_ci "(": TOKEN_LPAREN, 124e31aef6aSopenharmony_ci ")": TOKEN_RPAREN, 125e31aef6aSopenharmony_ci "{": TOKEN_LBRACE, 126e31aef6aSopenharmony_ci "}": TOKEN_RBRACE, 127e31aef6aSopenharmony_ci "==": TOKEN_EQ, 128e31aef6aSopenharmony_ci "!=": TOKEN_NE, 129e31aef6aSopenharmony_ci ">": TOKEN_GT, 130e31aef6aSopenharmony_ci ">=": TOKEN_GTEQ, 131e31aef6aSopenharmony_ci "<": TOKEN_LT, 132e31aef6aSopenharmony_ci "<=": TOKEN_LTEQ, 133e31aef6aSopenharmony_ci "=": TOKEN_ASSIGN, 134e31aef6aSopenharmony_ci ".": TOKEN_DOT, 135e31aef6aSopenharmony_ci ":": TOKEN_COLON, 136e31aef6aSopenharmony_ci "|": TOKEN_PIPE, 137e31aef6aSopenharmony_ci ",": TOKEN_COMMA, 138e31aef6aSopenharmony_ci ";": TOKEN_SEMICOLON, 139e31aef6aSopenharmony_ci} 140e31aef6aSopenharmony_ci 141e31aef6aSopenharmony_cireverse_operators = {v: k for k, v in operators.items()} 142e31aef6aSopenharmony_ciassert len(operators) == len(reverse_operators), "operators dropped" 143e31aef6aSopenharmony_cioperator_re = re.compile( 144e31aef6aSopenharmony_ci f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" 145e31aef6aSopenharmony_ci) 146e31aef6aSopenharmony_ci 147e31aef6aSopenharmony_ciignored_tokens = frozenset( 148e31aef6aSopenharmony_ci [ 149e31aef6aSopenharmony_ci TOKEN_COMMENT_BEGIN, 150e31aef6aSopenharmony_ci TOKEN_COMMENT, 151e31aef6aSopenharmony_ci TOKEN_COMMENT_END, 152e31aef6aSopenharmony_ci TOKEN_WHITESPACE, 153e31aef6aSopenharmony_ci TOKEN_LINECOMMENT_BEGIN, 154e31aef6aSopenharmony_ci TOKEN_LINECOMMENT_END, 155e31aef6aSopenharmony_ci TOKEN_LINECOMMENT, 156e31aef6aSopenharmony_ci ] 157e31aef6aSopenharmony_ci) 158e31aef6aSopenharmony_ciignore_if_empty = frozenset( 159e31aef6aSopenharmony_ci [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] 160e31aef6aSopenharmony_ci) 161e31aef6aSopenharmony_ci 162e31aef6aSopenharmony_ci 163e31aef6aSopenharmony_cidef _describe_token_type(token_type: str) -> str: 164e31aef6aSopenharmony_ci if token_type in reverse_operators: 165e31aef6aSopenharmony_ci return reverse_operators[token_type] 166e31aef6aSopenharmony_ci 167e31aef6aSopenharmony_ci return { 168e31aef6aSopenharmony_ci TOKEN_COMMENT_BEGIN: "begin of comment", 169e31aef6aSopenharmony_ci TOKEN_COMMENT_END: "end of comment", 170e31aef6aSopenharmony_ci TOKEN_COMMENT: "comment", 171e31aef6aSopenharmony_ci TOKEN_LINECOMMENT: "comment", 172e31aef6aSopenharmony_ci TOKEN_BLOCK_BEGIN: "begin of statement block", 173e31aef6aSopenharmony_ci TOKEN_BLOCK_END: "end of statement block", 174e31aef6aSopenharmony_ci TOKEN_VARIABLE_BEGIN: "begin of print statement", 175e31aef6aSopenharmony_ci TOKEN_VARIABLE_END: "end of print statement", 176e31aef6aSopenharmony_ci TOKEN_LINESTATEMENT_BEGIN: "begin of line statement", 177e31aef6aSopenharmony_ci TOKEN_LINESTATEMENT_END: "end of line statement", 178e31aef6aSopenharmony_ci TOKEN_DATA: "template data / text", 179e31aef6aSopenharmony_ci TOKEN_EOF: "end of template", 180e31aef6aSopenharmony_ci }.get(token_type, token_type) 181e31aef6aSopenharmony_ci 182e31aef6aSopenharmony_ci 183e31aef6aSopenharmony_cidef describe_token(token: "Token") -> str: 184e31aef6aSopenharmony_ci """Returns a description of the token.""" 185e31aef6aSopenharmony_ci if token.type == TOKEN_NAME: 186e31aef6aSopenharmony_ci return token.value 187e31aef6aSopenharmony_ci 188e31aef6aSopenharmony_ci return _describe_token_type(token.type) 189e31aef6aSopenharmony_ci 190e31aef6aSopenharmony_ci 191e31aef6aSopenharmony_cidef describe_token_expr(expr: str) -> str: 192e31aef6aSopenharmony_ci """Like `describe_token` but for token expressions.""" 193e31aef6aSopenharmony_ci if ":" in expr: 194e31aef6aSopenharmony_ci type, value = expr.split(":", 1) 195e31aef6aSopenharmony_ci 196e31aef6aSopenharmony_ci if type == TOKEN_NAME: 197e31aef6aSopenharmony_ci return value 198e31aef6aSopenharmony_ci else: 199e31aef6aSopenharmony_ci type = expr 200e31aef6aSopenharmony_ci 201e31aef6aSopenharmony_ci return _describe_token_type(type) 202e31aef6aSopenharmony_ci 203e31aef6aSopenharmony_ci 204e31aef6aSopenharmony_cidef count_newlines(value: str) -> int: 205e31aef6aSopenharmony_ci """Count the number of newline characters in the string. This is 206e31aef6aSopenharmony_ci useful for extensions that filter a stream. 207e31aef6aSopenharmony_ci """ 208e31aef6aSopenharmony_ci return len(newline_re.findall(value)) 209e31aef6aSopenharmony_ci 210e31aef6aSopenharmony_ci 211e31aef6aSopenharmony_cidef compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: 212e31aef6aSopenharmony_ci """Compiles all the rules from the environment into a list of rules.""" 213e31aef6aSopenharmony_ci e = re.escape 214e31aef6aSopenharmony_ci rules = [ 215e31aef6aSopenharmony_ci ( 216e31aef6aSopenharmony_ci len(environment.comment_start_string), 217e31aef6aSopenharmony_ci TOKEN_COMMENT_BEGIN, 218e31aef6aSopenharmony_ci e(environment.comment_start_string), 219e31aef6aSopenharmony_ci ), 220e31aef6aSopenharmony_ci ( 221e31aef6aSopenharmony_ci len(environment.block_start_string), 222e31aef6aSopenharmony_ci TOKEN_BLOCK_BEGIN, 223e31aef6aSopenharmony_ci e(environment.block_start_string), 224e31aef6aSopenharmony_ci ), 225e31aef6aSopenharmony_ci ( 226e31aef6aSopenharmony_ci len(environment.variable_start_string), 227e31aef6aSopenharmony_ci TOKEN_VARIABLE_BEGIN, 228e31aef6aSopenharmony_ci e(environment.variable_start_string), 229e31aef6aSopenharmony_ci ), 230e31aef6aSopenharmony_ci ] 231e31aef6aSopenharmony_ci 232e31aef6aSopenharmony_ci if environment.line_statement_prefix is not None: 233e31aef6aSopenharmony_ci rules.append( 234e31aef6aSopenharmony_ci ( 235e31aef6aSopenharmony_ci len(environment.line_statement_prefix), 236e31aef6aSopenharmony_ci TOKEN_LINESTATEMENT_BEGIN, 237e31aef6aSopenharmony_ci r"^[ \t\v]*" + e(environment.line_statement_prefix), 238e31aef6aSopenharmony_ci ) 239e31aef6aSopenharmony_ci ) 240e31aef6aSopenharmony_ci if environment.line_comment_prefix is not None: 241e31aef6aSopenharmony_ci rules.append( 242e31aef6aSopenharmony_ci ( 243e31aef6aSopenharmony_ci len(environment.line_comment_prefix), 244e31aef6aSopenharmony_ci TOKEN_LINECOMMENT_BEGIN, 245e31aef6aSopenharmony_ci r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix), 246e31aef6aSopenharmony_ci ) 247e31aef6aSopenharmony_ci ) 248e31aef6aSopenharmony_ci 249e31aef6aSopenharmony_ci return [x[1:] for x in sorted(rules, reverse=True)] 250e31aef6aSopenharmony_ci 251e31aef6aSopenharmony_ci 252e31aef6aSopenharmony_ciclass Failure: 253e31aef6aSopenharmony_ci """Class that raises a `TemplateSyntaxError` if called. 254e31aef6aSopenharmony_ci Used by the `Lexer` to specify known errors. 255e31aef6aSopenharmony_ci """ 256e31aef6aSopenharmony_ci 257e31aef6aSopenharmony_ci def __init__( 258e31aef6aSopenharmony_ci self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError 259e31aef6aSopenharmony_ci ) -> None: 260e31aef6aSopenharmony_ci self.message = message 261e31aef6aSopenharmony_ci self.error_class = cls 262e31aef6aSopenharmony_ci 263e31aef6aSopenharmony_ci def __call__(self, lineno: int, filename: str) -> "te.NoReturn": 264e31aef6aSopenharmony_ci raise self.error_class(self.message, lineno, filename) 265e31aef6aSopenharmony_ci 266e31aef6aSopenharmony_ci 267e31aef6aSopenharmony_ciclass Token(t.NamedTuple): 268e31aef6aSopenharmony_ci lineno: int 269e31aef6aSopenharmony_ci type: str 270e31aef6aSopenharmony_ci value: str 271e31aef6aSopenharmony_ci 272e31aef6aSopenharmony_ci def __str__(self) -> str: 273e31aef6aSopenharmony_ci return describe_token(self) 274e31aef6aSopenharmony_ci 275e31aef6aSopenharmony_ci def test(self, expr: str) -> bool: 276e31aef6aSopenharmony_ci """Test a token against a token expression. This can either be a 277e31aef6aSopenharmony_ci token type or ``'token_type:token_value'``. This can only test 278e31aef6aSopenharmony_ci against string values and types. 279e31aef6aSopenharmony_ci """ 280e31aef6aSopenharmony_ci # here we do a regular string equality check as test_any is usually 281e31aef6aSopenharmony_ci # passed an iterable of not interned strings. 282e31aef6aSopenharmony_ci if self.type == expr: 283e31aef6aSopenharmony_ci return True 284e31aef6aSopenharmony_ci 285e31aef6aSopenharmony_ci if ":" in expr: 286e31aef6aSopenharmony_ci return expr.split(":", 1) == [self.type, self.value] 287e31aef6aSopenharmony_ci 288e31aef6aSopenharmony_ci return False 289e31aef6aSopenharmony_ci 290e31aef6aSopenharmony_ci def test_any(self, *iterable: str) -> bool: 291e31aef6aSopenharmony_ci """Test against multiple token expressions.""" 292e31aef6aSopenharmony_ci return any(self.test(expr) for expr in iterable) 293e31aef6aSopenharmony_ci 294e31aef6aSopenharmony_ci 295e31aef6aSopenharmony_ciclass TokenStreamIterator: 296e31aef6aSopenharmony_ci """The iterator for tokenstreams. Iterate over the stream 297e31aef6aSopenharmony_ci until the eof token is reached. 298e31aef6aSopenharmony_ci """ 299e31aef6aSopenharmony_ci 300e31aef6aSopenharmony_ci def __init__(self, stream: "TokenStream") -> None: 301e31aef6aSopenharmony_ci self.stream = stream 302e31aef6aSopenharmony_ci 303e31aef6aSopenharmony_ci def __iter__(self) -> "TokenStreamIterator": 304e31aef6aSopenharmony_ci return self 305e31aef6aSopenharmony_ci 306e31aef6aSopenharmony_ci def __next__(self) -> Token: 307e31aef6aSopenharmony_ci token = self.stream.current 308e31aef6aSopenharmony_ci 309e31aef6aSopenharmony_ci if token.type is TOKEN_EOF: 310e31aef6aSopenharmony_ci self.stream.close() 311e31aef6aSopenharmony_ci raise StopIteration 312e31aef6aSopenharmony_ci 313e31aef6aSopenharmony_ci next(self.stream) 314e31aef6aSopenharmony_ci return token 315e31aef6aSopenharmony_ci 316e31aef6aSopenharmony_ci 317e31aef6aSopenharmony_ciclass TokenStream: 318e31aef6aSopenharmony_ci """A token stream is an iterable that yields :class:`Token`\\s. The 319e31aef6aSopenharmony_ci parser however does not iterate over it but calls :meth:`next` to go 320e31aef6aSopenharmony_ci one token ahead. The current active token is stored as :attr:`current`. 321e31aef6aSopenharmony_ci """ 322e31aef6aSopenharmony_ci 323e31aef6aSopenharmony_ci def __init__( 324e31aef6aSopenharmony_ci self, 325e31aef6aSopenharmony_ci generator: t.Iterable[Token], 326e31aef6aSopenharmony_ci name: t.Optional[str], 327e31aef6aSopenharmony_ci filename: t.Optional[str], 328e31aef6aSopenharmony_ci ): 329e31aef6aSopenharmony_ci self._iter = iter(generator) 330e31aef6aSopenharmony_ci self._pushed: "te.Deque[Token]" = deque() 331e31aef6aSopenharmony_ci self.name = name 332e31aef6aSopenharmony_ci self.filename = filename 333e31aef6aSopenharmony_ci self.closed = False 334e31aef6aSopenharmony_ci self.current = Token(1, TOKEN_INITIAL, "") 335e31aef6aSopenharmony_ci next(self) 336e31aef6aSopenharmony_ci 337e31aef6aSopenharmony_ci def __iter__(self) -> TokenStreamIterator: 338e31aef6aSopenharmony_ci return TokenStreamIterator(self) 339e31aef6aSopenharmony_ci 340e31aef6aSopenharmony_ci def __bool__(self) -> bool: 341e31aef6aSopenharmony_ci return bool(self._pushed) or self.current.type is not TOKEN_EOF 342e31aef6aSopenharmony_ci 343e31aef6aSopenharmony_ci @property 344e31aef6aSopenharmony_ci def eos(self) -> bool: 345e31aef6aSopenharmony_ci """Are we at the end of the stream?""" 346e31aef6aSopenharmony_ci return not self 347e31aef6aSopenharmony_ci 348e31aef6aSopenharmony_ci def push(self, token: Token) -> None: 349e31aef6aSopenharmony_ci """Push a token back to the stream.""" 350e31aef6aSopenharmony_ci self._pushed.append(token) 351e31aef6aSopenharmony_ci 352e31aef6aSopenharmony_ci def look(self) -> Token: 353e31aef6aSopenharmony_ci """Look at the next token.""" 354e31aef6aSopenharmony_ci old_token = next(self) 355e31aef6aSopenharmony_ci result = self.current 356e31aef6aSopenharmony_ci self.push(result) 357e31aef6aSopenharmony_ci self.current = old_token 358e31aef6aSopenharmony_ci return result 359e31aef6aSopenharmony_ci 360e31aef6aSopenharmony_ci def skip(self, n: int = 1) -> None: 361e31aef6aSopenharmony_ci """Got n tokens ahead.""" 362e31aef6aSopenharmony_ci for _ in range(n): 363e31aef6aSopenharmony_ci next(self) 364e31aef6aSopenharmony_ci 365e31aef6aSopenharmony_ci def next_if(self, expr: str) -> t.Optional[Token]: 366e31aef6aSopenharmony_ci """Perform the token test and return the token if it matched. 367e31aef6aSopenharmony_ci Otherwise the return value is `None`. 368e31aef6aSopenharmony_ci """ 369e31aef6aSopenharmony_ci if self.current.test(expr): 370e31aef6aSopenharmony_ci return next(self) 371e31aef6aSopenharmony_ci 372e31aef6aSopenharmony_ci return None 373e31aef6aSopenharmony_ci 374e31aef6aSopenharmony_ci def skip_if(self, expr: str) -> bool: 375e31aef6aSopenharmony_ci """Like :meth:`next_if` but only returns `True` or `False`.""" 376e31aef6aSopenharmony_ci return self.next_if(expr) is not None 377e31aef6aSopenharmony_ci 378e31aef6aSopenharmony_ci def __next__(self) -> Token: 379e31aef6aSopenharmony_ci """Go one token ahead and return the old one. 380e31aef6aSopenharmony_ci 381e31aef6aSopenharmony_ci Use the built-in :func:`next` instead of calling this directly. 382e31aef6aSopenharmony_ci """ 383e31aef6aSopenharmony_ci rv = self.current 384e31aef6aSopenharmony_ci 385e31aef6aSopenharmony_ci if self._pushed: 386e31aef6aSopenharmony_ci self.current = self._pushed.popleft() 387e31aef6aSopenharmony_ci elif self.current.type is not TOKEN_EOF: 388e31aef6aSopenharmony_ci try: 389e31aef6aSopenharmony_ci self.current = next(self._iter) 390e31aef6aSopenharmony_ci except StopIteration: 391e31aef6aSopenharmony_ci self.close() 392e31aef6aSopenharmony_ci 393e31aef6aSopenharmony_ci return rv 394e31aef6aSopenharmony_ci 395e31aef6aSopenharmony_ci def close(self) -> None: 396e31aef6aSopenharmony_ci """Close the stream.""" 397e31aef6aSopenharmony_ci self.current = Token(self.current.lineno, TOKEN_EOF, "") 398e31aef6aSopenharmony_ci self._iter = iter(()) 399e31aef6aSopenharmony_ci self.closed = True 400e31aef6aSopenharmony_ci 401e31aef6aSopenharmony_ci def expect(self, expr: str) -> Token: 402e31aef6aSopenharmony_ci """Expect a given token type and return it. This accepts the same 403e31aef6aSopenharmony_ci argument as :meth:`jinja2.lexer.Token.test`. 404e31aef6aSopenharmony_ci """ 405e31aef6aSopenharmony_ci if not self.current.test(expr): 406e31aef6aSopenharmony_ci expr = describe_token_expr(expr) 407e31aef6aSopenharmony_ci 408e31aef6aSopenharmony_ci if self.current.type is TOKEN_EOF: 409e31aef6aSopenharmony_ci raise TemplateSyntaxError( 410e31aef6aSopenharmony_ci f"unexpected end of template, expected {expr!r}.", 411e31aef6aSopenharmony_ci self.current.lineno, 412e31aef6aSopenharmony_ci self.name, 413e31aef6aSopenharmony_ci self.filename, 414e31aef6aSopenharmony_ci ) 415e31aef6aSopenharmony_ci 416e31aef6aSopenharmony_ci raise TemplateSyntaxError( 417e31aef6aSopenharmony_ci f"expected token {expr!r}, got {describe_token(self.current)!r}", 418e31aef6aSopenharmony_ci self.current.lineno, 419e31aef6aSopenharmony_ci self.name, 420e31aef6aSopenharmony_ci self.filename, 421e31aef6aSopenharmony_ci ) 422e31aef6aSopenharmony_ci 423e31aef6aSopenharmony_ci return next(self) 424e31aef6aSopenharmony_ci 425e31aef6aSopenharmony_ci 426e31aef6aSopenharmony_cidef get_lexer(environment: "Environment") -> "Lexer": 427e31aef6aSopenharmony_ci """Return a lexer which is probably cached.""" 428e31aef6aSopenharmony_ci key = ( 429e31aef6aSopenharmony_ci environment.block_start_string, 430e31aef6aSopenharmony_ci environment.block_end_string, 431e31aef6aSopenharmony_ci environment.variable_start_string, 432e31aef6aSopenharmony_ci environment.variable_end_string, 433e31aef6aSopenharmony_ci environment.comment_start_string, 434e31aef6aSopenharmony_ci environment.comment_end_string, 435e31aef6aSopenharmony_ci environment.line_statement_prefix, 436e31aef6aSopenharmony_ci environment.line_comment_prefix, 437e31aef6aSopenharmony_ci environment.trim_blocks, 438e31aef6aSopenharmony_ci environment.lstrip_blocks, 439e31aef6aSopenharmony_ci environment.newline_sequence, 440e31aef6aSopenharmony_ci environment.keep_trailing_newline, 441e31aef6aSopenharmony_ci ) 442e31aef6aSopenharmony_ci lexer = _lexer_cache.get(key) 443e31aef6aSopenharmony_ci 444e31aef6aSopenharmony_ci if lexer is None: 445e31aef6aSopenharmony_ci _lexer_cache[key] = lexer = Lexer(environment) 446e31aef6aSopenharmony_ci 447e31aef6aSopenharmony_ci return lexer 448e31aef6aSopenharmony_ci 449e31aef6aSopenharmony_ci 450e31aef6aSopenharmony_ciclass OptionalLStrip(tuple): 451e31aef6aSopenharmony_ci """A special tuple for marking a point in the state that can have 452e31aef6aSopenharmony_ci lstrip applied. 453e31aef6aSopenharmony_ci """ 454e31aef6aSopenharmony_ci 455e31aef6aSopenharmony_ci __slots__ = () 456e31aef6aSopenharmony_ci 457e31aef6aSopenharmony_ci # Even though it looks like a no-op, creating instances fails 458e31aef6aSopenharmony_ci # without this. 459e31aef6aSopenharmony_ci def __new__(cls, *members, **kwargs): # type: ignore 460e31aef6aSopenharmony_ci return super().__new__(cls, members) 461e31aef6aSopenharmony_ci 462e31aef6aSopenharmony_ci 463e31aef6aSopenharmony_ciclass _Rule(t.NamedTuple): 464e31aef6aSopenharmony_ci pattern: t.Pattern[str] 465e31aef6aSopenharmony_ci tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]] 466e31aef6aSopenharmony_ci command: t.Optional[str] 467e31aef6aSopenharmony_ci 468e31aef6aSopenharmony_ci 469e31aef6aSopenharmony_ciclass Lexer: 470e31aef6aSopenharmony_ci """Class that implements a lexer for a given environment. Automatically 471e31aef6aSopenharmony_ci created by the environment class, usually you don't have to do that. 472e31aef6aSopenharmony_ci 473e31aef6aSopenharmony_ci Note that the lexer is not automatically bound to an environment. 474e31aef6aSopenharmony_ci Multiple environments can share the same lexer. 475e31aef6aSopenharmony_ci """ 476e31aef6aSopenharmony_ci 477e31aef6aSopenharmony_ci def __init__(self, environment: "Environment") -> None: 478e31aef6aSopenharmony_ci # shortcuts 479e31aef6aSopenharmony_ci e = re.escape 480e31aef6aSopenharmony_ci 481e31aef6aSopenharmony_ci def c(x: str) -> t.Pattern[str]: 482e31aef6aSopenharmony_ci return re.compile(x, re.M | re.S) 483e31aef6aSopenharmony_ci 484e31aef6aSopenharmony_ci # lexing rules for tags 485e31aef6aSopenharmony_ci tag_rules: t.List[_Rule] = [ 486e31aef6aSopenharmony_ci _Rule(whitespace_re, TOKEN_WHITESPACE, None), 487e31aef6aSopenharmony_ci _Rule(float_re, TOKEN_FLOAT, None), 488e31aef6aSopenharmony_ci _Rule(integer_re, TOKEN_INTEGER, None), 489e31aef6aSopenharmony_ci _Rule(name_re, TOKEN_NAME, None), 490e31aef6aSopenharmony_ci _Rule(string_re, TOKEN_STRING, None), 491e31aef6aSopenharmony_ci _Rule(operator_re, TOKEN_OPERATOR, None), 492e31aef6aSopenharmony_ci ] 493e31aef6aSopenharmony_ci 494e31aef6aSopenharmony_ci # assemble the root lexing rule. because "|" is ungreedy 495e31aef6aSopenharmony_ci # we have to sort by length so that the lexer continues working 496e31aef6aSopenharmony_ci # as expected when we have parsing rules like <% for block and 497e31aef6aSopenharmony_ci # <%= for variables. (if someone wants asp like syntax) 498e31aef6aSopenharmony_ci # variables are just part of the rules if variable processing 499e31aef6aSopenharmony_ci # is required. 500e31aef6aSopenharmony_ci root_tag_rules = compile_rules(environment) 501e31aef6aSopenharmony_ci 502e31aef6aSopenharmony_ci block_start_re = e(environment.block_start_string) 503e31aef6aSopenharmony_ci block_end_re = e(environment.block_end_string) 504e31aef6aSopenharmony_ci comment_end_re = e(environment.comment_end_string) 505e31aef6aSopenharmony_ci variable_end_re = e(environment.variable_end_string) 506e31aef6aSopenharmony_ci 507e31aef6aSopenharmony_ci # block suffix if trimming is enabled 508e31aef6aSopenharmony_ci block_suffix_re = "\\n?" if environment.trim_blocks else "" 509e31aef6aSopenharmony_ci 510e31aef6aSopenharmony_ci self.lstrip_blocks = environment.lstrip_blocks 511e31aef6aSopenharmony_ci 512e31aef6aSopenharmony_ci self.newline_sequence = environment.newline_sequence 513e31aef6aSopenharmony_ci self.keep_trailing_newline = environment.keep_trailing_newline 514e31aef6aSopenharmony_ci 515e31aef6aSopenharmony_ci root_raw_re = ( 516e31aef6aSopenharmony_ci rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*" 517e31aef6aSopenharmony_ci rf"(?:\-{block_end_re}\s*|{block_end_re}))" 518e31aef6aSopenharmony_ci ) 519e31aef6aSopenharmony_ci root_parts_re = "|".join( 520e31aef6aSopenharmony_ci [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules] 521e31aef6aSopenharmony_ci ) 522e31aef6aSopenharmony_ci 523e31aef6aSopenharmony_ci # global lexing rules 524e31aef6aSopenharmony_ci self.rules: t.Dict[str, t.List[_Rule]] = { 525e31aef6aSopenharmony_ci "root": [ 526e31aef6aSopenharmony_ci # directives 527e31aef6aSopenharmony_ci _Rule( 528e31aef6aSopenharmony_ci c(rf"(.*?)(?:{root_parts_re})"), 529e31aef6aSopenharmony_ci OptionalLStrip(TOKEN_DATA, "#bygroup"), # type: ignore 530e31aef6aSopenharmony_ci "#bygroup", 531e31aef6aSopenharmony_ci ), 532e31aef6aSopenharmony_ci # data 533e31aef6aSopenharmony_ci _Rule(c(".+"), TOKEN_DATA, None), 534e31aef6aSopenharmony_ci ], 535e31aef6aSopenharmony_ci # comments 536e31aef6aSopenharmony_ci TOKEN_COMMENT_BEGIN: [ 537e31aef6aSopenharmony_ci _Rule( 538e31aef6aSopenharmony_ci c( 539e31aef6aSopenharmony_ci rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*" 540e31aef6aSopenharmony_ci rf"|{comment_end_re}{block_suffix_re}))" 541e31aef6aSopenharmony_ci ), 542e31aef6aSopenharmony_ci (TOKEN_COMMENT, TOKEN_COMMENT_END), 543e31aef6aSopenharmony_ci "#pop", 544e31aef6aSopenharmony_ci ), 545e31aef6aSopenharmony_ci _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None), 546e31aef6aSopenharmony_ci ], 547e31aef6aSopenharmony_ci # blocks 548e31aef6aSopenharmony_ci TOKEN_BLOCK_BEGIN: [ 549e31aef6aSopenharmony_ci _Rule( 550e31aef6aSopenharmony_ci c( 551e31aef6aSopenharmony_ci rf"(?:\+{block_end_re}|\-{block_end_re}\s*" 552e31aef6aSopenharmony_ci rf"|{block_end_re}{block_suffix_re})" 553e31aef6aSopenharmony_ci ), 554e31aef6aSopenharmony_ci TOKEN_BLOCK_END, 555e31aef6aSopenharmony_ci "#pop", 556e31aef6aSopenharmony_ci ), 557e31aef6aSopenharmony_ci ] 558e31aef6aSopenharmony_ci + tag_rules, 559e31aef6aSopenharmony_ci # variables 560e31aef6aSopenharmony_ci TOKEN_VARIABLE_BEGIN: [ 561e31aef6aSopenharmony_ci _Rule( 562e31aef6aSopenharmony_ci c(rf"\-{variable_end_re}\s*|{variable_end_re}"), 563e31aef6aSopenharmony_ci TOKEN_VARIABLE_END, 564e31aef6aSopenharmony_ci "#pop", 565e31aef6aSopenharmony_ci ) 566e31aef6aSopenharmony_ci ] 567e31aef6aSopenharmony_ci + tag_rules, 568e31aef6aSopenharmony_ci # raw block 569e31aef6aSopenharmony_ci TOKEN_RAW_BEGIN: [ 570e31aef6aSopenharmony_ci _Rule( 571e31aef6aSopenharmony_ci c( 572e31aef6aSopenharmony_ci rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*" 573e31aef6aSopenharmony_ci rf"(?:\+{block_end_re}|\-{block_end_re}\s*" 574e31aef6aSopenharmony_ci rf"|{block_end_re}{block_suffix_re}))" 575e31aef6aSopenharmony_ci ), 576e31aef6aSopenharmony_ci OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END), # type: ignore 577e31aef6aSopenharmony_ci "#pop", 578e31aef6aSopenharmony_ci ), 579e31aef6aSopenharmony_ci _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None), 580e31aef6aSopenharmony_ci ], 581e31aef6aSopenharmony_ci # line statements 582e31aef6aSopenharmony_ci TOKEN_LINESTATEMENT_BEGIN: [ 583e31aef6aSopenharmony_ci _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop") 584e31aef6aSopenharmony_ci ] 585e31aef6aSopenharmony_ci + tag_rules, 586e31aef6aSopenharmony_ci # line comments 587e31aef6aSopenharmony_ci TOKEN_LINECOMMENT_BEGIN: [ 588e31aef6aSopenharmony_ci _Rule( 589e31aef6aSopenharmony_ci c(r"(.*?)()(?=\n|$)"), 590e31aef6aSopenharmony_ci (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END), 591e31aef6aSopenharmony_ci "#pop", 592e31aef6aSopenharmony_ci ) 593e31aef6aSopenharmony_ci ], 594e31aef6aSopenharmony_ci } 595e31aef6aSopenharmony_ci 596e31aef6aSopenharmony_ci def _normalize_newlines(self, value: str) -> str: 597e31aef6aSopenharmony_ci """Replace all newlines with the configured sequence in strings 598e31aef6aSopenharmony_ci and template data. 599e31aef6aSopenharmony_ci """ 600e31aef6aSopenharmony_ci return newline_re.sub(self.newline_sequence, value) 601e31aef6aSopenharmony_ci 602e31aef6aSopenharmony_ci def tokenize( 603e31aef6aSopenharmony_ci self, 604e31aef6aSopenharmony_ci source: str, 605e31aef6aSopenharmony_ci name: t.Optional[str] = None, 606e31aef6aSopenharmony_ci filename: t.Optional[str] = None, 607e31aef6aSopenharmony_ci state: t.Optional[str] = None, 608e31aef6aSopenharmony_ci ) -> TokenStream: 609e31aef6aSopenharmony_ci """Calls tokeniter + tokenize and wraps it in a token stream.""" 610e31aef6aSopenharmony_ci stream = self.tokeniter(source, name, filename, state) 611e31aef6aSopenharmony_ci return TokenStream(self.wrap(stream, name, filename), name, filename) 612e31aef6aSopenharmony_ci 613e31aef6aSopenharmony_ci def wrap( 614e31aef6aSopenharmony_ci self, 615e31aef6aSopenharmony_ci stream: t.Iterable[t.Tuple[int, str, str]], 616e31aef6aSopenharmony_ci name: t.Optional[str] = None, 617e31aef6aSopenharmony_ci filename: t.Optional[str] = None, 618e31aef6aSopenharmony_ci ) -> t.Iterator[Token]: 619e31aef6aSopenharmony_ci """This is called with the stream as returned by `tokenize` and wraps 620e31aef6aSopenharmony_ci every token in a :class:`Token` and converts the value. 621e31aef6aSopenharmony_ci """ 622e31aef6aSopenharmony_ci for lineno, token, value_str in stream: 623e31aef6aSopenharmony_ci if token in ignored_tokens: 624e31aef6aSopenharmony_ci continue 625e31aef6aSopenharmony_ci 626e31aef6aSopenharmony_ci value: t.Any = value_str 627e31aef6aSopenharmony_ci 628e31aef6aSopenharmony_ci if token == TOKEN_LINESTATEMENT_BEGIN: 629e31aef6aSopenharmony_ci token = TOKEN_BLOCK_BEGIN 630e31aef6aSopenharmony_ci elif token == TOKEN_LINESTATEMENT_END: 631e31aef6aSopenharmony_ci token = TOKEN_BLOCK_END 632e31aef6aSopenharmony_ci # we are not interested in those tokens in the parser 633e31aef6aSopenharmony_ci elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): 634e31aef6aSopenharmony_ci continue 635e31aef6aSopenharmony_ci elif token == TOKEN_DATA: 636e31aef6aSopenharmony_ci value = self._normalize_newlines(value_str) 637e31aef6aSopenharmony_ci elif token == "keyword": 638e31aef6aSopenharmony_ci token = value_str 639e31aef6aSopenharmony_ci elif token == TOKEN_NAME: 640e31aef6aSopenharmony_ci value = value_str 641e31aef6aSopenharmony_ci 642e31aef6aSopenharmony_ci if not value.isidentifier(): 643e31aef6aSopenharmony_ci raise TemplateSyntaxError( 644e31aef6aSopenharmony_ci "Invalid character in identifier", lineno, name, filename 645e31aef6aSopenharmony_ci ) 646e31aef6aSopenharmony_ci elif token == TOKEN_STRING: 647e31aef6aSopenharmony_ci # try to unescape string 648e31aef6aSopenharmony_ci try: 649e31aef6aSopenharmony_ci value = ( 650e31aef6aSopenharmony_ci self._normalize_newlines(value_str[1:-1]) 651e31aef6aSopenharmony_ci .encode("ascii", "backslashreplace") 652e31aef6aSopenharmony_ci .decode("unicode-escape") 653e31aef6aSopenharmony_ci ) 654e31aef6aSopenharmony_ci except Exception as e: 655e31aef6aSopenharmony_ci msg = str(e).split(":")[-1].strip() 656e31aef6aSopenharmony_ci raise TemplateSyntaxError(msg, lineno, name, filename) from e 657e31aef6aSopenharmony_ci elif token == TOKEN_INTEGER: 658e31aef6aSopenharmony_ci value = int(value_str.replace("_", ""), 0) 659e31aef6aSopenharmony_ci elif token == TOKEN_FLOAT: 660e31aef6aSopenharmony_ci # remove all "_" first to support more Python versions 661e31aef6aSopenharmony_ci value = literal_eval(value_str.replace("_", "")) 662e31aef6aSopenharmony_ci elif token == TOKEN_OPERATOR: 663e31aef6aSopenharmony_ci token = operators[value_str] 664e31aef6aSopenharmony_ci 665e31aef6aSopenharmony_ci yield Token(lineno, token, value) 666e31aef6aSopenharmony_ci 667e31aef6aSopenharmony_ci def tokeniter( 668e31aef6aSopenharmony_ci self, 669e31aef6aSopenharmony_ci source: str, 670e31aef6aSopenharmony_ci name: t.Optional[str], 671e31aef6aSopenharmony_ci filename: t.Optional[str] = None, 672e31aef6aSopenharmony_ci state: t.Optional[str] = None, 673e31aef6aSopenharmony_ci ) -> t.Iterator[t.Tuple[int, str, str]]: 674e31aef6aSopenharmony_ci """This method tokenizes the text and returns the tokens in a 675e31aef6aSopenharmony_ci generator. Use this method if you just want to tokenize a template. 676e31aef6aSopenharmony_ci 677e31aef6aSopenharmony_ci .. versionchanged:: 3.0 678e31aef6aSopenharmony_ci Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line 679e31aef6aSopenharmony_ci breaks. 680e31aef6aSopenharmony_ci """ 681e31aef6aSopenharmony_ci lines = newline_re.split(source)[::2] 682e31aef6aSopenharmony_ci 683e31aef6aSopenharmony_ci if not self.keep_trailing_newline and lines[-1] == "": 684e31aef6aSopenharmony_ci del lines[-1] 685e31aef6aSopenharmony_ci 686e31aef6aSopenharmony_ci source = "\n".join(lines) 687e31aef6aSopenharmony_ci pos = 0 688e31aef6aSopenharmony_ci lineno = 1 689e31aef6aSopenharmony_ci stack = ["root"] 690e31aef6aSopenharmony_ci 691e31aef6aSopenharmony_ci if state is not None and state != "root": 692e31aef6aSopenharmony_ci assert state in ("variable", "block"), "invalid state" 693e31aef6aSopenharmony_ci stack.append(state + "_begin") 694e31aef6aSopenharmony_ci 695e31aef6aSopenharmony_ci statetokens = self.rules[stack[-1]] 696e31aef6aSopenharmony_ci source_length = len(source) 697e31aef6aSopenharmony_ci balancing_stack: t.List[str] = [] 698e31aef6aSopenharmony_ci newlines_stripped = 0 699e31aef6aSopenharmony_ci line_starting = True 700e31aef6aSopenharmony_ci 701e31aef6aSopenharmony_ci while True: 702e31aef6aSopenharmony_ci # tokenizer loop 703e31aef6aSopenharmony_ci for regex, tokens, new_state in statetokens: 704e31aef6aSopenharmony_ci m = regex.match(source, pos) 705e31aef6aSopenharmony_ci 706e31aef6aSopenharmony_ci # if no match we try again with the next rule 707e31aef6aSopenharmony_ci if m is None: 708e31aef6aSopenharmony_ci continue 709e31aef6aSopenharmony_ci 710e31aef6aSopenharmony_ci # we only match blocks and variables if braces / parentheses 711e31aef6aSopenharmony_ci # are balanced. continue parsing with the lower rule which 712e31aef6aSopenharmony_ci # is the operator rule. do this only if the end tags look 713e31aef6aSopenharmony_ci # like operators 714e31aef6aSopenharmony_ci if balancing_stack and tokens in ( 715e31aef6aSopenharmony_ci TOKEN_VARIABLE_END, 716e31aef6aSopenharmony_ci TOKEN_BLOCK_END, 717e31aef6aSopenharmony_ci TOKEN_LINESTATEMENT_END, 718e31aef6aSopenharmony_ci ): 719e31aef6aSopenharmony_ci continue 720e31aef6aSopenharmony_ci 721e31aef6aSopenharmony_ci # tuples support more options 722e31aef6aSopenharmony_ci if isinstance(tokens, tuple): 723e31aef6aSopenharmony_ci groups: t.Sequence[str] = m.groups() 724e31aef6aSopenharmony_ci 725e31aef6aSopenharmony_ci if isinstance(tokens, OptionalLStrip): 726e31aef6aSopenharmony_ci # Rule supports lstrip. Match will look like 727e31aef6aSopenharmony_ci # text, block type, whitespace control, type, control, ... 728e31aef6aSopenharmony_ci text = groups[0] 729e31aef6aSopenharmony_ci # Skipping the text and first type, every other group is the 730e31aef6aSopenharmony_ci # whitespace control for each type. One of the groups will be 731e31aef6aSopenharmony_ci # -, +, or empty string instead of None. 732e31aef6aSopenharmony_ci strip_sign = next(g for g in groups[2::2] if g is not None) 733e31aef6aSopenharmony_ci 734e31aef6aSopenharmony_ci if strip_sign == "-": 735e31aef6aSopenharmony_ci # Strip all whitespace between the text and the tag. 736e31aef6aSopenharmony_ci stripped = text.rstrip() 737e31aef6aSopenharmony_ci newlines_stripped = text[len(stripped) :].count("\n") 738e31aef6aSopenharmony_ci groups = [stripped, *groups[1:]] 739e31aef6aSopenharmony_ci elif ( 740e31aef6aSopenharmony_ci # Not marked for preserving whitespace. 741e31aef6aSopenharmony_ci strip_sign != "+" 742e31aef6aSopenharmony_ci # lstrip is enabled. 743e31aef6aSopenharmony_ci and self.lstrip_blocks 744e31aef6aSopenharmony_ci # Not a variable expression. 745e31aef6aSopenharmony_ci and not m.groupdict().get(TOKEN_VARIABLE_BEGIN) 746e31aef6aSopenharmony_ci ): 747e31aef6aSopenharmony_ci # The start of text between the last newline and the tag. 748e31aef6aSopenharmony_ci l_pos = text.rfind("\n") + 1 749e31aef6aSopenharmony_ci 750e31aef6aSopenharmony_ci if l_pos > 0 or line_starting: 751e31aef6aSopenharmony_ci # If there's only whitespace between the newline and the 752e31aef6aSopenharmony_ci # tag, strip it. 753e31aef6aSopenharmony_ci if whitespace_re.fullmatch(text, l_pos): 754e31aef6aSopenharmony_ci groups = [text[:l_pos], *groups[1:]] 755e31aef6aSopenharmony_ci 756e31aef6aSopenharmony_ci for idx, token in enumerate(tokens): 757e31aef6aSopenharmony_ci # failure group 758e31aef6aSopenharmony_ci if token.__class__ is Failure: 759e31aef6aSopenharmony_ci raise token(lineno, filename) 760e31aef6aSopenharmony_ci # bygroup is a bit more complex, in that case we 761e31aef6aSopenharmony_ci # yield for the current token the first named 762e31aef6aSopenharmony_ci # group that matched 763e31aef6aSopenharmony_ci elif token == "#bygroup": 764e31aef6aSopenharmony_ci for key, value in m.groupdict().items(): 765e31aef6aSopenharmony_ci if value is not None: 766e31aef6aSopenharmony_ci yield lineno, key, value 767e31aef6aSopenharmony_ci lineno += value.count("\n") 768e31aef6aSopenharmony_ci break 769e31aef6aSopenharmony_ci else: 770e31aef6aSopenharmony_ci raise RuntimeError( 771e31aef6aSopenharmony_ci f"{regex!r} wanted to resolve the token dynamically" 772e31aef6aSopenharmony_ci " but no group matched" 773e31aef6aSopenharmony_ci ) 774e31aef6aSopenharmony_ci # normal group 775e31aef6aSopenharmony_ci else: 776e31aef6aSopenharmony_ci data = groups[idx] 777e31aef6aSopenharmony_ci 778e31aef6aSopenharmony_ci if data or token not in ignore_if_empty: 779e31aef6aSopenharmony_ci yield lineno, token, data 780e31aef6aSopenharmony_ci 781e31aef6aSopenharmony_ci lineno += data.count("\n") + newlines_stripped 782e31aef6aSopenharmony_ci newlines_stripped = 0 783e31aef6aSopenharmony_ci 784e31aef6aSopenharmony_ci # strings as token just are yielded as it. 785e31aef6aSopenharmony_ci else: 786e31aef6aSopenharmony_ci data = m.group() 787e31aef6aSopenharmony_ci 788e31aef6aSopenharmony_ci # update brace/parentheses balance 789e31aef6aSopenharmony_ci if tokens == TOKEN_OPERATOR: 790e31aef6aSopenharmony_ci if data == "{": 791e31aef6aSopenharmony_ci balancing_stack.append("}") 792e31aef6aSopenharmony_ci elif data == "(": 793e31aef6aSopenharmony_ci balancing_stack.append(")") 794e31aef6aSopenharmony_ci elif data == "[": 795e31aef6aSopenharmony_ci balancing_stack.append("]") 796e31aef6aSopenharmony_ci elif data in ("}", ")", "]"): 797e31aef6aSopenharmony_ci if not balancing_stack: 798e31aef6aSopenharmony_ci raise TemplateSyntaxError( 799e31aef6aSopenharmony_ci f"unexpected '{data}'", lineno, name, filename 800e31aef6aSopenharmony_ci ) 801e31aef6aSopenharmony_ci 802e31aef6aSopenharmony_ci expected_op = balancing_stack.pop() 803e31aef6aSopenharmony_ci 804e31aef6aSopenharmony_ci if expected_op != data: 805e31aef6aSopenharmony_ci raise TemplateSyntaxError( 806e31aef6aSopenharmony_ci f"unexpected '{data}', expected '{expected_op}'", 807e31aef6aSopenharmony_ci lineno, 808e31aef6aSopenharmony_ci name, 809e31aef6aSopenharmony_ci filename, 810e31aef6aSopenharmony_ci ) 811e31aef6aSopenharmony_ci 812e31aef6aSopenharmony_ci # yield items 813e31aef6aSopenharmony_ci if data or tokens not in ignore_if_empty: 814e31aef6aSopenharmony_ci yield lineno, tokens, data 815e31aef6aSopenharmony_ci 816e31aef6aSopenharmony_ci lineno += data.count("\n") 817e31aef6aSopenharmony_ci 818e31aef6aSopenharmony_ci line_starting = m.group()[-1:] == "\n" 819e31aef6aSopenharmony_ci # fetch new position into new variable so that we can check 820e31aef6aSopenharmony_ci # if there is a internal parsing error which would result 821e31aef6aSopenharmony_ci # in an infinite loop 822e31aef6aSopenharmony_ci pos2 = m.end() 823e31aef6aSopenharmony_ci 824e31aef6aSopenharmony_ci # handle state changes 825e31aef6aSopenharmony_ci if new_state is not None: 826e31aef6aSopenharmony_ci # remove the uppermost state 827e31aef6aSopenharmony_ci if new_state == "#pop": 828e31aef6aSopenharmony_ci stack.pop() 829e31aef6aSopenharmony_ci # resolve the new state by group checking 830e31aef6aSopenharmony_ci elif new_state == "#bygroup": 831e31aef6aSopenharmony_ci for key, value in m.groupdict().items(): 832e31aef6aSopenharmony_ci if value is not None: 833e31aef6aSopenharmony_ci stack.append(key) 834e31aef6aSopenharmony_ci break 835e31aef6aSopenharmony_ci else: 836e31aef6aSopenharmony_ci raise RuntimeError( 837e31aef6aSopenharmony_ci f"{regex!r} wanted to resolve the new state dynamically" 838e31aef6aSopenharmony_ci f" but no group matched" 839e31aef6aSopenharmony_ci ) 840e31aef6aSopenharmony_ci # direct state name given 841e31aef6aSopenharmony_ci else: 842e31aef6aSopenharmony_ci stack.append(new_state) 843e31aef6aSopenharmony_ci 844e31aef6aSopenharmony_ci statetokens = self.rules[stack[-1]] 845e31aef6aSopenharmony_ci # we are still at the same position and no stack change. 846e31aef6aSopenharmony_ci # this means a loop without break condition, avoid that and 847e31aef6aSopenharmony_ci # raise error 848e31aef6aSopenharmony_ci elif pos2 == pos: 849e31aef6aSopenharmony_ci raise RuntimeError( 850e31aef6aSopenharmony_ci f"{regex!r} yielded empty string without stack change" 851e31aef6aSopenharmony_ci ) 852e31aef6aSopenharmony_ci 853e31aef6aSopenharmony_ci # publish new function and start again 854e31aef6aSopenharmony_ci pos = pos2 855e31aef6aSopenharmony_ci break 856e31aef6aSopenharmony_ci # if loop terminated without break we haven't found a single match 857e31aef6aSopenharmony_ci # either we are at the end of the file or we have a problem 858e31aef6aSopenharmony_ci else: 859e31aef6aSopenharmony_ci # end of text 860e31aef6aSopenharmony_ci if pos >= source_length: 861e31aef6aSopenharmony_ci return 862e31aef6aSopenharmony_ci 863e31aef6aSopenharmony_ci # something went wrong 864e31aef6aSopenharmony_ci raise TemplateSyntaxError( 865e31aef6aSopenharmony_ci f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename 866e31aef6aSopenharmony_ci ) 867