1e31aef6aSopenharmony_ci"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
2e31aef6aSopenharmony_ciis used to do some preprocessing. It filters out invalid operators like
3e31aef6aSopenharmony_cithe bitshift operators we don't allow in templates. It separates
4e31aef6aSopenharmony_citemplate code and python code in expressions.
5e31aef6aSopenharmony_ci"""
6e31aef6aSopenharmony_ciimport re
7e31aef6aSopenharmony_ciimport typing as t
8e31aef6aSopenharmony_cifrom ast import literal_eval
9e31aef6aSopenharmony_cifrom collections import deque
10e31aef6aSopenharmony_cifrom sys import intern
11e31aef6aSopenharmony_ci
12e31aef6aSopenharmony_cifrom ._identifier import pattern as name_re
13e31aef6aSopenharmony_cifrom .exceptions import TemplateSyntaxError
14e31aef6aSopenharmony_cifrom .utils import LRUCache
15e31aef6aSopenharmony_ci
16e31aef6aSopenharmony_ciif t.TYPE_CHECKING:
17e31aef6aSopenharmony_ci    import typing_extensions as te
18e31aef6aSopenharmony_ci    from .environment import Environment
19e31aef6aSopenharmony_ci
# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50)  # type: ignore

# static regular expressions
whitespace_re = re.compile(r"\s+")
newline_re = re.compile(r"(\r\n|\r|\n)")
# single- or double-quoted string with backslash escapes; re.S lets
# strings span newlines
string_re = re.compile(
    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
)
# Python-style integer literals, including optional "_" digit separators
integer_re = re.compile(
    r"""
    (
        0b(_?[0-1])+ # binary
    |
        0o(_?[0-7])+ # octal
    |
        0x(_?[\da-f])+ # hex
    |
        [1-9](_?\d)* # decimal
    |
        0(_?0)* # decimal zero
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
# float literals: either a fractional part, an exponent, or both must be
# present, so plain integers never match
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)
59e31aef6aSopenharmony_ci
# intern the token strings and keep references to them so identity
# comparisons ("is") can be used on token types throughout the lexer
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

# token type -> operator text, used when describing tokens in errors
reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
# longest operators first so e.g. "**" is matched before "*"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)

# token types that the parser never sees and the stream filters out
ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
# token types that are dropped entirely when their value is empty
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)
161e31aef6aSopenharmony_ci
162e31aef6aSopenharmony_ci
def _describe_token_type(token_type: str) -> str:
    """Return a human readable description for a token type.

    Operator tokens are described by their source text; structural
    tokens get a prose description; anything else falls back to the
    raw token type string.
    """
    operator_text = reverse_operators.get(token_type)

    if operator_text is not None:
        return operator_text

    descriptions = {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }
    return descriptions.get(token_type, token_type)
181e31aef6aSopenharmony_ci
182e31aef6aSopenharmony_ci
def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    # name tokens are best described by their actual value
    return token.value if token.type == TOKEN_NAME else _describe_token_type(token.type)
189e31aef6aSopenharmony_ci
190e31aef6aSopenharmony_ci
def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" not in expr:
        return _describe_token_type(expr)

    token_type, value = expr.split(":", 1)

    # "name:foo" is best described by the name itself
    if token_type == TOKEN_NAME:
        return value

    return _describe_token_type(token_type)
202e31aef6aSopenharmony_ci
203e31aef6aSopenharmony_ci
def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    # \r\n counts as a single newline because the pattern matches it first
    return sum(1 for _ in newline_re.finditer(value))
209e31aef6aSopenharmony_ci
210e31aef6aSopenharmony_ci
def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles all the rules from the environment into a list of rules."""
    escape = re.escape

    # Each entry is (delimiter length, token type, escaped pattern).
    # The length is the primary sort key so that longer delimiters win
    # when two of them share a common prefix.
    candidates = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            escape(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            escape(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            escape(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        candidates.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + escape(environment.line_statement_prefix),
            )
        )

    if environment.line_comment_prefix is not None:
        candidates.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + escape(environment.line_comment_prefix),
            )
        )

    candidates.sort(reverse=True)
    # drop the length, keeping (token type, pattern) pairs
    return [(token_type, pattern) for _, token_type, pattern in candidates]
250e31aef6aSopenharmony_ci
251e31aef6aSopenharmony_ci
class Failure:
    """Callable placeholder that raises a `TemplateSyntaxError`.

    The `Lexer` stores instances in its rule tables to report known
    error situations at the exact position where they occur.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.error_class = cls
        self.message = message

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        # invoked by the lexer when the failing rule matched
        raise self.error_class(self.message, lineno, filename)
265e31aef6aSopenharmony_ci
266e31aef6aSopenharmony_ci
class Token(t.NamedTuple):
    # (lineno, type, value) triple produced by the lexer
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # plain string comparison on purpose: test_any is usually passed
        # iterables of strings that were never interned
        if expr == self.type:
            return True

        if ":" in expr:
            wanted_type, _, wanted_value = expr.partition(":")
            return wanted_type == self.type and wanted_value == self.value

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(map(self.test, iterable))
293e31aef6aSopenharmony_ci
294e31aef6aSopenharmony_ci
class TokenStreamIterator:
    """Iterator that drives a :class:`TokenStream` forward, stopping
    (and closing the stream) once the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        current = self.stream.current

        # identity check is safe: token types are interned strings
        if current.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        # advance the underlying stream, then hand out the old token
        next(self.stream)
        return current
315e31aef6aSopenharmony_ci
316e31aef6aSopenharmony_ci
class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        # tokens pushed back via push() are served before the iterator
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        # placeholder token, replaced by the first real token right away
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        # truthy while there is anything left to consume
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token."""
        # advance, remember the next token, then rewind by pushing it
        # back and restoring the previous current token
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        # pushed-back tokens take priority over the underlying iterator
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                # exhausted: pin the stream to an eof token
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)
424e31aef6aSopenharmony_ci
425e31aef6aSopenharmony_ci
def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    # every setting that influences lexing takes part in the cache key
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )

    cached = _lexer_cache.get(key)

    if cached is not None:
        return cached

    lexer = Lexer(environment)
    _lexer_cache[key] = lexer
    return lexer
448e31aef6aSopenharmony_ci
449e31aef6aSopenharmony_ci
class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # tuple.__new__ expects a single iterable argument, so the star-args
    # cannot simply be forwarded; without this override instantiation
    # with multiple members would fail.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)
461e31aef6aSopenharmony_ci
462e31aef6aSopenharmony_ci
class _Rule(t.NamedTuple):
    # compiled regex matched against the remaining source text
    pattern: t.Pattern[str]
    # token type(s) emitted on a match; a ``Failure`` raises instead
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    # state transition: "#pop", "#bygroup", a state name, or None to stay
    command: t.Optional[str]
467e31aef6aSopenharmony_ci
468e31aef6aSopenharmony_ci
469e31aef6aSopenharmony_ciclass Lexer:
470e31aef6aSopenharmony_ci    """Class that implements a lexer for a given environment. Automatically
471e31aef6aSopenharmony_ci    created by the environment class, usually you don't have to do that.
472e31aef6aSopenharmony_ci
473e31aef6aSopenharmony_ci    Note that the lexer is not automatically bound to an environment.
474e31aef6aSopenharmony_ci    Multiple environments can share the same lexer.
475e31aef6aSopenharmony_ci    """
476e31aef6aSopenharmony_ci
477e31aef6aSopenharmony_ci    def __init__(self, environment: "Environment") -> None:
478e31aef6aSopenharmony_ci        # shortcuts
479e31aef6aSopenharmony_ci        e = re.escape
480e31aef6aSopenharmony_ci
481e31aef6aSopenharmony_ci        def c(x: str) -> t.Pattern[str]:
482e31aef6aSopenharmony_ci            return re.compile(x, re.M | re.S)
483e31aef6aSopenharmony_ci
484e31aef6aSopenharmony_ci        # lexing rules for tags
485e31aef6aSopenharmony_ci        tag_rules: t.List[_Rule] = [
486e31aef6aSopenharmony_ci            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
487e31aef6aSopenharmony_ci            _Rule(float_re, TOKEN_FLOAT, None),
488e31aef6aSopenharmony_ci            _Rule(integer_re, TOKEN_INTEGER, None),
489e31aef6aSopenharmony_ci            _Rule(name_re, TOKEN_NAME, None),
490e31aef6aSopenharmony_ci            _Rule(string_re, TOKEN_STRING, None),
491e31aef6aSopenharmony_ci            _Rule(operator_re, TOKEN_OPERATOR, None),
492e31aef6aSopenharmony_ci        ]
493e31aef6aSopenharmony_ci
494e31aef6aSopenharmony_ci        # assemble the root lexing rule. because "|" is ungreedy
495e31aef6aSopenharmony_ci        # we have to sort by length so that the lexer continues working
496e31aef6aSopenharmony_ci        # as expected when we have parsing rules like <% for block and
497e31aef6aSopenharmony_ci        # <%= for variables. (if someone wants asp like syntax)
498e31aef6aSopenharmony_ci        # variables are just part of the rules if variable processing
499e31aef6aSopenharmony_ci        # is required.
500e31aef6aSopenharmony_ci        root_tag_rules = compile_rules(environment)
501e31aef6aSopenharmony_ci
502e31aef6aSopenharmony_ci        block_start_re = e(environment.block_start_string)
503e31aef6aSopenharmony_ci        block_end_re = e(environment.block_end_string)
504e31aef6aSopenharmony_ci        comment_end_re = e(environment.comment_end_string)
505e31aef6aSopenharmony_ci        variable_end_re = e(environment.variable_end_string)
506e31aef6aSopenharmony_ci
507e31aef6aSopenharmony_ci        # block suffix if trimming is enabled
508e31aef6aSopenharmony_ci        block_suffix_re = "\\n?" if environment.trim_blocks else ""
509e31aef6aSopenharmony_ci
510e31aef6aSopenharmony_ci        self.lstrip_blocks = environment.lstrip_blocks
511e31aef6aSopenharmony_ci
512e31aef6aSopenharmony_ci        self.newline_sequence = environment.newline_sequence
513e31aef6aSopenharmony_ci        self.keep_trailing_newline = environment.keep_trailing_newline
514e31aef6aSopenharmony_ci
515e31aef6aSopenharmony_ci        root_raw_re = (
516e31aef6aSopenharmony_ci            rf"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
517e31aef6aSopenharmony_ci            rf"(?:\-{block_end_re}\s*|{block_end_re}))"
518e31aef6aSopenharmony_ci        )
519e31aef6aSopenharmony_ci        root_parts_re = "|".join(
520e31aef6aSopenharmony_ci            [root_raw_re] + [rf"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
521e31aef6aSopenharmony_ci        )
522e31aef6aSopenharmony_ci
523e31aef6aSopenharmony_ci        # global lexing rules
524e31aef6aSopenharmony_ci        self.rules: t.Dict[str, t.List[_Rule]] = {
525e31aef6aSopenharmony_ci            "root": [
526e31aef6aSopenharmony_ci                # directives
527e31aef6aSopenharmony_ci                _Rule(
528e31aef6aSopenharmony_ci                    c(rf"(.*?)(?:{root_parts_re})"),
529e31aef6aSopenharmony_ci                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
530e31aef6aSopenharmony_ci                    "#bygroup",
531e31aef6aSopenharmony_ci                ),
532e31aef6aSopenharmony_ci                # data
533e31aef6aSopenharmony_ci                _Rule(c(".+"), TOKEN_DATA, None),
534e31aef6aSopenharmony_ci            ],
535e31aef6aSopenharmony_ci            # comments
536e31aef6aSopenharmony_ci            TOKEN_COMMENT_BEGIN: [
537e31aef6aSopenharmony_ci                _Rule(
538e31aef6aSopenharmony_ci                    c(
539e31aef6aSopenharmony_ci                        rf"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
540e31aef6aSopenharmony_ci                        rf"|{comment_end_re}{block_suffix_re}))"
541e31aef6aSopenharmony_ci                    ),
542e31aef6aSopenharmony_ci                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
543e31aef6aSopenharmony_ci                    "#pop",
544e31aef6aSopenharmony_ci                ),
545e31aef6aSopenharmony_ci                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
546e31aef6aSopenharmony_ci            ],
547e31aef6aSopenharmony_ci            # blocks
548e31aef6aSopenharmony_ci            TOKEN_BLOCK_BEGIN: [
549e31aef6aSopenharmony_ci                _Rule(
550e31aef6aSopenharmony_ci                    c(
551e31aef6aSopenharmony_ci                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
552e31aef6aSopenharmony_ci                        rf"|{block_end_re}{block_suffix_re})"
553e31aef6aSopenharmony_ci                    ),
554e31aef6aSopenharmony_ci                    TOKEN_BLOCK_END,
555e31aef6aSopenharmony_ci                    "#pop",
556e31aef6aSopenharmony_ci                ),
557e31aef6aSopenharmony_ci            ]
558e31aef6aSopenharmony_ci            + tag_rules,
559e31aef6aSopenharmony_ci            # variables
560e31aef6aSopenharmony_ci            TOKEN_VARIABLE_BEGIN: [
561e31aef6aSopenharmony_ci                _Rule(
562e31aef6aSopenharmony_ci                    c(rf"\-{variable_end_re}\s*|{variable_end_re}"),
563e31aef6aSopenharmony_ci                    TOKEN_VARIABLE_END,
564e31aef6aSopenharmony_ci                    "#pop",
565e31aef6aSopenharmony_ci                )
566e31aef6aSopenharmony_ci            ]
567e31aef6aSopenharmony_ci            + tag_rules,
568e31aef6aSopenharmony_ci            # raw block
569e31aef6aSopenharmony_ci            TOKEN_RAW_BEGIN: [
570e31aef6aSopenharmony_ci                _Rule(
571e31aef6aSopenharmony_ci                    c(
572e31aef6aSopenharmony_ci                        rf"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
573e31aef6aSopenharmony_ci                        rf"(?:\+{block_end_re}|\-{block_end_re}\s*"
574e31aef6aSopenharmony_ci                        rf"|{block_end_re}{block_suffix_re}))"
575e31aef6aSopenharmony_ci                    ),
576e31aef6aSopenharmony_ci                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
577e31aef6aSopenharmony_ci                    "#pop",
578e31aef6aSopenharmony_ci                ),
579e31aef6aSopenharmony_ci                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
580e31aef6aSopenharmony_ci            ],
581e31aef6aSopenharmony_ci            # line statements
582e31aef6aSopenharmony_ci            TOKEN_LINESTATEMENT_BEGIN: [
583e31aef6aSopenharmony_ci                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
584e31aef6aSopenharmony_ci            ]
585e31aef6aSopenharmony_ci            + tag_rules,
586e31aef6aSopenharmony_ci            # line comments
587e31aef6aSopenharmony_ci            TOKEN_LINECOMMENT_BEGIN: [
588e31aef6aSopenharmony_ci                _Rule(
589e31aef6aSopenharmony_ci                    c(r"(.*?)()(?=\n|$)"),
590e31aef6aSopenharmony_ci                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
591e31aef6aSopenharmony_ci                    "#pop",
592e31aef6aSopenharmony_ci                )
593e31aef6aSopenharmony_ci            ],
594e31aef6aSopenharmony_ci        }
595e31aef6aSopenharmony_ci
596e31aef6aSopenharmony_ci    def _normalize_newlines(self, value: str) -> str:
597e31aef6aSopenharmony_ci        """Replace all newlines with the configured sequence in strings
598e31aef6aSopenharmony_ci        and template data.
599e31aef6aSopenharmony_ci        """
600e31aef6aSopenharmony_ci        return newline_re.sub(self.newline_sequence, value)
601e31aef6aSopenharmony_ci
602e31aef6aSopenharmony_ci    def tokenize(
603e31aef6aSopenharmony_ci        self,
604e31aef6aSopenharmony_ci        source: str,
605e31aef6aSopenharmony_ci        name: t.Optional[str] = None,
606e31aef6aSopenharmony_ci        filename: t.Optional[str] = None,
607e31aef6aSopenharmony_ci        state: t.Optional[str] = None,
608e31aef6aSopenharmony_ci    ) -> TokenStream:
609e31aef6aSopenharmony_ci        """Calls tokeniter + tokenize and wraps it in a token stream."""
610e31aef6aSopenharmony_ci        stream = self.tokeniter(source, name, filename, state)
611e31aef6aSopenharmony_ci        return TokenStream(self.wrap(stream, name, filename), name, filename)
612e31aef6aSopenharmony_ci
613e31aef6aSopenharmony_ci    def wrap(
614e31aef6aSopenharmony_ci        self,
615e31aef6aSopenharmony_ci        stream: t.Iterable[t.Tuple[int, str, str]],
616e31aef6aSopenharmony_ci        name: t.Optional[str] = None,
617e31aef6aSopenharmony_ci        filename: t.Optional[str] = None,
618e31aef6aSopenharmony_ci    ) -> t.Iterator[Token]:
619e31aef6aSopenharmony_ci        """This is called with the stream as returned by `tokenize` and wraps
620e31aef6aSopenharmony_ci        every token in a :class:`Token` and converts the value.
621e31aef6aSopenharmony_ci        """
622e31aef6aSopenharmony_ci        for lineno, token, value_str in stream:
623e31aef6aSopenharmony_ci            if token in ignored_tokens:
624e31aef6aSopenharmony_ci                continue
625e31aef6aSopenharmony_ci
626e31aef6aSopenharmony_ci            value: t.Any = value_str
627e31aef6aSopenharmony_ci
628e31aef6aSopenharmony_ci            if token == TOKEN_LINESTATEMENT_BEGIN:
629e31aef6aSopenharmony_ci                token = TOKEN_BLOCK_BEGIN
630e31aef6aSopenharmony_ci            elif token == TOKEN_LINESTATEMENT_END:
631e31aef6aSopenharmony_ci                token = TOKEN_BLOCK_END
632e31aef6aSopenharmony_ci            # we are not interested in those tokens in the parser
633e31aef6aSopenharmony_ci            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
634e31aef6aSopenharmony_ci                continue
635e31aef6aSopenharmony_ci            elif token == TOKEN_DATA:
636e31aef6aSopenharmony_ci                value = self._normalize_newlines(value_str)
637e31aef6aSopenharmony_ci            elif token == "keyword":
638e31aef6aSopenharmony_ci                token = value_str
639e31aef6aSopenharmony_ci            elif token == TOKEN_NAME:
640e31aef6aSopenharmony_ci                value = value_str
641e31aef6aSopenharmony_ci
642e31aef6aSopenharmony_ci                if not value.isidentifier():
643e31aef6aSopenharmony_ci                    raise TemplateSyntaxError(
644e31aef6aSopenharmony_ci                        "Invalid character in identifier", lineno, name, filename
645e31aef6aSopenharmony_ci                    )
646e31aef6aSopenharmony_ci            elif token == TOKEN_STRING:
647e31aef6aSopenharmony_ci                # try to unescape string
648e31aef6aSopenharmony_ci                try:
649e31aef6aSopenharmony_ci                    value = (
650e31aef6aSopenharmony_ci                        self._normalize_newlines(value_str[1:-1])
651e31aef6aSopenharmony_ci                        .encode("ascii", "backslashreplace")
652e31aef6aSopenharmony_ci                        .decode("unicode-escape")
653e31aef6aSopenharmony_ci                    )
654e31aef6aSopenharmony_ci                except Exception as e:
655e31aef6aSopenharmony_ci                    msg = str(e).split(":")[-1].strip()
656e31aef6aSopenharmony_ci                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
657e31aef6aSopenharmony_ci            elif token == TOKEN_INTEGER:
658e31aef6aSopenharmony_ci                value = int(value_str.replace("_", ""), 0)
659e31aef6aSopenharmony_ci            elif token == TOKEN_FLOAT:
660e31aef6aSopenharmony_ci                # remove all "_" first to support more Python versions
661e31aef6aSopenharmony_ci                value = literal_eval(value_str.replace("_", ""))
662e31aef6aSopenharmony_ci            elif token == TOKEN_OPERATOR:
663e31aef6aSopenharmony_ci                token = operators[value_str]
664e31aef6aSopenharmony_ci
665e31aef6aSopenharmony_ci            yield Token(lineno, token, value)
666e31aef6aSopenharmony_ci
667e31aef6aSopenharmony_ci    def tokeniter(
668e31aef6aSopenharmony_ci        self,
669e31aef6aSopenharmony_ci        source: str,
670e31aef6aSopenharmony_ci        name: t.Optional[str],
671e31aef6aSopenharmony_ci        filename: t.Optional[str] = None,
672e31aef6aSopenharmony_ci        state: t.Optional[str] = None,
673e31aef6aSopenharmony_ci    ) -> t.Iterator[t.Tuple[int, str, str]]:
674e31aef6aSopenharmony_ci        """This method tokenizes the text and returns the tokens in a
675e31aef6aSopenharmony_ci        generator. Use this method if you just want to tokenize a template.
676e31aef6aSopenharmony_ci
677e31aef6aSopenharmony_ci        .. versionchanged:: 3.0
678e31aef6aSopenharmony_ci            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
679e31aef6aSopenharmony_ci            breaks.
680e31aef6aSopenharmony_ci        """
681e31aef6aSopenharmony_ci        lines = newline_re.split(source)[::2]
682e31aef6aSopenharmony_ci
683e31aef6aSopenharmony_ci        if not self.keep_trailing_newline and lines[-1] == "":
684e31aef6aSopenharmony_ci            del lines[-1]
685e31aef6aSopenharmony_ci
686e31aef6aSopenharmony_ci        source = "\n".join(lines)
687e31aef6aSopenharmony_ci        pos = 0
688e31aef6aSopenharmony_ci        lineno = 1
689e31aef6aSopenharmony_ci        stack = ["root"]
690e31aef6aSopenharmony_ci
691e31aef6aSopenharmony_ci        if state is not None and state != "root":
692e31aef6aSopenharmony_ci            assert state in ("variable", "block"), "invalid state"
693e31aef6aSopenharmony_ci            stack.append(state + "_begin")
694e31aef6aSopenharmony_ci
695e31aef6aSopenharmony_ci        statetokens = self.rules[stack[-1]]
696e31aef6aSopenharmony_ci        source_length = len(source)
697e31aef6aSopenharmony_ci        balancing_stack: t.List[str] = []
698e31aef6aSopenharmony_ci        newlines_stripped = 0
699e31aef6aSopenharmony_ci        line_starting = True
700e31aef6aSopenharmony_ci
701e31aef6aSopenharmony_ci        while True:
702e31aef6aSopenharmony_ci            # tokenizer loop
703e31aef6aSopenharmony_ci            for regex, tokens, new_state in statetokens:
704e31aef6aSopenharmony_ci                m = regex.match(source, pos)
705e31aef6aSopenharmony_ci
706e31aef6aSopenharmony_ci                # if no match we try again with the next rule
707e31aef6aSopenharmony_ci                if m is None:
708e31aef6aSopenharmony_ci                    continue
709e31aef6aSopenharmony_ci
710e31aef6aSopenharmony_ci                # we only match blocks and variables if braces / parentheses
711e31aef6aSopenharmony_ci                # are balanced. continue parsing with the lower rule which
712e31aef6aSopenharmony_ci                # is the operator rule. do this only if the end tags look
713e31aef6aSopenharmony_ci                # like operators
714e31aef6aSopenharmony_ci                if balancing_stack and tokens in (
715e31aef6aSopenharmony_ci                    TOKEN_VARIABLE_END,
716e31aef6aSopenharmony_ci                    TOKEN_BLOCK_END,
717e31aef6aSopenharmony_ci                    TOKEN_LINESTATEMENT_END,
718e31aef6aSopenharmony_ci                ):
719e31aef6aSopenharmony_ci                    continue
720e31aef6aSopenharmony_ci
721e31aef6aSopenharmony_ci                # tuples support more options
722e31aef6aSopenharmony_ci                if isinstance(tokens, tuple):
723e31aef6aSopenharmony_ci                    groups: t.Sequence[str] = m.groups()
724e31aef6aSopenharmony_ci
725e31aef6aSopenharmony_ci                    if isinstance(tokens, OptionalLStrip):
726e31aef6aSopenharmony_ci                        # Rule supports lstrip. Match will look like
727e31aef6aSopenharmony_ci                        # text, block type, whitespace control, type, control, ...
728e31aef6aSopenharmony_ci                        text = groups[0]
729e31aef6aSopenharmony_ci                        # Skipping the text and first type, every other group is the
730e31aef6aSopenharmony_ci                        # whitespace control for each type. One of the groups will be
731e31aef6aSopenharmony_ci                        # -, +, or empty string instead of None.
732e31aef6aSopenharmony_ci                        strip_sign = next(g for g in groups[2::2] if g is not None)
733e31aef6aSopenharmony_ci
734e31aef6aSopenharmony_ci                        if strip_sign == "-":
735e31aef6aSopenharmony_ci                            # Strip all whitespace between the text and the tag.
736e31aef6aSopenharmony_ci                            stripped = text.rstrip()
737e31aef6aSopenharmony_ci                            newlines_stripped = text[len(stripped) :].count("\n")
738e31aef6aSopenharmony_ci                            groups = [stripped, *groups[1:]]
739e31aef6aSopenharmony_ci                        elif (
740e31aef6aSopenharmony_ci                            # Not marked for preserving whitespace.
741e31aef6aSopenharmony_ci                            strip_sign != "+"
742e31aef6aSopenharmony_ci                            # lstrip is enabled.
743e31aef6aSopenharmony_ci                            and self.lstrip_blocks
744e31aef6aSopenharmony_ci                            # Not a variable expression.
745e31aef6aSopenharmony_ci                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
746e31aef6aSopenharmony_ci                        ):
747e31aef6aSopenharmony_ci                            # The start of text between the last newline and the tag.
748e31aef6aSopenharmony_ci                            l_pos = text.rfind("\n") + 1
749e31aef6aSopenharmony_ci
750e31aef6aSopenharmony_ci                            if l_pos > 0 or line_starting:
751e31aef6aSopenharmony_ci                                # If there's only whitespace between the newline and the
752e31aef6aSopenharmony_ci                                # tag, strip it.
753e31aef6aSopenharmony_ci                                if whitespace_re.fullmatch(text, l_pos):
754e31aef6aSopenharmony_ci                                    groups = [text[:l_pos], *groups[1:]]
755e31aef6aSopenharmony_ci
756e31aef6aSopenharmony_ci                    for idx, token in enumerate(tokens):
757e31aef6aSopenharmony_ci                        # failure group
758e31aef6aSopenharmony_ci                        if token.__class__ is Failure:
759e31aef6aSopenharmony_ci                            raise token(lineno, filename)
760e31aef6aSopenharmony_ci                        # bygroup is a bit more complex, in that case we
761e31aef6aSopenharmony_ci                        # yield for the current token the first named
762e31aef6aSopenharmony_ci                        # group that matched
763e31aef6aSopenharmony_ci                        elif token == "#bygroup":
764e31aef6aSopenharmony_ci                            for key, value in m.groupdict().items():
765e31aef6aSopenharmony_ci                                if value is not None:
766e31aef6aSopenharmony_ci                                    yield lineno, key, value
767e31aef6aSopenharmony_ci                                    lineno += value.count("\n")
768e31aef6aSopenharmony_ci                                    break
769e31aef6aSopenharmony_ci                            else:
770e31aef6aSopenharmony_ci                                raise RuntimeError(
771e31aef6aSopenharmony_ci                                    f"{regex!r} wanted to resolve the token dynamically"
772e31aef6aSopenharmony_ci                                    " but no group matched"
773e31aef6aSopenharmony_ci                                )
774e31aef6aSopenharmony_ci                        # normal group
775e31aef6aSopenharmony_ci                        else:
776e31aef6aSopenharmony_ci                            data = groups[idx]
777e31aef6aSopenharmony_ci
778e31aef6aSopenharmony_ci                            if data or token not in ignore_if_empty:
779e31aef6aSopenharmony_ci                                yield lineno, token, data
780e31aef6aSopenharmony_ci
781e31aef6aSopenharmony_ci                            lineno += data.count("\n") + newlines_stripped
782e31aef6aSopenharmony_ci                            newlines_stripped = 0
783e31aef6aSopenharmony_ci
784e31aef6aSopenharmony_ci                # strings as token just are yielded as it.
785e31aef6aSopenharmony_ci                else:
786e31aef6aSopenharmony_ci                    data = m.group()
787e31aef6aSopenharmony_ci
788e31aef6aSopenharmony_ci                    # update brace/parentheses balance
789e31aef6aSopenharmony_ci                    if tokens == TOKEN_OPERATOR:
790e31aef6aSopenharmony_ci                        if data == "{":
791e31aef6aSopenharmony_ci                            balancing_stack.append("}")
792e31aef6aSopenharmony_ci                        elif data == "(":
793e31aef6aSopenharmony_ci                            balancing_stack.append(")")
794e31aef6aSopenharmony_ci                        elif data == "[":
795e31aef6aSopenharmony_ci                            balancing_stack.append("]")
796e31aef6aSopenharmony_ci                        elif data in ("}", ")", "]"):
797e31aef6aSopenharmony_ci                            if not balancing_stack:
798e31aef6aSopenharmony_ci                                raise TemplateSyntaxError(
799e31aef6aSopenharmony_ci                                    f"unexpected '{data}'", lineno, name, filename
800e31aef6aSopenharmony_ci                                )
801e31aef6aSopenharmony_ci
802e31aef6aSopenharmony_ci                            expected_op = balancing_stack.pop()
803e31aef6aSopenharmony_ci
804e31aef6aSopenharmony_ci                            if expected_op != data:
805e31aef6aSopenharmony_ci                                raise TemplateSyntaxError(
806e31aef6aSopenharmony_ci                                    f"unexpected '{data}', expected '{expected_op}'",
807e31aef6aSopenharmony_ci                                    lineno,
808e31aef6aSopenharmony_ci                                    name,
809e31aef6aSopenharmony_ci                                    filename,
810e31aef6aSopenharmony_ci                                )
811e31aef6aSopenharmony_ci
812e31aef6aSopenharmony_ci                    # yield items
813e31aef6aSopenharmony_ci                    if data or tokens not in ignore_if_empty:
814e31aef6aSopenharmony_ci                        yield lineno, tokens, data
815e31aef6aSopenharmony_ci
816e31aef6aSopenharmony_ci                    lineno += data.count("\n")
817e31aef6aSopenharmony_ci
818e31aef6aSopenharmony_ci                line_starting = m.group()[-1:] == "\n"
819e31aef6aSopenharmony_ci                # fetch new position into new variable so that we can check
820e31aef6aSopenharmony_ci                # if there is a internal parsing error which would result
821e31aef6aSopenharmony_ci                # in an infinite loop
822e31aef6aSopenharmony_ci                pos2 = m.end()
823e31aef6aSopenharmony_ci
824e31aef6aSopenharmony_ci                # handle state changes
825e31aef6aSopenharmony_ci                if new_state is not None:
826e31aef6aSopenharmony_ci                    # remove the uppermost state
827e31aef6aSopenharmony_ci                    if new_state == "#pop":
828e31aef6aSopenharmony_ci                        stack.pop()
829e31aef6aSopenharmony_ci                    # resolve the new state by group checking
830e31aef6aSopenharmony_ci                    elif new_state == "#bygroup":
831e31aef6aSopenharmony_ci                        for key, value in m.groupdict().items():
832e31aef6aSopenharmony_ci                            if value is not None:
833e31aef6aSopenharmony_ci                                stack.append(key)
834e31aef6aSopenharmony_ci                                break
835e31aef6aSopenharmony_ci                        else:
836e31aef6aSopenharmony_ci                            raise RuntimeError(
837e31aef6aSopenharmony_ci                                f"{regex!r} wanted to resolve the new state dynamically"
838e31aef6aSopenharmony_ci                                f" but no group matched"
839e31aef6aSopenharmony_ci                            )
840e31aef6aSopenharmony_ci                    # direct state name given
841e31aef6aSopenharmony_ci                    else:
842e31aef6aSopenharmony_ci                        stack.append(new_state)
843e31aef6aSopenharmony_ci
844e31aef6aSopenharmony_ci                    statetokens = self.rules[stack[-1]]
845e31aef6aSopenharmony_ci                # we are still at the same position and no stack change.
846e31aef6aSopenharmony_ci                # this means a loop without break condition, avoid that and
847e31aef6aSopenharmony_ci                # raise error
848e31aef6aSopenharmony_ci                elif pos2 == pos:
849e31aef6aSopenharmony_ci                    raise RuntimeError(
850e31aef6aSopenharmony_ci                        f"{regex!r} yielded empty string without stack change"
851e31aef6aSopenharmony_ci                    )
852e31aef6aSopenharmony_ci
853e31aef6aSopenharmony_ci                # publish new function and start again
854e31aef6aSopenharmony_ci                pos = pos2
855e31aef6aSopenharmony_ci                break
856e31aef6aSopenharmony_ci            # if loop terminated without break we haven't found a single match
857e31aef6aSopenharmony_ci            # either we are at the end of the file or we have a problem
858e31aef6aSopenharmony_ci            else:
859e31aef6aSopenharmony_ci                # end of text
860e31aef6aSopenharmony_ci                if pos >= source_length:
861e31aef6aSopenharmony_ci                    return
862e31aef6aSopenharmony_ci
863e31aef6aSopenharmony_ci                # something went wrong
864e31aef6aSopenharmony_ci                raise TemplateSyntaxError(
865e31aef6aSopenharmony_ci                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
866e31aef6aSopenharmony_ci                )
867