# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2017 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re
from collections import deque
from operator import itemgetter

from jinja2._compat import implements_iterator, intern, iteritems, text_type
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

try:
    # check if this Python supports Unicode identifiers
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    # no Unicode support, use ASCII identifiers
    name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')
    check_ident = False
else:
    # Unicode support, build a pattern to match valid characters, and set flag
    # to use str.isidentifier to validate during lexing
    from jinja2 import _identifier
    name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern))
    check_ident = True
    # remove the pattern from memory after building the regex
    import sys
    del sys.modules['jinja2._identifier']
    import jinja2
    del jinja2._identifier
    del _identifier

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+': TOKEN_ADD,
    '-': TOKEN_SUB,
    '/': TOKEN_DIV,
    '//': TOKEN_FLOORDIV,
    '*': TOKEN_MUL,
    '%': TOKEN_MOD,
    '**': TOKEN_POW,
    '~': TOKEN_TILDE,
    '[': TOKEN_LBRACKET,
    ']': TOKEN_RBRACKET,
    '(': TOKEN_LPAREN,
    ')': TOKEN_RPAREN,
    '{': TOKEN_LBRACE,
    '}': TOKEN_RBRACE,
    '==': TOKEN_EQ,
    '!=': TOKEN_NE,
    '>': TOKEN_GT,
    '>=': TOKEN_GTEQ,
    '<': TOKEN_LT,
    '<=': TOKEN_LTEQ,
    '=': TOKEN_ASSIGN,
    '.': TOKEN_DOT,
    ':': TOKEN_COLON,
    '|': TOKEN_PIPE,
    ',': TOKEN_COMMA,
    ';': TOKEN_SEMICOLON
}

# token type -> operator text; the assert guards against two operators
# accidentally sharing one token type
reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
# longest operators first so that e.g. '**' wins over '*'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    """Return a human readable description for a token type.

    Operator token types are described by their operator text, a fixed
    set of structural token types by an English phrase, and every other
    token type is returned unchanged.
    """
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: 'begin of comment',
        TOKEN_COMMENT_END: 'end of comment',
        TOKEN_COMMENT: 'comment',
        TOKEN_LINECOMMENT: 'comment',
        TOKEN_BLOCK_BEGIN: 'begin of statement block',
        TOKEN_BLOCK_END: 'end of statement block',
        TOKEN_VARIABLE_BEGIN: 'begin of print statement',
        TOKEN_VARIABLE_END: 'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement',
        TOKEN_LINESTATEMENT_END: 'end of line statement',
        TOKEN_DATA: 'template data / text',
        TOKEN_EOF: 'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules.

    Each returned item is a ``(name, regex_source)`` tuple.  The rules are
    ordered by the length of their delimiter, longest first.
    """
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    # the leading length is only the sort key, drop it from the result
    return [x[1:] for x in sorted(rules, reverse=True)]


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class.  A ``(lineno, type, value)`` tuple with named access."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        # the type is interned so that it can be compared by identity
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        # replace the initial placeholder with the first real token
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer


class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                lstrip_re,
                e(environment.block_start_string),
                no_lstrip_re,
                e(environment.block_start_string),
            )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                lstrip_re,
                e(environment.comment_start_string),
                no_variable_re,
                e(environment.comment_start_string),
            )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to replace every newline
        sequence with the configured :attr:`newline_sequence`.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter + wrap and wraps the result in a token stream.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
                if check_ident and not value.isidentifier():
                    raise TemplateSyntaxError(
                        'Invalid character in identifier',
                        lineno, name, filename)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            # splitlines() drops a trailing newline, add an empty line back
            # so that the join below restores it
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token just are yielded as it.
6691cb0ef41Sopenharmony_ci else: 6701cb0ef41Sopenharmony_ci data = m.group() 6711cb0ef41Sopenharmony_ci # update brace/parentheses balance 6721cb0ef41Sopenharmony_ci if tokens == 'operator': 6731cb0ef41Sopenharmony_ci if data == '{': 6741cb0ef41Sopenharmony_ci balancing_stack.append('}') 6751cb0ef41Sopenharmony_ci elif data == '(': 6761cb0ef41Sopenharmony_ci balancing_stack.append(')') 6771cb0ef41Sopenharmony_ci elif data == '[': 6781cb0ef41Sopenharmony_ci balancing_stack.append(']') 6791cb0ef41Sopenharmony_ci elif data in ('}', ')', ']'): 6801cb0ef41Sopenharmony_ci if not balancing_stack: 6811cb0ef41Sopenharmony_ci raise TemplateSyntaxError('unexpected \'%s\'' % 6821cb0ef41Sopenharmony_ci data, lineno, name, 6831cb0ef41Sopenharmony_ci filename) 6841cb0ef41Sopenharmony_ci expected_op = balancing_stack.pop() 6851cb0ef41Sopenharmony_ci if expected_op != data: 6861cb0ef41Sopenharmony_ci raise TemplateSyntaxError('unexpected \'%s\', ' 6871cb0ef41Sopenharmony_ci 'expected \'%s\'' % 6881cb0ef41Sopenharmony_ci (data, expected_op), 6891cb0ef41Sopenharmony_ci lineno, name, 6901cb0ef41Sopenharmony_ci filename) 6911cb0ef41Sopenharmony_ci # yield items 6921cb0ef41Sopenharmony_ci if data or tokens not in ignore_if_empty: 6931cb0ef41Sopenharmony_ci yield lineno, tokens, data 6941cb0ef41Sopenharmony_ci lineno += data.count('\n') 6951cb0ef41Sopenharmony_ci 6961cb0ef41Sopenharmony_ci # fetch new position into new variable so that we can check 6971cb0ef41Sopenharmony_ci # if there is a internal parsing error which would result 6981cb0ef41Sopenharmony_ci # in an infinite loop 6991cb0ef41Sopenharmony_ci pos2 = m.end() 7001cb0ef41Sopenharmony_ci 7011cb0ef41Sopenharmony_ci # handle state changes 7021cb0ef41Sopenharmony_ci if new_state is not None: 7031cb0ef41Sopenharmony_ci # remove the uppermost state 7041cb0ef41Sopenharmony_ci if new_state == '#pop': 7051cb0ef41Sopenharmony_ci stack.pop() 7061cb0ef41Sopenharmony_ci # resolve the new state by group checking 7071cb0ef41Sopenharmony_ci 
elif new_state == '#bygroup': 7081cb0ef41Sopenharmony_ci for key, value in iteritems(m.groupdict()): 7091cb0ef41Sopenharmony_ci if value is not None: 7101cb0ef41Sopenharmony_ci stack.append(key) 7111cb0ef41Sopenharmony_ci break 7121cb0ef41Sopenharmony_ci else: 7131cb0ef41Sopenharmony_ci raise RuntimeError('%r wanted to resolve the ' 7141cb0ef41Sopenharmony_ci 'new state dynamically but' 7151cb0ef41Sopenharmony_ci ' no group matched' % 7161cb0ef41Sopenharmony_ci regex) 7171cb0ef41Sopenharmony_ci # direct state name given 7181cb0ef41Sopenharmony_ci else: 7191cb0ef41Sopenharmony_ci stack.append(new_state) 7201cb0ef41Sopenharmony_ci statetokens = self.rules[stack[-1]] 7211cb0ef41Sopenharmony_ci # we are still at the same position and no stack change. 7221cb0ef41Sopenharmony_ci # this means a loop without break condition, avoid that and 7231cb0ef41Sopenharmony_ci # raise error 7241cb0ef41Sopenharmony_ci elif pos2 == pos: 7251cb0ef41Sopenharmony_ci raise RuntimeError('%r yielded empty string without ' 7261cb0ef41Sopenharmony_ci 'stack change' % regex) 7271cb0ef41Sopenharmony_ci # publish new function and start again 7281cb0ef41Sopenharmony_ci pos = pos2 7291cb0ef41Sopenharmony_ci break 7301cb0ef41Sopenharmony_ci # if loop terminated without break we haven't found a single match 7311cb0ef41Sopenharmony_ci # either we are at the end of the file or we have a problem 7321cb0ef41Sopenharmony_ci else: 7331cb0ef41Sopenharmony_ci # end of text 7341cb0ef41Sopenharmony_ci if pos >= source_length: 7351cb0ef41Sopenharmony_ci return 7361cb0ef41Sopenharmony_ci # something went wrong 7371cb0ef41Sopenharmony_ci raise TemplateSyntaxError('unexpected char %r at %d' % 7381cb0ef41Sopenharmony_ci (source[pos], pos), lineno, 7391cb0ef41Sopenharmony_ci name, filename) 740