1"""Extension API for adding custom tags and behavior."""
2import pprint
3import re
4import typing as t
5
6from markupsafe import Markup
7
8from . import defaults
9from . import nodes
10from .environment import Environment
11from .exceptions import TemplateAssertionError
12from .exceptions import TemplateSyntaxError
13from .runtime import concat  # type: ignore
14from .runtime import Context
15from .runtime import Undefined
16from .utils import import_string
17from .utils import pass_context
18
19if t.TYPE_CHECKING:
20    import typing_extensions as te
21    from .lexer import Token
22    from .lexer import TokenStream
23    from .parser import Parser
24
25    class _TranslationsBasic(te.Protocol):
26        def gettext(self, message: str) -> str:
27            ...
28
29        def ngettext(self, singular: str, plural: str, n: int) -> str:
30            pass
31
32    class _TranslationsContext(_TranslationsBasic):
33        def pgettext(self, context: str, message: str) -> str:
34            ...
35
36        def npgettext(self, context: str, singular: str, plural: str, n: int) -> str:
37            ...
38
39    _SupportedTranslations = t.Union[_TranslationsBasic, _TranslationsContext]
40
41
42# I18N functions available in Jinja templates. If the I18N library
43# provides ugettext, it will be assigned to gettext.
44GETTEXT_FUNCTIONS: t.Tuple[str, ...] = (
45    "_",
46    "gettext",
47    "ngettext",
48    "pgettext",
49    "npgettext",
50)
51_ws_re = re.compile(r"\s*\n\s*")
52
53
class Extension:
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One might want to work around that with a
    factory function, but that is not possible because extensions are
    identified by their import name.  The correct way to configure an
    extension is to store the configuration values on the environment.
    Because the environment then acts as central configuration storage, the
    attribute names may clash, so extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix``, for
    example, is a terrible name while ``fragment_cache_prefix`` is a good
    one, as it includes the name of the extension (fragment cache).
    """

    identifier: t.ClassVar[str]

    def __init_subclass__(cls) -> None:
        cls.identifier = f"{cls.__module__}.{cls.__name__}"

    #: If this extension parses tags, this is the set of tag names it
    #: listens for.
    tags: t.Set[str] = set()

    #: The priority of the extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100
    def __init__(self, environment: Environment) -> None:
        self.environment = environment

    def bind(self, environment: Environment) -> "Extension":
        """Create a copy of this extension bound to another environment."""
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(
        self, source: str, name: t.Optional[str], filename: t.Optional[str] = None
    ) -> str:
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.
        """
        return source

    def filter_stream(
        self, stream: "TokenStream"
    ) -> t.Union["TokenStream", t.Iterable["Token"]]:
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.
        """
        return stream
    def parse(self, parser: "Parser") -> t.Union[nodes.Node, t.List[nodes.Node]]:
        """If any of the :attr:`tags` matched, this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return a single
        node or a list of nodes.
        """
        raise NotImplementedError()

    def attr(
        self, name: str, lineno: t.Optional[int] = None
    ) -> nodes.ExtensionAttribute:
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(
        self,
        name: str,
        args: t.Optional[t.List[nodes.Expr]] = None,
        kwargs: t.Optional[t.List[nodes.Keyword]] = None,
        dyn_args: t.Optional[nodes.Expr] = None,
        dyn_kwargs: t.Optional[nodes.Expr] = None,
        lineno: t.Optional[int] = None,
    ) -> nodes.Call:
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(
            self.attr(name, lineno=lineno),
            args,
            kwargs,
            dyn_args,
            dyn_kwargs,
            lineno=lineno,
        )

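# A minimal example of a custom extension (an illustrative sketch, not part
# of Jinja's public API).  It follows the fragment-cache example from the
# extension documentation: ``{% cache key %}...{% endcache %}`` parses its
# body into a ``CallBlock`` that calls back into the extension at render
# time via :meth:`Extension.call_method`.
class _ExampleCacheExtension(Extension):
    tags = {"cache"}

    def parse(self, parser: "Parser") -> nodes.Node:
        # The first token is the tag name, so grab its line number.
        lineno = next(parser.stream).lineno
        # Parse the cache key expression given to the tag.
        args: t.List[nodes.Expr] = [parser.parse_expression()]
        # Parse the body up to (and dropping) {% endcache %}.
        body = parser.parse_statements(("name:endcache",), drop_needle=True)
        # Wrap the body so it is rendered through ``_cache_support``.
        return nodes.CallBlock(
            self.call_method("_cache_support", args), [], [], body
        ).set_lineno(lineno)

    def _cache_support(self, key: str, caller: t.Callable[[], str]) -> str:
        # A real implementation would look ``key`` up in a cache configured
        # on ``self.environment`` before falling back to ``caller()``.
        return caller()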

@pass_context
def _gettext_alias(
    __context: Context, *args: t.Any, **kwargs: t.Any
) -> t.Union[t.Any, Undefined]:
    return __context.call(__context.resolve("gettext"), *args, **kwargs)


def _make_new_gettext(func: t.Callable[[str], str]) -> t.Callable[..., str]:
    @pass_context
    def gettext(__context: Context, __string: str, **variables: t.Any) -> str:
        rv = __context.call(func, __string)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, even if there are no
        # variables. This makes translation strings more consistent
        # and predictable. This requires escaping literal ``%`` in the
        # message as ``%%``.
        return rv % variables  # type: ignore

    return gettext


def _make_new_ngettext(func: t.Callable[[str, str, int], str]) -> t.Callable[..., str]:
    @pass_context
    def ngettext(
        __context: Context,
        __singular: str,
        __plural: str,
        __num: int,
        **variables: t.Any,
    ) -> str:
        variables.setdefault("num", __num)
        rv = __context.call(func, __singular, __plural, __num)
        if __context.eval_ctx.autoescape:
            rv = Markup(rv)
        # Always treat as a format string, see gettext comment above.
        return rv % variables  # type: ignore

    return ngettext


def _make_new_pgettext(func: t.Callable[[str, str], str]) -> t.Callable[..., str]:
    @pass_context
    def pgettext(
        __context: Context, __string_ctx: str, __string: str, **variables: t.Any
    ) -> str:
        variables.setdefault("context", __string_ctx)
        rv = __context.call(func, __string_ctx, __string)

        if __context.eval_ctx.autoescape:
            rv = Markup(rv)

        # Always treat as a format string, see gettext comment above.
        return rv % variables  # type: ignore

    return pgettext


def _make_new_npgettext(
    func: t.Callable[[str, str, str, int], str]
) -> t.Callable[..., str]:
    @pass_context
    def npgettext(
        __context: Context,
        __string_ctx: str,
        __singular: str,
        __plural: str,
        __num: int,
        **variables: t.Any,
    ) -> str:
        variables.setdefault("context", __string_ctx)
        variables.setdefault("num", __num)
        rv = __context.call(func, __string_ctx, __singular, __plural, __num)

        if __context.eval_ctx.autoescape:
            rv = Markup(rv)

        # Always treat as a format string, see gettext comment above.
        return rv % variables  # type: ignore

    return npgettext

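# With newstyle gettext enabled, the wrappers above let templates pass
# placeholders as keyword arguments, e.g. (a sketch; ``user`` and ``apples``
# are assumed template variables):
#
#   {{ gettext("Hello, %(name)s!", name=user.name) }}
#   {{ ngettext("%(num)d apple", "%(num)d apples", apples|count) }}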

class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja."""

    tags = {"trans"}

    # TODO: the i18n extension currently re-evaluates values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} foos{% endtrans %}
    # something() is called twice here: once for the gettext value and
    # once for the n parameter of the ngettext function.

    def __init__(self, environment: Environment) -> None:
        super().__init__(environment)
        environment.globals["_"] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False,
        )

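    # The methods below are exposed on the environment by ``extend`` above.
    # A typical setup looks roughly like this (a sketch; the ``messages``
    # domain and ``locale`` directory are assumptions):
    #
    #   import gettext
    #   from jinja2 import Environment
    #
    #   env = Environment(extensions=["jinja2.ext.i18n"])
    #   translations = gettext.translation("messages", "locale", ["de"])
    #   env.install_gettext_translations(translations, newstyle=True)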
    def _install(
        self, translations: "_SupportedTranslations", newstyle: t.Optional[bool] = None
    ) -> None:
        # ugettext and ungettext are preferred in case the I18N library
        # is providing compatibility with older Python versions.
        gettext = getattr(translations, "ugettext", None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, "ungettext", None)
        if ngettext is None:
            ngettext = translations.ngettext

        pgettext = getattr(translations, "pgettext", None)
        npgettext = getattr(translations, "npgettext", None)
        self._install_callables(
            gettext, ngettext, newstyle=newstyle, pgettext=pgettext, npgettext=npgettext
        )

    def _install_null(self, newstyle: t.Optional[bool] = None) -> None:
        import gettext

        translations = gettext.NullTranslations()

        if hasattr(translations, "pgettext"):
            # Python >= 3.8, where NullTranslations gained pgettext
            pgettext = translations.pgettext
        else:

            def pgettext(c: str, s: str) -> str:
                return s

        if hasattr(translations, "npgettext"):
            npgettext = translations.npgettext
        else:

            def npgettext(c: str, s: str, p: str, n: int) -> str:
                return s if n == 1 else p

        self._install_callables(
            gettext=translations.gettext,
            ngettext=translations.ngettext,
            newstyle=newstyle,
            pgettext=pgettext,
            npgettext=npgettext,
        )

    def _install_callables(
        self,
        gettext: t.Callable[[str], str],
        ngettext: t.Callable[[str, str, int], str],
        newstyle: t.Optional[bool] = None,
        pgettext: t.Optional[t.Callable[[str, str], str]] = None,
        npgettext: t.Optional[t.Callable[[str, str, str, int], str]] = None,
    ) -> None:
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle  # type: ignore
        if self.environment.newstyle_gettext:  # type: ignore
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)

            if pgettext is not None:
                pgettext = _make_new_pgettext(pgettext)

            if npgettext is not None:
                npgettext = _make_new_npgettext(npgettext)

        self.environment.globals.update(
            gettext=gettext, ngettext=ngettext, pgettext=pgettext, npgettext=npgettext
        )

    def _uninstall(self, translations: "_SupportedTranslations") -> None:
        for key in ("gettext", "ngettext", "pgettext", "npgettext"):
            self.environment.globals.pop(key, None)

    def _extract(
        self,
        source: t.Union[str, nodes.Template],
        gettext_functions: t.Sequence[str] = GETTEXT_FUNCTIONS,
    ) -> t.Iterator[
        t.Tuple[int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]]
    ]:
        if isinstance(source, str):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser: "Parser") -> t.Union[nodes.Node, t.List[nodes.Node]]:
        """Parse a translatable tag."""
        lineno = next(parser.stream).lineno

        context = None
        context_token = parser.stream.next_if("string")

        if context_token is not None:
            context = context_token.value

        # Find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later stage.
        plural_expr: t.Optional[nodes.Expr] = None
        plural_expr_assignment: t.Optional[nodes.Assign] = None
        num_called_num = False
        variables: t.Dict[str, nodes.Expr] = {}
        trimmed = None
        while parser.stream.current.type != "block_end":
            if variables:
                parser.stream.expect("comma")

            # skip colon for python compatibility
            if parser.stream.skip_if("colon"):
                break

            token = parser.stream.expect("name")
            if token.value in variables:
                parser.fail(
                    f"translatable variable {token.value!r} defined twice.",
                    token.lineno,
                    exc=TemplateAssertionError,
                )

            # expressions
            if parser.stream.current.type == "assign":
                next(parser.stream)
                variables[token.value] = var = parser.parse_expression()
            elif trimmed is None and token.value in ("trimmed", "notrimmed"):
                trimmed = token.value == "trimmed"
                continue
            else:
                variables[token.value] = var = nodes.Name(token.value, "load")

            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    plural_expr = nodes.Name("_trans", "load")
                    variables[token.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = token.value == "num"

        parser.stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], "load")
                num_called_num = singular_names[0] == "num"

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test("name:pluralize"):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != "block_end":
                token = parser.stream.expect("name")
                if token.value not in variables:
                    parser.fail(
                        f"unknown variable {token.value!r} for pluralization",
                        token.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[token.value]
                num_called_num = token.value == "num"
            parser.stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for name in referenced:
            if name not in variables:
                variables[name] = nodes.Name(name, "load")

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail("pluralize without variables", lineno)

        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            context,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _trim_whitespace(self, string: str, _ws_re: t.Pattern[str] = _ws_re) -> str:
        return _ws_re.sub(" ", string.strip())

    def _parse_block(
        self, parser: "Parser", allow_pluralize: bool
    ) -> t.Tuple[t.List[str], str]:
        """Parse until the next block tag with a given name."""
        referenced = []
        buf = []

        while True:
            if parser.stream.current.type == "data":
                buf.append(parser.stream.current.value.replace("%", "%%"))
                next(parser.stream)
            elif parser.stream.current.type == "variable_begin":
                next(parser.stream)
                name = parser.stream.expect("name").value
                referenced.append(name)
                buf.append(f"%({name})s")
                parser.stream.expect("variable_end")
            elif parser.stream.current.type == "block_begin":
                next(parser.stream)
                block_name = (
                    parser.stream.current.value
                    if parser.stream.current.type == "name"
                    else None
                )
                if block_name == "endtrans":
                    break
                elif block_name == "pluralize":
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                elif block_name == "trans":
                    parser.fail(
                        "trans blocks can't be nested; did you mean `endtrans`?"
                    )
                parser.fail(
                    f"control structures in translatable sections are not allowed; "
                    f"saw `{block_name}`"
                )
            elif parser.stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self,
        singular: str,
        plural: t.Optional[str],
        context: t.Optional[str],
        variables: t.Dict[str, nodes.Expr],
        plural_expr: t.Optional[nodes.Expr],
        vars_referenced: bool,
        num_called_num: bool,
    ) -> nodes.Output:
        """Generates a useful node from the data provided."""
        newstyle = self.environment.newstyle_gettext  # type: ignore
        node: nodes.Expr

        # For old-style gettext the ``%`` escaping applied in _parse_block is
        # only needed when variables are actually interpolated; if nothing is
        # referenced, undo the escaping.
        if not vars_referenced and not newstyle:
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        func_name = "gettext"
        func_args: t.List[nodes.Expr] = [nodes.Const(singular)]

        if context is not None:
            func_args.insert(0, nodes.Const(context))
            func_name = f"p{func_name}"

        if plural_expr is not None:
            func_name = f"n{func_name}"
            func_args.extend((nodes.Const(plural), plural_expr))

        node = nodes.Call(nodes.Name(func_name, "load"), func_args, [], None, None)

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if newstyle:
            for key, value in variables.items():
                # the newstyle ngettext wrapper sets ``num`` itself when the
                # plural variable is called num, so just skip it here.
                if num_called_num and key == "num":
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(
                    node,
                    nodes.Dict(
                        [
                            nodes.Pair(nodes.Const(key), value)
                            for key, value in variables.items()
                        ]
                    ),
                )
        return nodes.Output([node])

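# Template-side usage of the ``{% trans %}`` tag parsed above (a sketch;
# ``user`` and ``items`` are assumed template variables):
#
#   {% trans user=user.name %}Hello {{ user }}!{% endtrans %}
#   {% trans count=items|length %}{{ count }} item.{% pluralize
#     %}{{ count }} items.{% endtrans %}
#   {% trans "a context string" %}Translate me in this context.{% endtrans %}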

class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja that works like the print statement, except
    that it doesn't print the return value.
    """

    tags = {"do"}

    def parse(self, parser: "Parser") -> nodes.ExprStmt:
        node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        node.node = parser.parse_tuple()
        return node


class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""

    tags = {"break", "continue"}

    def parse(self, parser: "Parser") -> t.Union[nodes.Break, nodes.Continue]:
        token = next(parser.stream)
        if token.value == "break":
            return nodes.Break(lineno=token.lineno)
        return nodes.Continue(lineno=token.lineno)

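# Enabling and using these statement extensions (a sketch; ``navigation``
# and ``seen`` are assumed template variables):
#
#   env = Environment(extensions=["jinja2.ext.do", "jinja2.ext.loopcontrols"])
#
#   {% for item in navigation %}
#     {% if loop.index > 10 %}{% break %}{% endif %}
#     {% do seen.append(item) %}
#   {% endfor %}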

class DebugExtension(Extension):
    """A ``{% debug %}`` tag that dumps the available variables,
    filters, and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser: "Parser") -> nodes.Output:
        lineno = parser.stream.expect("name:debug").lineno
        context = nodes.ContextReference()
        result = self.call_method("_render", [context], lineno=lineno)
        return nodes.Output([result], lineno=lineno)

    def _render(self, context: Context) -> str:
        result = {
            "context": context.get_all(),
            "filters": sorted(self.environment.filters.keys()),
            "tests": sorted(self.environment.tests.keys()),
        }

        # Set the depth since the intent is to show the top few names.
        return pprint.pformat(result, depth=3, compact=True)


def extract_from_ast(
    ast: nodes.Template,
    gettext_functions: t.Sequence[str] = GETTEXT_FUNCTIONS,
    babel_style: bool = True,
) -> t.Iterator[
    t.Tuple[int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]]
]:
661    """Extract localizable strings from the given template node.  Per
662    default this function returns matches in babel style that means non string
663    parameters as well as keyword arguments are returned as `None`.  This
664    allows Babel to figure out what you really meant if you are using
665    gettext functions that allow keyword arguments for placeholder expansion.
666    If you don't want that behavior set the `babel_style` parameter to `False`
667    which causes only strings to be returned and parameters are always stored
668    in tuples.  As a consequence invalid gettext calls (calls without a single
669    string parameter or string parameters after non-string parameters) are
670    skipped.
671
672    This example explains the behavior:
673
674    >>> from jinja2 import Environment
675    >>> env = Environment()
676    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
677    >>> list(extract_from_ast(node))
678    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
679    >>> list(extract_from_ast(node, babel_style=False))
680    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
681
682    For every string found this function yields a ``(lineno, function,
683    message)`` tuple, where:
684
685    * ``lineno`` is the number of the line on which the string was found,
686    * ``function`` is the name of the ``gettext`` function used (if the
687      string was extracted from embedded Python code), and
688    *   ``message`` is the string, or a tuple of strings for functions
689         with multiple string arguments.
690
691    This extraction function operates on the AST and is because of that unable
692    to extract any comments.  For comment support you have to use the babel
693    extraction interface or extract comments yourself.
694    """
    out: t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]

    for node in ast.find_all(nodes.Call):
        if (
            not isinstance(node.node, nodes.Name)
            or node.node.name not in gettext_functions
        ):
            continue

        strings: t.List[t.Optional[str]] = []

        for arg in node.args:
            if isinstance(arg, nodes.Const) and isinstance(arg.value, str):
                strings.append(arg.value)
            else:
                strings.append(None)

        for _ in node.kwargs:
            strings.append(None)
        if node.dyn_args is not None:
            strings.append(None)
        if node.dyn_kwargs is not None:
            strings.append(None)

        if not babel_style:
            out = tuple(x for x in strings if x is not None)

            if not out:
                continue
        else:
            if len(strings) == 1:
                out = strings[0]
            else:
                out = tuple(strings)

        yield node.lineno, node.node.name, out


class _CommentFinder:
    """Helper class to find comments in a token stream.  Can only
    find comments for gettext calls forwards.  Once the comment
    from line 4 is found, a comment for line 1 will not return a
    usable value.
    """

    def __init__(
        self, tokens: t.Sequence[t.Tuple[int, str, str]], comment_tags: t.Sequence[str]
    ) -> None:
        self.tokens = tokens
        self.comment_tags = comment_tags
        self.offset = 0
        self.last_lineno = 0

    def find_backwards(self, offset: int) -> t.List[str]:
        try:
            for _, token_type, token_value in reversed(
                self.tokens[self.offset : offset]
            ):
                if token_type in ("comment", "linecomment"):
                    try:
                        prefix, comment = token_value.split(None, 1)
                    except ValueError:
                        continue
                    if prefix in self.comment_tags:
                        return [comment.rstrip()]
            return []
        finally:
            self.offset = offset

    def find_comments(self, lineno: int) -> t.List[str]:
        if not self.comment_tags or self.last_lineno > lineno:
            return []
        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
            if token_lineno > lineno:
                return self.find_backwards(self.offset + idx)
        return self.find_backwards(len(self.tokens))


def babel_extract(
    fileobj: t.BinaryIO,
    keywords: t.Sequence[str],
    comment_tags: t.Sequence[str],
    options: t.Dict[str, t.Any],
) -> t.Iterator[
    t.Tuple[
        int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]], t.List[str]
    ]
]:
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             (comments will be empty currently)
    """
    extensions: t.Dict[t.Type[Extension], None] = {}

    for extension_name in options.get("extensions", "").split(","):
        extension_name = extension_name.strip()

        if not extension_name:
            continue

        extensions[import_string(extension_name)] = None

    if InternationalizationExtension not in extensions:
        extensions[InternationalizationExtension] = None

    def getbool(options: t.Mapping[str, str], key: str, default: bool = False) -> bool:
        return options.get(key, str(default)).lower() in {"1", "on", "yes", "true"}

    silent = getbool(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", defaults.BLOCK_START_STRING),
        options.get("block_end_string", defaults.BLOCK_END_STRING),
        options.get("variable_start_string", defaults.VARIABLE_START_STRING),
        options.get("variable_end_string", defaults.VARIABLE_END_STRING),
        options.get("comment_start_string", defaults.COMMENT_START_STRING),
        options.get("comment_end_string", defaults.COMMENT_END_STRING),
        options.get("line_statement_prefix") or defaults.LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or defaults.LINE_COMMENT_PREFIX,
        getbool(options, "trim_blocks", defaults.TRIM_BLOCKS),
        getbool(options, "lstrip_blocks", defaults.LSTRIP_BLOCKS),
        defaults.NEWLINE_SEQUENCE,
        getbool(options, "keep_trailing_newline", defaults.KEEP_TRAILING_NEWLINE),
        tuple(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if getbool(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if getbool(options, "newstyle_gettext"):
        environment.newstyle_gettext = True  # type: ignore

    source = fileobj.read().decode(options.get("encoding", "utf-8"))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if not silent:
            raise
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)

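# A Babel mapping configuration that routes templates through this extractor
# might look like this (a sketch; the path and options are assumptions):
#
#   [jinja2: **/templates/**.html]
#   encoding = utf-8
#   extensions = jinja2.ext.do, jinja2.ext.loopcontrols
#   newstyle_gettext = true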

#: nicer import names
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
debug = DebugExtension