17db96d56Sopenharmony_ci#
27db96d56Sopenharmony_ci# Secret Labs' Regular Expression Engine
37db96d56Sopenharmony_ci#
47db96d56Sopenharmony_ci# re-compatible interface for the sre matching engine
57db96d56Sopenharmony_ci#
67db96d56Sopenharmony_ci# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
77db96d56Sopenharmony_ci#
87db96d56Sopenharmony_ci# This version of the SRE library can be redistributed under CNRI's
97db96d56Sopenharmony_ci# Python 1.6 license.  For any other use, please contact Secret Labs
107db96d56Sopenharmony_ci# AB (info@pythonware.com).
117db96d56Sopenharmony_ci#
127db96d56Sopenharmony_ci# Portions of this engine have been developed in cooperation with
137db96d56Sopenharmony_ci# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
147db96d56Sopenharmony_ci# other compatibility work.
157db96d56Sopenharmony_ci#
167db96d56Sopenharmony_ci
177db96d56Sopenharmony_cir"""Support for regular expressions (RE).
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ciThis module provides regular expression matching operations similar to
207db96d56Sopenharmony_cithose found in Perl.  It supports both 8-bit and Unicode strings; both
217db96d56Sopenharmony_cithe pattern and the strings being processed can contain null bytes and
227db96d56Sopenharmony_cicharacters outside the US ASCII range.
237db96d56Sopenharmony_ci
247db96d56Sopenharmony_ciRegular expressions can contain both special and ordinary characters.
257db96d56Sopenharmony_ciMost ordinary characters, like "A", "a", or "0", are the simplest
267db96d56Sopenharmony_ciregular expressions; they simply match themselves.  You can
277db96d56Sopenharmony_ciconcatenate ordinary characters, so last matches the string 'last'.
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ciThe special characters are:
307db96d56Sopenharmony_ci    "."      Matches any character except a newline.
317db96d56Sopenharmony_ci    "^"      Matches the start of the string.
327db96d56Sopenharmony_ci    "$"      Matches the end of the string or just before the newline at
337db96d56Sopenharmony_ci             the end of the string.
347db96d56Sopenharmony_ci    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
357db96d56Sopenharmony_ci             Greedy means that it will match as many repetitions as possible.
367db96d56Sopenharmony_ci    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
377db96d56Sopenharmony_ci    "?"      Matches 0 or 1 (greedy) of the preceding RE.
387db96d56Sopenharmony_ci    *?,+?,?? Non-greedy versions of the previous three special characters.
397db96d56Sopenharmony_ci    {m,n}    Matches from m to n repetitions of the preceding RE.
407db96d56Sopenharmony_ci    {m,n}?   Non-greedy version of the above.
417db96d56Sopenharmony_ci    "\\"     Either escapes special characters or signals a special sequence.
427db96d56Sopenharmony_ci    []       Indicates a set of characters.
437db96d56Sopenharmony_ci             A "^" as the first character indicates a complementing set.
447db96d56Sopenharmony_ci    "|"      A|B, creates an RE that will match either A or B.
457db96d56Sopenharmony_ci    (...)    Matches the RE inside the parentheses.
467db96d56Sopenharmony_ci             The contents can be retrieved or matched later in the string.
477db96d56Sopenharmony_ci    (?aiLmsux) The letters set the corresponding flags defined below.
487db96d56Sopenharmony_ci    (?:...)  Non-grouping version of regular parentheses.
497db96d56Sopenharmony_ci    (?P<name>...) The substring matched by the group is accessible by name.
507db96d56Sopenharmony_ci    (?P=name)     Matches the text matched earlier by the group named name.
517db96d56Sopenharmony_ci    (?#...)  A comment; ignored.
527db96d56Sopenharmony_ci    (?=...)  Matches if ... matches next, but doesn't consume the string.
537db96d56Sopenharmony_ci    (?!...)  Matches if ... doesn't match next.
547db96d56Sopenharmony_ci    (?<=...) Matches if preceded by ... (must be fixed length).
557db96d56Sopenharmony_ci    (?<!...) Matches if not preceded by ... (must be fixed length).
567db96d56Sopenharmony_ci    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
577db96d56Sopenharmony_ci                       the (optional) no pattern otherwise.
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ciThe special sequences consist of "\\" and a character from the list
607db96d56Sopenharmony_cibelow.  If the ordinary character is not on the list, then the
617db96d56Sopenharmony_ciresulting RE will match the second character.
627db96d56Sopenharmony_ci    \number  Matches the contents of the group of the same number.
637db96d56Sopenharmony_ci    \A       Matches only at the start of the string.
647db96d56Sopenharmony_ci    \Z       Matches only at the end of the string.
657db96d56Sopenharmony_ci    \b       Matches the empty string, but only at the start or end of a word.
667db96d56Sopenharmony_ci    \B       Matches the empty string, but not at the start or end of a word.
677db96d56Sopenharmony_ci    \d       Matches any decimal digit; equivalent to the set [0-9] in
687db96d56Sopenharmony_ci             bytes patterns or string patterns with the ASCII flag.
697db96d56Sopenharmony_ci             In string patterns without the ASCII flag, it will match the whole
707db96d56Sopenharmony_ci             range of Unicode digits.
717db96d56Sopenharmony_ci    \D       Matches any non-digit character; equivalent to [^\d].
727db96d56Sopenharmony_ci    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v] in
737db96d56Sopenharmony_ci             bytes patterns or string patterns with the ASCII flag.
747db96d56Sopenharmony_ci             In string patterns without the ASCII flag, it will match the whole
757db96d56Sopenharmony_ci             range of Unicode whitespace characters.
767db96d56Sopenharmony_ci    \S       Matches any non-whitespace character; equivalent to [^\s].
777db96d56Sopenharmony_ci    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]
787db96d56Sopenharmony_ci             in bytes patterns or string patterns with the ASCII flag.
797db96d56Sopenharmony_ci             In string patterns without the ASCII flag, it will match the
807db96d56Sopenharmony_ci             range of Unicode alphanumeric characters (letters plus digits
817db96d56Sopenharmony_ci             plus underscore).
827db96d56Sopenharmony_ci             With LOCALE, it will match the set [0-9_] plus characters defined
837db96d56Sopenharmony_ci             as letters for the current locale.
847db96d56Sopenharmony_ci    \W       Matches the complement of \w.
857db96d56Sopenharmony_ci    \\       Matches a literal backslash.
867db96d56Sopenharmony_ci
877db96d56Sopenharmony_ciThis module exports the following functions:
887db96d56Sopenharmony_ci    match     Match a regular expression pattern to the beginning of a string.
897db96d56Sopenharmony_ci    fullmatch Match a regular expression pattern to all of a string.
907db96d56Sopenharmony_ci    search    Search a string for the presence of a pattern.
917db96d56Sopenharmony_ci    sub       Substitute occurrences of a pattern found in a string.
927db96d56Sopenharmony_ci    subn      Same as sub, but also return the number of substitutions made.
937db96d56Sopenharmony_ci    split     Split a string by the occurrences of a pattern.
947db96d56Sopenharmony_ci    findall   Find all occurrences of a pattern in a string.
957db96d56Sopenharmony_ci    finditer  Return an iterator yielding a Match object for each match.
967db96d56Sopenharmony_ci    compile   Compile a pattern into a Pattern object.
977db96d56Sopenharmony_ci    purge     Clear the regular expression cache.
    escape    Backslash all special characters in a string.
997db96d56Sopenharmony_ci
1007db96d56Sopenharmony_ciEach function other than purge and escape can take an optional 'flags' argument
1017db96d56Sopenharmony_ciconsisting of one or more of the following module constants, joined by "|".
1027db96d56Sopenharmony_ciA, L, and U are mutually exclusive.
    A  ASCII       For string patterns, make \w, \W, \b, \B, \d, \D, \s, \S
                   match the corresponding ASCII character categories
                   (rather than the whole Unicode categories, which is the
                   default).
                   For bytes patterns, this flag is the only available
                   behaviour and needn't be specified.
1097db96d56Sopenharmony_ci    I  IGNORECASE  Perform case-insensitive matching.
    L  LOCALE      Make \w, \W, \b, \B dependent on the current locale.
1117db96d56Sopenharmony_ci    M  MULTILINE   "^" matches the beginning of lines (after a newline)
1127db96d56Sopenharmony_ci                   as well as the string.
1137db96d56Sopenharmony_ci                   "$" matches the end of lines (before a newline) as well
1147db96d56Sopenharmony_ci                   as the end of the string.
1157db96d56Sopenharmony_ci    S  DOTALL      "." matches any character at all, including the newline.
1167db96d56Sopenharmony_ci    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
1177db96d56Sopenharmony_ci    U  UNICODE     For compatibility only. Ignored for string patterns (it
1187db96d56Sopenharmony_ci                   is the default), and forbidden for bytes patterns.
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ciThis module also defines an exception 'error'.
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci"""
1237db96d56Sopenharmony_ci
1247db96d56Sopenharmony_ciimport enum
1257db96d56Sopenharmony_cifrom . import _compiler, _parser
1267db96d56Sopenharmony_ciimport functools
1277db96d56Sopenharmony_ci
1287db96d56Sopenharmony_ci
# Names exported by "from re import *".  The order of the entries is
# kept exactly as before; they are only grouped by what they are.
__all__ = [
    # module-level matching, substitution and splitting functions
    "match", "fullmatch", "search", "sub", "subn", "split",
    "findall", "finditer",
    # compilation, cache control and helpers
    "compile", "purge", "template", "escape",
    # exception and result/pattern types
    "error", "Pattern", "Match",
    # one-letter flag aliases
    "A", "I", "L", "M", "S", "X", "U",
    # long flag names
    "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
    "UNICODE", "NOFLAG",
    # the flag enumeration itself
    "RegexFlag",
]

__version__ = "2.2.1"
1397db96d56Sopenharmony_ci
# Flag constants accepted by the functions in this module.  The plain
# class below is converted into an enum.IntFlag (boundary=KEEP) by
# _simple_enum, and global_enum publishes the members as module-level
# names, which is how the rest of this file can refer to T and DEBUG
# directly.  Every long name is defined first and its short alias, bound
# to the same value, follows it.
@enum.global_enum
@enum._simple_enum(enum.IntFlag, boundary=enum.KEEP)
class RegexFlag:
    NOFLAG = 0
    # assume ascii "locale"
    ASCII = _compiler.SRE_FLAG_ASCII
    A = ASCII
    # ignore case
    IGNORECASE = _compiler.SRE_FLAG_IGNORECASE
    I = IGNORECASE
    # assume current 8-bit locale
    LOCALE = _compiler.SRE_FLAG_LOCALE
    L = LOCALE
    # assume unicode "locale"
    UNICODE = _compiler.SRE_FLAG_UNICODE
    U = UNICODE
    # make anchors look for newline
    MULTILINE = _compiler.SRE_FLAG_MULTILINE
    M = MULTILINE
    # make dot match newline
    DOTALL = _compiler.SRE_FLAG_DOTALL
    S = DOTALL
    # ignore whitespace and comments
    VERBOSE = _compiler.SRE_FLAG_VERBOSE
    X = VERBOSE
    # sre extensions (experimental, don't rely on these)
    # unknown purpose, deprecated
    TEMPLATE = _compiler.SRE_FLAG_TEMPLATE
    T = TEMPLATE
    # dump pattern after compilation
    DEBUG = _compiler.SRE_FLAG_DEBUG
    # Use object's own __str__ instead of the one the enum machinery
    # would otherwise install.
    __str__ = object.__str__
    # NOTE(review): presumably selects hex() for rendering numeric
    # (unnamed) flag values in the repr -- confirm against the enum docs.
    _numeric_repr_ = hex
1567db96d56Sopenharmony_ci
# sre exception: public alias for the exception class defined by the
# _compiler module (listed in __all__ as "error")
error = _compiler.error
1597db96d56Sopenharmony_ci
1607db96d56Sopenharmony_ci# --------------------------------------------------------------------
1617db96d56Sopenharmony_ci# public interface
1627db96d56Sopenharmony_ci
def match(pattern, string, flags=0):
    """Apply the pattern at the beginning of the string.

    The pattern is compiled (or fetched from the cache) with the given
    flags and its match() method is called on string.  The result is a
    Match object, or None when there is no match."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
1677db96d56Sopenharmony_ci
def fullmatch(pattern, string, flags=0):
    """Apply the pattern to the whole of the string.

    The pattern is compiled (or fetched from the cache) with the given
    flags and its fullmatch() method is called on string.  The result is
    a Match object, or None when there is no match."""
    compiled = _compile(pattern, flags)
    return compiled.fullmatch(string)
1727db96d56Sopenharmony_ci
def search(pattern, string, flags=0):
    """Scan the string for a location where the pattern matches.

    The pattern is compiled (or fetched from the cache) with the given
    flags and its search() method is called on string.  The result is a
    Match object, or None when no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
1777db96d56Sopenharmony_ci
def sub(pattern, repl, string, count=0, flags=0):
    """Return string with the leftmost non-overlapping occurrences of
    the pattern replaced by repl.

    repl is either a string or a callable.  A string has its backslash
    escapes processed; a callable receives the Match object and must
    return the replacement string to use.  count is handed on unchanged
    to the compiled pattern's sub() method."""
    compiled = _compile(pattern, flags)
    return compiled.sub(repl, string, count)
1867db96d56Sopenharmony_ci
def subn(pattern, repl, string, count=0, flags=0):
    """Like sub(), but return a 2-tuple (new_string, number).

    new_string is the source string with the leftmost non-overlapping
    occurrences of the pattern replaced by repl, and number is how many
    substitutions were made.  repl is either a string, whose backslash
    escapes are processed, or a callable that receives the Match object
    and must return the replacement string to use."""
    compiled = _compile(pattern, flags)
    return compiled.subn(repl, string, count)
1977db96d56Sopenharmony_ci
def split(pattern, string, maxsplit=0, flags=0):
    """Split string at the occurrences of the pattern and return the
    pieces as a list.

    When the pattern contains capturing parentheses, the text of all
    groups is included in the returned list as well.  A nonzero maxsplit
    limits the number of splits; the unsplit remainder of the string
    then becomes the last element of the list."""
    compiled = _compile(pattern, flags)
    return compiled.split(string, maxsplit)
2077db96d56Sopenharmony_ci
def findall(pattern, string, flags=0):
    """Return all non-overlapping matches in the string as a list.

    With one or more capturing groups in the pattern the list holds the
    groups instead; its items are tuples when the pattern has more than
    one group.

    Empty matches are part of the result."""
    compiled = _compile(pattern, flags)
    return compiled.findall(string)
2177db96d56Sopenharmony_ci
def finditer(pattern, string, flags=0):
    """Return an iterator that yields a Match object for every
    non-overlapping match in the string.

    Empty matches are part of the result."""
    compiled = _compile(pattern, flags)
    return compiled.finditer(string)
2247db96d56Sopenharmony_ci
def compile(pattern, flags=0):
    "Return the Pattern object for a regular expression pattern and flags."
    compiled = _compile(pattern, flags)
    return compiled
2287db96d56Sopenharmony_ci
def purge():
    "Empty both caches: compiled patterns and parsed replacement templates"
    # The two caches are independent of each other, so the order in
    # which they are emptied does not matter.
    _compile_repl.cache_clear()
    _cache.clear()
2337db96d56Sopenharmony_ci
def template(pattern, flags=0):
    "Deprecated: compile pattern with the T flag added, returning a Pattern"
    import warnings
    message = ("The re.template() function is deprecated "
               "as it is an undocumented function "
               "without an obvious purpose. "
               "Use re.compile() instead.")
    warnings.warn(message, DeprecationWarning)
    with warnings.catch_warnings():
        # _compile() issues a DeprecationWarning of its own for the T
        # flag; ignore it here so the caller is warned just once.
        warnings.simplefilter("ignore", DeprecationWarning)
        return _compile(pattern, flags|T)
2457db96d56Sopenharmony_ci
# Translation table used by escape(): maps the code point of every
# special character to that character preceded by a backslash.
# Besides the regular expression metacharacters it covers:
#   closing ')', '}' and ']'
#   '-' (a range in character set)
#   '&', '~' (extended character set operations)
#   '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map = {
    ord(char): '\\' + char
    for char in '()[]{}?*+-|^$\\.&~# \t\n\r\v\f'
}

def escape(pattern):
    """
    Return pattern with every special character backslash-escaped.

    A str is translated directly and a str is returned.  Anything else
    is decoded as latin1, translated, and encoded back to bytes.
    """
    if not isinstance(pattern, str):
        # latin1 maps each byte to the code point of the same value, so
        # the decode/encode round trip leaves the other bytes untouched.
        text = str(pattern, 'latin1')
        return text.translate(_special_chars_map).encode('latin1')
    return pattern.translate(_special_chars_map)
2627db96d56Sopenharmony_ci
# Types of compiled pattern objects and of match objects.  They are
# obtained from live instances: an empty pattern is compiled, and then
# matched against the empty string.
Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))
2657db96d56Sopenharmony_ci
2667db96d56Sopenharmony_ci# --------------------------------------------------------------------
2677db96d56Sopenharmony_ci# internals
2687db96d56Sopenharmony_ci
# Compiled patterns keyed by (type(pattern), pattern, flags).  The dict's
# insertion order is relied upon: the first key is the oldest entry.
_cache = {}  # ordered!

_MAXCACHE = 512
def _compile(pattern, flags):
    # internal: compile pattern
    #
    # Returns the cached Pattern for (pattern, flags) when there is one.
    # An already compiled pattern is handed back as is, but only when no
    # flags were given (ValueError otherwise).  Anything that is not a
    # string according to _compiler.isstring() raises TypeError.
    if isinstance(flags, RegexFlag):
        flags = flags.value
    key = (type(pattern), pattern, flags)
    try:
        return _cache[key]
    except KeyError:
        pass
    if isinstance(pattern, Pattern):
        if flags:
            raise ValueError(
                "cannot process flags argument with a compiled pattern")
        return pattern
    if not _compiler.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    if flags & T:
        import warnings
        message = ("The re.TEMPLATE/re.T flag is deprecated "
                   "as it is an undocumented flag "
                   "without an obvious purpose. "
                   "Don't use it.")
        warnings.warn(message, DeprecationWarning)
    compiled = _compiler.compile(pattern, flags)
    if flags & DEBUG:
        # patterns compiled with DEBUG are never stored in the cache
        return compiled
    if len(_cache) >= _MAXCACHE:
        # Drop the oldest item
        try:
            del _cache[next(iter(_cache))]
        except (StopIteration, RuntimeError, KeyError):
            # NOTE(review): presumably tolerates the cache being changed
            # by another thread at the same time -- eviction is best effort.
            pass
    _cache[key] = compiled
    return compiled
3047db96d56Sopenharmony_ci
@functools.lru_cache(maxsize=_MAXCACHE)
def _compile_repl(repl, pattern):
    # internal: compile replacement pattern
    # (results are memoized by lru_cache, at most _MAXCACHE entries)
    parsed = _parser.parse_template(repl, pattern)
    return parsed
3097db96d56Sopenharmony_ci
def _expand(pattern, match, template):
    # internal: Match.expand implementation hook
    # Parses the template for this pattern (bypassing the _compile_repl
    # cache) and expands the parsed form against the match.
    parsed = _parser.parse_template(template, pattern)
    return _parser.expand_template(parsed, match)
3147db96d56Sopenharmony_ci
def _subx(pattern, template):
    # internal: Pattern.sub/subn implementation helper
    # Returns the replacement itself when the parsed template is a single
    # literal, otherwise a callable that expands the template for a match.
    compiled = _compile_repl(template, pattern)
    groups = compiled[0]
    literals = compiled[1]
    if not groups and len(literals) == 1:
        # literal replacement
        return literals[0]
    def filter(match, template=compiled):
        return _parser.expand_template(template, match)
    return filter
3247db96d56Sopenharmony_ci
3257db96d56Sopenharmony_ci# register myself for pickling
3267db96d56Sopenharmony_ci
3277db96d56Sopenharmony_ciimport copyreg
3287db96d56Sopenharmony_ci
def _pickle(p):
    # Reduction function for Pattern objects: a pattern is rebuilt by
    # calling _compile() with its source pattern and its flags.
    rebuild_args = (p.pattern, p.flags)
    return _compile, rebuild_args

copyreg.pickle(Pattern, _pickle, _compile)
3337db96d56Sopenharmony_ci
3347db96d56Sopenharmony_ci# --------------------------------------------------------------------
3357db96d56Sopenharmony_ci# experimental stuff (see python-dev discussions for details)
3367db96d56Sopenharmony_ci
class Scanner:
    # Experimental tokenizer built from a lexicon of (phrase, action)
    # pairs.  All phrases are combined into one pattern with one group per
    # phrase, so that the group that matched selects the action to run.

    def __init__(self, lexicon, flags=0):
        # lexicon is stored as given and later indexed by group number in
        # scan(), so it has to be a sequence of (phrase, action) pairs.
        from ._constants import BRANCH, SUBPATTERN
        if isinstance(flags, RegexFlag):
            flags = flags.value
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        state = _parser.State()
        state.flags = flags
        alternatives = []
        for phrase, _action in lexicon:
            group_id = state.opengroup()
            parsed = _parser.parse(phrase, flags)
            wrapped = _parser.SubPattern(
                state, [(SUBPATTERN, (group_id, 0, 0, parsed))])
            alternatives.append(wrapped)
            state.closegroup(group_id, wrapped)
        compound = _parser.SubPattern(state, [(BRANCH, (None, alternatives))])
        self.scanner = _compiler.compile(compound)

    def scan(self, string):
        # Tokenize string from its start.  Returns (results, remainder):
        # the collected action results and the part of string that was
        # not consumed.  Scanning stops at the first position where no
        # phrase matches, or where the match is empty.
        tokens = []
        next_match = self.scanner.scanner(string).match
        pos = 0
        while m := next_match():
            end = m.end()
            if end == pos:
                # an empty match makes no progress; stop here
                break
            # group numbers start at 1, lexicon indexes at 0
            action = self.lexicon[m.lastindex - 1][1]
            if callable(action):
                # expose the current match to the callback as self.match
                self.match = m
                action = action(self, m.group())
            if action is not None:
                tokens.append(action)
            pos = end
        return tokens, string[pos:]
375