17db96d56Sopenharmony_ci"""Filename matching with shell patterns.
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_cifnmatch(FILENAME, PATTERN) matches according to the local convention.
47db96d56Sopenharmony_cifnmatchcase(FILENAME, PATTERN) always takes case in account.
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciThe functions operate by translating the pattern into a regular
77db96d56Sopenharmony_ciexpression.  They cache the compiled regular expressions for speed.
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ciThe function translate(PATTERN) returns a regular expression
107db96d56Sopenharmony_cicorresponding to PATTERN.  (It does not compile it.)
117db96d56Sopenharmony_ci"""
127db96d56Sopenharmony_ciimport os
137db96d56Sopenharmony_ciimport posixpath
147db96d56Sopenharmony_ciimport re
157db96d56Sopenharmony_ciimport functools
167db96d56Sopenharmony_ci
177db96d56Sopenharmony_ci__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_cidef fnmatch(name, pat):
207db96d56Sopenharmony_ci    """Test whether FILENAME matches PATTERN.
217db96d56Sopenharmony_ci
227db96d56Sopenharmony_ci    Patterns are Unix shell style:
237db96d56Sopenharmony_ci
247db96d56Sopenharmony_ci    *       matches everything
257db96d56Sopenharmony_ci    ?       matches any single character
267db96d56Sopenharmony_ci    [seq]   matches any character in seq
277db96d56Sopenharmony_ci    [!seq]  matches any char not in seq
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci    An initial period in FILENAME is not special.
307db96d56Sopenharmony_ci    Both FILENAME and PATTERN are first case-normalized
317db96d56Sopenharmony_ci    if the operating system requires it.
327db96d56Sopenharmony_ci    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
337db96d56Sopenharmony_ci    """
347db96d56Sopenharmony_ci    name = os.path.normcase(name)
357db96d56Sopenharmony_ci    pat = os.path.normcase(pat)
367db96d56Sopenharmony_ci    return fnmatchcase(name, pat)
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ci@functools.lru_cache(maxsize=32768, typed=True)
397db96d56Sopenharmony_cidef _compile_pattern(pat):
407db96d56Sopenharmony_ci    if isinstance(pat, bytes):
417db96d56Sopenharmony_ci        pat_str = str(pat, 'ISO-8859-1')
427db96d56Sopenharmony_ci        res_str = translate(pat_str)
437db96d56Sopenharmony_ci        res = bytes(res_str, 'ISO-8859-1')
447db96d56Sopenharmony_ci    else:
457db96d56Sopenharmony_ci        res = translate(pat)
467db96d56Sopenharmony_ci    return re.compile(res).match
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_cidef filter(names, pat):
497db96d56Sopenharmony_ci    """Construct a list from those elements of the iterable NAMES that match PAT."""
507db96d56Sopenharmony_ci    result = []
517db96d56Sopenharmony_ci    pat = os.path.normcase(pat)
527db96d56Sopenharmony_ci    match = _compile_pattern(pat)
537db96d56Sopenharmony_ci    if os.path is posixpath:
547db96d56Sopenharmony_ci        # normcase on posix is NOP. Optimize it away from the loop.
557db96d56Sopenharmony_ci        for name in names:
567db96d56Sopenharmony_ci            if match(name):
577db96d56Sopenharmony_ci                result.append(name)
587db96d56Sopenharmony_ci    else:
597db96d56Sopenharmony_ci        for name in names:
607db96d56Sopenharmony_ci            if match(os.path.normcase(name)):
617db96d56Sopenharmony_ci                result.append(name)
627db96d56Sopenharmony_ci    return result
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_cidef fnmatchcase(name, pat):
657db96d56Sopenharmony_ci    """Test whether FILENAME matches PATTERN, including case.
667db96d56Sopenharmony_ci
677db96d56Sopenharmony_ci    This is a version of fnmatch() which doesn't case-normalize
687db96d56Sopenharmony_ci    its arguments.
697db96d56Sopenharmony_ci    """
707db96d56Sopenharmony_ci    match = _compile_pattern(pat)
717db96d56Sopenharmony_ci    return match(name) is not None
727db96d56Sopenharmony_ci
737db96d56Sopenharmony_ci
747db96d56Sopenharmony_cidef translate(pat):
757db96d56Sopenharmony_ci    """Translate a shell PATTERN to a regular expression.
767db96d56Sopenharmony_ci
777db96d56Sopenharmony_ci    There is no way to quote meta-characters.
787db96d56Sopenharmony_ci    """
797db96d56Sopenharmony_ci
807db96d56Sopenharmony_ci    STAR = object()
817db96d56Sopenharmony_ci    res = []
827db96d56Sopenharmony_ci    add = res.append
837db96d56Sopenharmony_ci    i, n = 0, len(pat)
847db96d56Sopenharmony_ci    while i < n:
857db96d56Sopenharmony_ci        c = pat[i]
867db96d56Sopenharmony_ci        i = i+1
877db96d56Sopenharmony_ci        if c == '*':
887db96d56Sopenharmony_ci            # compress consecutive `*` into one
897db96d56Sopenharmony_ci            if (not res) or res[-1] is not STAR:
907db96d56Sopenharmony_ci                add(STAR)
917db96d56Sopenharmony_ci        elif c == '?':
927db96d56Sopenharmony_ci            add('.')
937db96d56Sopenharmony_ci        elif c == '[':
947db96d56Sopenharmony_ci            j = i
957db96d56Sopenharmony_ci            if j < n and pat[j] == '!':
967db96d56Sopenharmony_ci                j = j+1
977db96d56Sopenharmony_ci            if j < n and pat[j] == ']':
987db96d56Sopenharmony_ci                j = j+1
997db96d56Sopenharmony_ci            while j < n and pat[j] != ']':
1007db96d56Sopenharmony_ci                j = j+1
1017db96d56Sopenharmony_ci            if j >= n:
1027db96d56Sopenharmony_ci                add('\\[')
1037db96d56Sopenharmony_ci            else:
1047db96d56Sopenharmony_ci                stuff = pat[i:j]
1057db96d56Sopenharmony_ci                if '-' not in stuff:
1067db96d56Sopenharmony_ci                    stuff = stuff.replace('\\', r'\\')
1077db96d56Sopenharmony_ci                else:
1087db96d56Sopenharmony_ci                    chunks = []
1097db96d56Sopenharmony_ci                    k = i+2 if pat[i] == '!' else i+1
1107db96d56Sopenharmony_ci                    while True:
1117db96d56Sopenharmony_ci                        k = pat.find('-', k, j)
1127db96d56Sopenharmony_ci                        if k < 0:
1137db96d56Sopenharmony_ci                            break
1147db96d56Sopenharmony_ci                        chunks.append(pat[i:k])
1157db96d56Sopenharmony_ci                        i = k+1
1167db96d56Sopenharmony_ci                        k = k+3
1177db96d56Sopenharmony_ci                    chunk = pat[i:j]
1187db96d56Sopenharmony_ci                    if chunk:
1197db96d56Sopenharmony_ci                        chunks.append(chunk)
1207db96d56Sopenharmony_ci                    else:
1217db96d56Sopenharmony_ci                        chunks[-1] += '-'
1227db96d56Sopenharmony_ci                    # Remove empty ranges -- invalid in RE.
1237db96d56Sopenharmony_ci                    for k in range(len(chunks)-1, 0, -1):
1247db96d56Sopenharmony_ci                        if chunks[k-1][-1] > chunks[k][0]:
1257db96d56Sopenharmony_ci                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
1267db96d56Sopenharmony_ci                            del chunks[k]
1277db96d56Sopenharmony_ci                    # Escape backslashes and hyphens for set difference (--).
1287db96d56Sopenharmony_ci                    # Hyphens that create ranges shouldn't be escaped.
1297db96d56Sopenharmony_ci                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
1307db96d56Sopenharmony_ci                                     for s in chunks)
1317db96d56Sopenharmony_ci                # Escape set operations (&&, ~~ and ||).
1327db96d56Sopenharmony_ci                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
1337db96d56Sopenharmony_ci                i = j+1
1347db96d56Sopenharmony_ci                if not stuff:
1357db96d56Sopenharmony_ci                    # Empty range: never match.
1367db96d56Sopenharmony_ci                    add('(?!)')
1377db96d56Sopenharmony_ci                elif stuff == '!':
1387db96d56Sopenharmony_ci                    # Negated empty range: match any character.
1397db96d56Sopenharmony_ci                    add('.')
1407db96d56Sopenharmony_ci                else:
1417db96d56Sopenharmony_ci                    if stuff[0] == '!':
1427db96d56Sopenharmony_ci                        stuff = '^' + stuff[1:]
1437db96d56Sopenharmony_ci                    elif stuff[0] in ('^', '['):
1447db96d56Sopenharmony_ci                        stuff = '\\' + stuff
1457db96d56Sopenharmony_ci                    add(f'[{stuff}]')
1467db96d56Sopenharmony_ci        else:
1477db96d56Sopenharmony_ci            add(re.escape(c))
1487db96d56Sopenharmony_ci    assert i == n
1497db96d56Sopenharmony_ci
1507db96d56Sopenharmony_ci    # Deal with STARs.
1517db96d56Sopenharmony_ci    inp = res
1527db96d56Sopenharmony_ci    res = []
1537db96d56Sopenharmony_ci    add = res.append
1547db96d56Sopenharmony_ci    i, n = 0, len(inp)
1557db96d56Sopenharmony_ci    # Fixed pieces at the start?
1567db96d56Sopenharmony_ci    while i < n and inp[i] is not STAR:
1577db96d56Sopenharmony_ci        add(inp[i])
1587db96d56Sopenharmony_ci        i += 1
1597db96d56Sopenharmony_ci    # Now deal with STAR fixed STAR fixed ...
1607db96d56Sopenharmony_ci    # For an interior `STAR fixed` pairing, we want to do a minimal
1617db96d56Sopenharmony_ci    # .*? match followed by `fixed`, with no possibility of backtracking.
1627db96d56Sopenharmony_ci    # Atomic groups ("(?>...)") allow us to spell that directly.
1637db96d56Sopenharmony_ci    # Note: people rely on the undocumented ability to join multiple
1647db96d56Sopenharmony_ci    # translate() results together via "|" to build large regexps matching
1657db96d56Sopenharmony_ci    # "one of many" shell patterns.
1667db96d56Sopenharmony_ci    while i < n:
1677db96d56Sopenharmony_ci        assert inp[i] is STAR
1687db96d56Sopenharmony_ci        i += 1
1697db96d56Sopenharmony_ci        if i == n:
1707db96d56Sopenharmony_ci            add(".*")
1717db96d56Sopenharmony_ci            break
1727db96d56Sopenharmony_ci        assert inp[i] is not STAR
1737db96d56Sopenharmony_ci        fixed = []
1747db96d56Sopenharmony_ci        while i < n and inp[i] is not STAR:
1757db96d56Sopenharmony_ci            fixed.append(inp[i])
1767db96d56Sopenharmony_ci            i += 1
1777db96d56Sopenharmony_ci        fixed = "".join(fixed)
1787db96d56Sopenharmony_ci        if i == n:
1797db96d56Sopenharmony_ci            add(".*")
1807db96d56Sopenharmony_ci            add(fixed)
1817db96d56Sopenharmony_ci        else:
1827db96d56Sopenharmony_ci            add(f"(?>.*?{fixed})")
1837db96d56Sopenharmony_ci    assert i == n
1847db96d56Sopenharmony_ci    res = "".join(res)
1857db96d56Sopenharmony_ci    return fr'(?s:{res})\Z'
186