17db96d56Sopenharmony_ci"""Filename matching with shell patterns. 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_cifnmatch(FILENAME, PATTERN) matches according to the local convention. 47db96d56Sopenharmony_cifnmatchcase(FILENAME, PATTERN) always takes case in account. 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ciThe functions operate by translating the pattern into a regular 77db96d56Sopenharmony_ciexpression. They cache the compiled regular expressions for speed. 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_ciThe function translate(PATTERN) returns a regular expression 107db96d56Sopenharmony_cicorresponding to PATTERN. (It does not compile it.) 117db96d56Sopenharmony_ci""" 127db96d56Sopenharmony_ciimport os 137db96d56Sopenharmony_ciimport posixpath 147db96d56Sopenharmony_ciimport re 157db96d56Sopenharmony_ciimport functools 167db96d56Sopenharmony_ci 177db96d56Sopenharmony_ci__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] 187db96d56Sopenharmony_ci 197db96d56Sopenharmony_cidef fnmatch(name, pat): 207db96d56Sopenharmony_ci """Test whether FILENAME matches PATTERN. 217db96d56Sopenharmony_ci 227db96d56Sopenharmony_ci Patterns are Unix shell style: 237db96d56Sopenharmony_ci 247db96d56Sopenharmony_ci * matches everything 257db96d56Sopenharmony_ci ? matches any single character 267db96d56Sopenharmony_ci [seq] matches any character in seq 277db96d56Sopenharmony_ci [!seq] matches any char not in seq 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci An initial period in FILENAME is not special. 307db96d56Sopenharmony_ci Both FILENAME and PATTERN are first case-normalized 317db96d56Sopenharmony_ci if the operating system requires it. 327db96d56Sopenharmony_ci If you don't want this, use fnmatchcase(FILENAME, PATTERN). 337db96d56Sopenharmony_ci """ 347db96d56Sopenharmony_ci name = os.path.normcase(name) 357db96d56Sopenharmony_ci pat = os.path.normcase(pat) 367db96d56Sopenharmony_ci return fnmatchcase(name, pat) 377db96d56Sopenharmony_ci 387db96d56Sopenharmony_ci@functools.lru_cache(maxsize=32768, typed=True) 397db96d56Sopenharmony_cidef _compile_pattern(pat): 407db96d56Sopenharmony_ci if isinstance(pat, bytes): 417db96d56Sopenharmony_ci pat_str = str(pat, 'ISO-8859-1') 427db96d56Sopenharmony_ci res_str = translate(pat_str) 437db96d56Sopenharmony_ci res = bytes(res_str, 'ISO-8859-1') 447db96d56Sopenharmony_ci else: 457db96d56Sopenharmony_ci res = translate(pat) 467db96d56Sopenharmony_ci return re.compile(res).match 477db96d56Sopenharmony_ci 487db96d56Sopenharmony_cidef filter(names, pat): 497db96d56Sopenharmony_ci """Construct a list from those elements of the iterable NAMES that match PAT.""" 507db96d56Sopenharmony_ci result = [] 517db96d56Sopenharmony_ci pat = os.path.normcase(pat) 527db96d56Sopenharmony_ci match = _compile_pattern(pat) 537db96d56Sopenharmony_ci if os.path is posixpath: 547db96d56Sopenharmony_ci # normcase on posix is NOP. Optimize it away from the loop. 557db96d56Sopenharmony_ci for name in names: 567db96d56Sopenharmony_ci if match(name): 577db96d56Sopenharmony_ci result.append(name) 587db96d56Sopenharmony_ci else: 597db96d56Sopenharmony_ci for name in names: 607db96d56Sopenharmony_ci if match(os.path.normcase(name)): 617db96d56Sopenharmony_ci result.append(name) 627db96d56Sopenharmony_ci return result 637db96d56Sopenharmony_ci 647db96d56Sopenharmony_cidef fnmatchcase(name, pat): 657db96d56Sopenharmony_ci """Test whether FILENAME matches PATTERN, including case. 667db96d56Sopenharmony_ci 677db96d56Sopenharmony_ci This is a version of fnmatch() which doesn't case-normalize 687db96d56Sopenharmony_ci its arguments. 697db96d56Sopenharmony_ci """ 707db96d56Sopenharmony_ci match = _compile_pattern(pat) 717db96d56Sopenharmony_ci return match(name) is not None 727db96d56Sopenharmony_ci 737db96d56Sopenharmony_ci 747db96d56Sopenharmony_cidef translate(pat): 757db96d56Sopenharmony_ci """Translate a shell PATTERN to a regular expression. 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci There is no way to quote meta-characters. 787db96d56Sopenharmony_ci """ 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ci STAR = object() 817db96d56Sopenharmony_ci res = [] 827db96d56Sopenharmony_ci add = res.append 837db96d56Sopenharmony_ci i, n = 0, len(pat) 847db96d56Sopenharmony_ci while i < n: 857db96d56Sopenharmony_ci c = pat[i] 867db96d56Sopenharmony_ci i = i+1 877db96d56Sopenharmony_ci if c == '*': 887db96d56Sopenharmony_ci # compress consecutive `*` into one 897db96d56Sopenharmony_ci if (not res) or res[-1] is not STAR: 907db96d56Sopenharmony_ci add(STAR) 917db96d56Sopenharmony_ci elif c == '?': 927db96d56Sopenharmony_ci add('.') 937db96d56Sopenharmony_ci elif c == '[': 947db96d56Sopenharmony_ci j = i 957db96d56Sopenharmony_ci if j < n and pat[j] == '!': 967db96d56Sopenharmony_ci j = j+1 977db96d56Sopenharmony_ci if j < n and pat[j] == ']': 987db96d56Sopenharmony_ci j = j+1 997db96d56Sopenharmony_ci while j < n and pat[j] != ']': 1007db96d56Sopenharmony_ci j = j+1 1017db96d56Sopenharmony_ci if j >= n: 1027db96d56Sopenharmony_ci add('\\[') 1037db96d56Sopenharmony_ci else: 1047db96d56Sopenharmony_ci stuff = pat[i:j] 1057db96d56Sopenharmony_ci if '-' not in stuff: 1067db96d56Sopenharmony_ci stuff = stuff.replace('\\', r'\\') 1077db96d56Sopenharmony_ci else: 1087db96d56Sopenharmony_ci chunks = [] 1097db96d56Sopenharmony_ci k = i+2 if pat[i] == '!' else i+1 1107db96d56Sopenharmony_ci while True: 1117db96d56Sopenharmony_ci k = pat.find('-', k, j) 1127db96d56Sopenharmony_ci if k < 0: 1137db96d56Sopenharmony_ci break 1147db96d56Sopenharmony_ci chunks.append(pat[i:k]) 1157db96d56Sopenharmony_ci i = k+1 1167db96d56Sopenharmony_ci k = k+3 1177db96d56Sopenharmony_ci chunk = pat[i:j] 1187db96d56Sopenharmony_ci if chunk: 1197db96d56Sopenharmony_ci chunks.append(chunk) 1207db96d56Sopenharmony_ci else: 1217db96d56Sopenharmony_ci chunks[-1] += '-' 1227db96d56Sopenharmony_ci # Remove empty ranges -- invalid in RE. 1237db96d56Sopenharmony_ci for k in range(len(chunks)-1, 0, -1): 1247db96d56Sopenharmony_ci if chunks[k-1][-1] > chunks[k][0]: 1257db96d56Sopenharmony_ci chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] 1267db96d56Sopenharmony_ci del chunks[k] 1277db96d56Sopenharmony_ci # Escape backslashes and hyphens for set difference (--). 1287db96d56Sopenharmony_ci # Hyphens that create ranges shouldn't be escaped. 1297db96d56Sopenharmony_ci stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') 1307db96d56Sopenharmony_ci for s in chunks) 1317db96d56Sopenharmony_ci # Escape set operations (&&, ~~ and ||). 1327db96d56Sopenharmony_ci stuff = re.sub(r'([&~|])', r'\\\1', stuff) 1337db96d56Sopenharmony_ci i = j+1 1347db96d56Sopenharmony_ci if not stuff: 1357db96d56Sopenharmony_ci # Empty range: never match. 1367db96d56Sopenharmony_ci add('(?!)') 1377db96d56Sopenharmony_ci elif stuff == '!': 1387db96d56Sopenharmony_ci # Negated empty range: match any character. 1397db96d56Sopenharmony_ci add('.') 1407db96d56Sopenharmony_ci else: 1417db96d56Sopenharmony_ci if stuff[0] == '!': 1427db96d56Sopenharmony_ci stuff = '^' + stuff[1:] 1437db96d56Sopenharmony_ci elif stuff[0] in ('^', '['): 1447db96d56Sopenharmony_ci stuff = '\\' + stuff 1457db96d56Sopenharmony_ci add(f'[{stuff}]') 1467db96d56Sopenharmony_ci else: 1477db96d56Sopenharmony_ci add(re.escape(c)) 1487db96d56Sopenharmony_ci assert i == n 1497db96d56Sopenharmony_ci 1507db96d56Sopenharmony_ci # Deal with STARs. 1517db96d56Sopenharmony_ci inp = res 1527db96d56Sopenharmony_ci res = [] 1537db96d56Sopenharmony_ci add = res.append 1547db96d56Sopenharmony_ci i, n = 0, len(inp) 1557db96d56Sopenharmony_ci # Fixed pieces at the start? 1567db96d56Sopenharmony_ci while i < n and inp[i] is not STAR: 1577db96d56Sopenharmony_ci add(inp[i]) 1587db96d56Sopenharmony_ci i += 1 1597db96d56Sopenharmony_ci # Now deal with STAR fixed STAR fixed ... 1607db96d56Sopenharmony_ci # For an interior `STAR fixed` pairing, we want to do a minimal 1617db96d56Sopenharmony_ci # .*? match followed by `fixed`, with no possibility of backtracking. 1627db96d56Sopenharmony_ci # Atomic groups ("(?>...)") allow us to spell that directly. 1637db96d56Sopenharmony_ci # Note: people rely on the undocumented ability to join multiple 1647db96d56Sopenharmony_ci # translate() results together via "|" to build large regexps matching 1657db96d56Sopenharmony_ci # "one of many" shell patterns. 1667db96d56Sopenharmony_ci while i < n: 1677db96d56Sopenharmony_ci assert inp[i] is STAR 1687db96d56Sopenharmony_ci i += 1 1697db96d56Sopenharmony_ci if i == n: 1707db96d56Sopenharmony_ci add(".*") 1717db96d56Sopenharmony_ci break 1727db96d56Sopenharmony_ci assert inp[i] is not STAR 1737db96d56Sopenharmony_ci fixed = [] 1747db96d56Sopenharmony_ci while i < n and inp[i] is not STAR: 1757db96d56Sopenharmony_ci fixed.append(inp[i]) 1767db96d56Sopenharmony_ci i += 1 1777db96d56Sopenharmony_ci fixed = "".join(fixed) 1787db96d56Sopenharmony_ci if i == n: 1797db96d56Sopenharmony_ci add(".*") 1807db96d56Sopenharmony_ci add(fixed) 1817db96d56Sopenharmony_ci else: 1827db96d56Sopenharmony_ci add(f"(?>.*?{fixed})") 1837db96d56Sopenharmony_ci assert i == n 1847db96d56Sopenharmony_ci res = "".join(res) 1857db96d56Sopenharmony_ci return fr'(?s:{res})\Z' 186