17db96d56Sopenharmony_cifrom test.support import (gc_collect, bigmemtest, _2G, 27db96d56Sopenharmony_ci cpython_only, captured_stdout, 37db96d56Sopenharmony_ci check_disallow_instantiation, is_emscripten, is_wasi, 47db96d56Sopenharmony_ci SHORT_TIMEOUT) 57db96d56Sopenharmony_ciimport locale 67db96d56Sopenharmony_ciimport re 77db96d56Sopenharmony_ciimport string 87db96d56Sopenharmony_ciimport sys 97db96d56Sopenharmony_ciimport time 107db96d56Sopenharmony_ciimport unittest 117db96d56Sopenharmony_ciimport warnings 127db96d56Sopenharmony_cifrom re import Scanner 137db96d56Sopenharmony_cifrom weakref import proxy 147db96d56Sopenharmony_ci 157db96d56Sopenharmony_ci# some platforms lack working multiprocessing 167db96d56Sopenharmony_citry: 177db96d56Sopenharmony_ci import _multiprocessing 187db96d56Sopenharmony_ciexcept ImportError: 197db96d56Sopenharmony_ci multiprocessing = None 207db96d56Sopenharmony_cielse: 217db96d56Sopenharmony_ci import multiprocessing 227db96d56Sopenharmony_ci 237db96d56Sopenharmony_ci# Misc tests from Tim Peters' re.doc 247db96d56Sopenharmony_ci 257db96d56Sopenharmony_ci# WARNING: Don't change details in these tests if you don't know 267db96d56Sopenharmony_ci# what you're doing. Some of these tests were carefully modeled to 277db96d56Sopenharmony_ci# cover most of the code. 
287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ciclass S(str): 307db96d56Sopenharmony_ci def __getitem__(self, index): 317db96d56Sopenharmony_ci return S(super().__getitem__(index)) 327db96d56Sopenharmony_ci 337db96d56Sopenharmony_ciclass B(bytes): 347db96d56Sopenharmony_ci def __getitem__(self, index): 357db96d56Sopenharmony_ci return B(super().__getitem__(index)) 367db96d56Sopenharmony_ci 377db96d56Sopenharmony_ciclass ReTests(unittest.TestCase): 387db96d56Sopenharmony_ci 397db96d56Sopenharmony_ci def assertTypedEqual(self, actual, expect, msg=None): 407db96d56Sopenharmony_ci self.assertEqual(actual, expect, msg) 417db96d56Sopenharmony_ci def recurse(actual, expect): 427db96d56Sopenharmony_ci if isinstance(expect, (tuple, list)): 437db96d56Sopenharmony_ci for x, y in zip(actual, expect): 447db96d56Sopenharmony_ci recurse(x, y) 457db96d56Sopenharmony_ci else: 467db96d56Sopenharmony_ci self.assertIs(type(actual), type(expect), msg) 477db96d56Sopenharmony_ci recurse(actual, expect) 487db96d56Sopenharmony_ci 497db96d56Sopenharmony_ci def checkPatternError(self, pattern, errmsg, pos=None): 507db96d56Sopenharmony_ci with self.assertRaises(re.error) as cm: 517db96d56Sopenharmony_ci re.compile(pattern) 527db96d56Sopenharmony_ci with self.subTest(pattern=pattern): 537db96d56Sopenharmony_ci err = cm.exception 547db96d56Sopenharmony_ci self.assertEqual(err.msg, errmsg) 557db96d56Sopenharmony_ci if pos is not None: 567db96d56Sopenharmony_ci self.assertEqual(err.pos, pos) 577db96d56Sopenharmony_ci 587db96d56Sopenharmony_ci def checkTemplateError(self, pattern, repl, string, errmsg, pos=None): 597db96d56Sopenharmony_ci with self.assertRaises(re.error) as cm: 607db96d56Sopenharmony_ci re.sub(pattern, repl, string) 617db96d56Sopenharmony_ci with self.subTest(pattern=pattern, repl=repl): 627db96d56Sopenharmony_ci err = cm.exception 637db96d56Sopenharmony_ci self.assertEqual(err.msg, errmsg) 647db96d56Sopenharmony_ci if pos is not None: 657db96d56Sopenharmony_ci 
self.assertEqual(err.pos, pos) 667db96d56Sopenharmony_ci 677db96d56Sopenharmony_ci def test_keep_buffer(self): 687db96d56Sopenharmony_ci # See bug 14212 697db96d56Sopenharmony_ci b = bytearray(b'x') 707db96d56Sopenharmony_ci it = re.finditer(b'a', b) 717db96d56Sopenharmony_ci with self.assertRaises(BufferError): 727db96d56Sopenharmony_ci b.extend(b'x'*400) 737db96d56Sopenharmony_ci list(it) 747db96d56Sopenharmony_ci del it 757db96d56Sopenharmony_ci gc_collect() 767db96d56Sopenharmony_ci b.extend(b'x'*400) 777db96d56Sopenharmony_ci 787db96d56Sopenharmony_ci def test_weakref(self): 797db96d56Sopenharmony_ci s = 'QabbbcR' 807db96d56Sopenharmony_ci x = re.compile('ab+c') 817db96d56Sopenharmony_ci y = proxy(x) 827db96d56Sopenharmony_ci self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) 837db96d56Sopenharmony_ci 847db96d56Sopenharmony_ci def test_search_star_plus(self): 857db96d56Sopenharmony_ci self.assertEqual(re.search('x*', 'axx').span(0), (0, 0)) 867db96d56Sopenharmony_ci self.assertEqual(re.search('x*', 'axx').span(), (0, 0)) 877db96d56Sopenharmony_ci self.assertEqual(re.search('x+', 'axx').span(0), (1, 3)) 887db96d56Sopenharmony_ci self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) 897db96d56Sopenharmony_ci self.assertIsNone(re.search('x', 'aaa')) 907db96d56Sopenharmony_ci self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) 917db96d56Sopenharmony_ci self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) 927db96d56Sopenharmony_ci self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) 937db96d56Sopenharmony_ci self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) 947db96d56Sopenharmony_ci self.assertIsNone(re.match('a+', 'xxx')) 957db96d56Sopenharmony_ci 967db96d56Sopenharmony_ci def test_branching(self): 977db96d56Sopenharmony_ci """Test Branching 987db96d56Sopenharmony_ci Test expressions using the OR ('|') operator.""" 997db96d56Sopenharmony_ci self.assertEqual(re.match('(ab|ba)', 'ab').span(), (0, 2)) 1007db96d56Sopenharmony_ci 
self.assertEqual(re.match('(ab|ba)', 'ba').span(), (0, 2)) 1017db96d56Sopenharmony_ci self.assertEqual(re.match('(abc|bac|ca|cb)', 'abc').span(), 1027db96d56Sopenharmony_ci (0, 3)) 1037db96d56Sopenharmony_ci self.assertEqual(re.match('(abc|bac|ca|cb)', 'bac').span(), 1047db96d56Sopenharmony_ci (0, 3)) 1057db96d56Sopenharmony_ci self.assertEqual(re.match('(abc|bac|ca|cb)', 'ca').span(), 1067db96d56Sopenharmony_ci (0, 2)) 1077db96d56Sopenharmony_ci self.assertEqual(re.match('(abc|bac|ca|cb)', 'cb').span(), 1087db96d56Sopenharmony_ci (0, 2)) 1097db96d56Sopenharmony_ci self.assertEqual(re.match('((a)|(b)|(c))', 'a').span(), (0, 1)) 1107db96d56Sopenharmony_ci self.assertEqual(re.match('((a)|(b)|(c))', 'b').span(), (0, 1)) 1117db96d56Sopenharmony_ci self.assertEqual(re.match('((a)|(b)|(c))', 'c').span(), (0, 1)) 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci def bump_num(self, matchobj): 1147db96d56Sopenharmony_ci int_value = int(matchobj.group(0)) 1157db96d56Sopenharmony_ci return str(int_value + 1) 1167db96d56Sopenharmony_ci 1177db96d56Sopenharmony_ci def test_basic_re_sub(self): 1187db96d56Sopenharmony_ci self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz') 1197db96d56Sopenharmony_ci self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz') 1207db96d56Sopenharmony_ci self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz') 1217db96d56Sopenharmony_ci self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz') 1227db96d56Sopenharmony_ci self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz') 1237db96d56Sopenharmony_ci self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz') 1247db96d56Sopenharmony_ci for y in ("\xe0", "\u0430", "\U0001d49c"): 1257db96d56Sopenharmony_ci self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz') 1267db96d56Sopenharmony_ci 1277db96d56Sopenharmony_ci self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 1287db96d56Sopenharmony_ci self.assertEqual(re.sub(r'\d+', self.bump_num, 
'08.2 -2 23x99y'), 1297db96d56Sopenharmony_ci '9.3 -3 24x100y') 1307db96d56Sopenharmony_ci self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 1317db96d56Sopenharmony_ci '9.3 -3 23x99y') 1327db96d56Sopenharmony_ci self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3), 1337db96d56Sopenharmony_ci '9.3 -3 23x99y') 1347db96d56Sopenharmony_ci 1357db96d56Sopenharmony_ci self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') 1367db96d56Sopenharmony_ci self.assertEqual(re.sub('.', r"\n", 'x'), '\n') 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci s = r"\1\1" 1397db96d56Sopenharmony_ci self.assertEqual(re.sub('(.)', s, 'x'), 'xx') 1407db96d56Sopenharmony_ci self.assertEqual(re.sub('(.)', s.replace('\\', r'\\'), 'x'), s) 1417db96d56Sopenharmony_ci self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) 1427db96d56Sopenharmony_ci 1437db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx') 1447db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx') 1457db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx') 1467db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx') 1477db96d56Sopenharmony_ci self.assertEqual(re.sub('()x', r'\g<0>\g<0>', 'xx'), 'xxxx') 1487db96d56Sopenharmony_ci 1497db96d56Sopenharmony_ci self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b') 1507db96d56Sopenharmony_ci self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b') 1517db96d56Sopenharmony_ci self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), 1527db96d56Sopenharmony_ci (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8))) 1537db96d56Sopenharmony_ci for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': 1547db96d56Sopenharmony_ci with self.subTest(c): 1557db96d56Sopenharmony_ci with self.assertRaises(re.error): 1567db96d56Sopenharmony_ci self.assertEqual(re.sub('a', '\\' + c, 'a'), 
'\\' + c) 1577db96d56Sopenharmony_ci 1587db96d56Sopenharmony_ci self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest') 1597db96d56Sopenharmony_ci 1607db96d56Sopenharmony_ci def test_bug_449964(self): 1617db96d56Sopenharmony_ci # fails for group followed by other escape 1627db96d56Sopenharmony_ci self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'), 1637db96d56Sopenharmony_ci 'xx\bxx\b') 1647db96d56Sopenharmony_ci 1657db96d56Sopenharmony_ci def test_bug_449000(self): 1667db96d56Sopenharmony_ci # Test for sub() on escaped characters 1677db96d56Sopenharmony_ci self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 1687db96d56Sopenharmony_ci 'abc\ndef\n') 1697db96d56Sopenharmony_ci self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 1707db96d56Sopenharmony_ci 'abc\ndef\n') 1717db96d56Sopenharmony_ci self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 1727db96d56Sopenharmony_ci 'abc\ndef\n') 1737db96d56Sopenharmony_ci self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 1747db96d56Sopenharmony_ci 'abc\ndef\n') 1757db96d56Sopenharmony_ci 1767db96d56Sopenharmony_ci def test_bug_1661(self): 1777db96d56Sopenharmony_ci # Verify that flags do not get silently ignored with compiled patterns 1787db96d56Sopenharmony_ci pattern = re.compile('.') 1797db96d56Sopenharmony_ci self.assertRaises(ValueError, re.match, pattern, 'A', re.I) 1807db96d56Sopenharmony_ci self.assertRaises(ValueError, re.search, pattern, 'A', re.I) 1817db96d56Sopenharmony_ci self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) 1827db96d56Sopenharmony_ci self.assertRaises(ValueError, re.compile, pattern, re.I) 1837db96d56Sopenharmony_ci 1847db96d56Sopenharmony_ci def test_bug_3629(self): 1857db96d56Sopenharmony_ci # A regex that triggered a bug in the sre-code validator 1867db96d56Sopenharmony_ci re.compile("(?P<quote>)(?(quote))") 1877db96d56Sopenharmony_ci 1887db96d56Sopenharmony_ci def test_sub_template_numeric_escape(self): 1897db96d56Sopenharmony_ci # bug 776311 and 
friends 1907db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\0', 'x'), '\0') 1917db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\000', 'x'), '\000') 1927db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\001', 'x'), '\001') 1937db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') 1947db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') 1957db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\111', 'x'), '\111') 1967db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\117', 'x'), '\117') 1977db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\377', 'x'), '\377') 1987db96d56Sopenharmony_ci 1997db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') 2007db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') 2017db96d56Sopenharmony_ci 2027db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') 2037db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') 2047db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') 2057db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') 2067db96d56Sopenharmony_ci self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_ci self.checkTemplateError('x', r'\400', 'x', 2097db96d56Sopenharmony_ci r'octal escape value \400 outside of ' 2107db96d56Sopenharmony_ci r'range 0-0o377', 0) 2117db96d56Sopenharmony_ci self.checkTemplateError('x', r'\777', 'x', 2127db96d56Sopenharmony_ci r'octal escape value \777 outside of ' 2137db96d56Sopenharmony_ci r'range 0-0o377', 0) 2147db96d56Sopenharmony_ci 2157db96d56Sopenharmony_ci self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1) 2167db96d56Sopenharmony_ci self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1) 2177db96d56Sopenharmony_ci self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1) 
2187db96d56Sopenharmony_ci self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1) 2197db96d56Sopenharmony_ci self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1) 2207db96d56Sopenharmony_ci self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1) 2217db96d56Sopenharmony_ci self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1) 2227db96d56Sopenharmony_ci self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1) 2237db96d56Sopenharmony_ci self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1) 2247db96d56Sopenharmony_ci self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1) 2257db96d56Sopenharmony_ci self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1) 2267db96d56Sopenharmony_ci self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1) 2277db96d56Sopenharmony_ci self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1) 2287db96d56Sopenharmony_ci 2297db96d56Sopenharmony_ci # in python2.3 (etc), these loop endlessly in sre_parser.py 2307db96d56Sopenharmony_ci self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') 2317db96d56Sopenharmony_ci self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 2327db96d56Sopenharmony_ci 'xz8') 2337db96d56Sopenharmony_ci self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 2347db96d56Sopenharmony_ci 'xza') 2357db96d56Sopenharmony_ci 2367db96d56Sopenharmony_ci def test_qualified_re_sub(self): 2377db96d56Sopenharmony_ci self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') 2387db96d56Sopenharmony_ci self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') 2397db96d56Sopenharmony_ci self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa') 2407db96d56Sopenharmony_ci 2417db96d56Sopenharmony_ci def test_bug_114660(self): 2427db96d56Sopenharmony_ci self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello 
there'), 2437db96d56Sopenharmony_ci 'hello there') 2447db96d56Sopenharmony_ci 2457db96d56Sopenharmony_ci def test_symbolic_groups(self): 2467db96d56Sopenharmony_ci re.compile(r'(?P<a>x)(?P=a)(?(a)y)') 2477db96d56Sopenharmony_ci re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)') 2487db96d56Sopenharmony_ci re.compile(r'(?P<a1>x)\1(?(1)y)') 2497db96d56Sopenharmony_ci re.compile(b'(?P<a1>x)(?P=a1)(?(a1)y)') 2507db96d56Sopenharmony_ci # New valid identifiers in Python 3 2517db96d56Sopenharmony_ci re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') 2527db96d56Sopenharmony_ci re.compile('(?P<>x)(?P=)(?()y)') 2537db96d56Sopenharmony_ci # Support > 100 groups. 2547db96d56Sopenharmony_ci pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) 2557db96d56Sopenharmony_ci pat = '(?:%s)(?(200)z|t)' % pat 2567db96d56Sopenharmony_ci self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) 2577db96d56Sopenharmony_ci 2587db96d56Sopenharmony_ci def test_symbolic_groups_errors(self): 2597db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a>)(?P<a>)', 2607db96d56Sopenharmony_ci "redefinition of group name 'a' as group 2; " 2617db96d56Sopenharmony_ci "was group 1") 2627db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a>(?P=a))', 2637db96d56Sopenharmony_ci "cannot refer to an open group", 10) 2647db96d56Sopenharmony_ci self.checkPatternError(r'(?Pxy)', 'unknown extension ?Px') 2657db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a>)(?P=a', 'missing ), unterminated name', 11) 2667db96d56Sopenharmony_ci self.checkPatternError(r'(?P=', 'missing group name', 4) 2677db96d56Sopenharmony_ci self.checkPatternError(r'(?P=)', 'missing group name', 4) 2687db96d56Sopenharmony_ci self.checkPatternError(r'(?P=1)', "bad character in group name '1'", 4) 2697db96d56Sopenharmony_ci self.checkPatternError(r'(?P=a)', "unknown group name 'a'") 2707db96d56Sopenharmony_ci self.checkPatternError(r'(?P=a1)', "unknown group name 'a1'") 2717db96d56Sopenharmony_ci self.checkPatternError(r'(?P=a.)', "bad character in group 
name 'a.'", 4) 2727db96d56Sopenharmony_ci self.checkPatternError(r'(?P<)', 'missing >, unterminated name', 4) 2737db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a', 'missing >, unterminated name', 4) 2747db96d56Sopenharmony_ci self.checkPatternError(r'(?P<', 'missing group name', 4) 2757db96d56Sopenharmony_ci self.checkPatternError(r'(?P<>)', 'missing group name', 4) 2767db96d56Sopenharmony_ci self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4) 2777db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4) 2787db96d56Sopenharmony_ci self.checkPatternError(r'(?(', 'missing group name', 3) 2797db96d56Sopenharmony_ci self.checkPatternError(r'(?())', 'missing group name', 3) 2807db96d56Sopenharmony_ci self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3) 2817db96d56Sopenharmony_ci self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) 2827db96d56Sopenharmony_ci self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) 2837db96d56Sopenharmony_ci self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) 2847db96d56Sopenharmony_ci self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) 2857db96d56Sopenharmony_ci self.checkPatternError('(?P=©)', "bad character in group name '©'", 4) 2867db96d56Sopenharmony_ci self.checkPatternError('(?(©)y)', "bad character in group name '©'", 3) 2877db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 2887db96d56Sopenharmony_ci r"bad character in group name '\\xc2\\xb5' " 2897db96d56Sopenharmony_ci r"at position 4") as w: 2907db96d56Sopenharmony_ci re.compile(b'(?P<\xc2\xb5>x)') 2917db96d56Sopenharmony_ci self.assertEqual(w.filename, __file__) 2927db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 2937db96d56Sopenharmony_ci r"bad character in group name '\\xc2\\xb5' " 2947db96d56Sopenharmony_ci r"at position 4"): 2957db96d56Sopenharmony_ci 
self.checkPatternError(b'(?P=\xc2\xb5)', 2967db96d56Sopenharmony_ci r"unknown group name '\xc2\xb5'", 4) 2977db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 2987db96d56Sopenharmony_ci r"bad character in group name '\\xc2\\xb5' " 2997db96d56Sopenharmony_ci r"at position 3"): 3007db96d56Sopenharmony_ci self.checkPatternError(b'(?(\xc2\xb5)y)', 3017db96d56Sopenharmony_ci r"unknown group name '\xc2\xb5'", 3) 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_ci def test_symbolic_refs(self): 3047db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') 3057db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '') 3067db96d56Sopenharmony_ci self.assertEqual(re.sub(b'(?P<a1>x)', br'\g<a1>', b'xx'), b'xx') 3077db96d56Sopenharmony_ci # New valid identifiers in Python 3 3087db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') 3097db96d56Sopenharmony_ci self.assertEqual(re.sub('(?P<>x)', r'\g<>', 'xx'), 'xx') 3107db96d56Sopenharmony_ci # Support > 100 groups. 
3117db96d56Sopenharmony_ci pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) 3127db96d56Sopenharmony_ci self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') 3137db96d56Sopenharmony_ci 3147db96d56Sopenharmony_ci def test_symbolic_refs_errors(self): 3157db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx', 3167db96d56Sopenharmony_ci 'missing >, unterminated name', 3) 3177db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<', 'xx', 3187db96d56Sopenharmony_ci 'missing group name', 3) 3197db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2) 3207db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx', 3217db96d56Sopenharmony_ci "bad character in group name 'a a'", 3) 3227db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx', 3237db96d56Sopenharmony_ci 'missing group name', 3) 3247db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx', 3257db96d56Sopenharmony_ci "bad character in group name '1a1'", 3) 3267db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx', 3277db96d56Sopenharmony_ci 'invalid group reference 2', 3) 3287db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\2', 'xx', 3297db96d56Sopenharmony_ci 'invalid group reference 2', 1) 3307db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): 3317db96d56Sopenharmony_ci re.sub('(?P<a>x)', r'\g<ab>', 'xx') 3327db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx', 3337db96d56Sopenharmony_ci "bad character in group name '-1'", 3) 3347db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 3357db96d56Sopenharmony_ci r"bad character in group name '\+1' " 3367db96d56Sopenharmony_ci r"at position 3") as w: 3377db96d56Sopenharmony_ci re.sub('(?P<a>x)', r'\g<+1>', 'xx') 3387db96d56Sopenharmony_ci self.assertEqual(w.filename, __file__) 3397db96d56Sopenharmony_ci with 
self.assertWarnsRegex(DeprecationWarning, 3407db96d56Sopenharmony_ci r"bad character in group name '1_0' " 3417db96d56Sopenharmony_ci r"at position 3"): 3427db96d56Sopenharmony_ci re.sub('()'*10, r'\g<1_0>', 'xx') 3437db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 3447db96d56Sopenharmony_ci r"bad character in group name ' 1 ' " 3457db96d56Sopenharmony_ci r"at position 3"): 3467db96d56Sopenharmony_ci re.sub('(?P<a>x)', r'\g< 1 >', 'xx') 3477db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx', 3487db96d56Sopenharmony_ci "bad character in group name '©'", 3) 3497db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 3507db96d56Sopenharmony_ci r"bad character in group name '\\xc2\\xb5' " 3517db96d56Sopenharmony_ci r"at position 3") as w: 3527db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, "unknown group name '\xc2\xb5'"): 3537db96d56Sopenharmony_ci re.sub(b'(?P<a>x)', b'\\g<\xc2\xb5>', b'xx') 3547db96d56Sopenharmony_ci self.assertEqual(w.filename, __file__) 3557db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<㊀>', 'xx', 3567db96d56Sopenharmony_ci "bad character in group name '㊀'", 3) 3577db96d56Sopenharmony_ci self.checkTemplateError('(?P<a>x)', r'\g<¹>', 'xx', 3587db96d56Sopenharmony_ci "bad character in group name '¹'", 3) 3597db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 3607db96d56Sopenharmony_ci r"bad character in group name '१' " 3617db96d56Sopenharmony_ci r"at position 3"): 3627db96d56Sopenharmony_ci re.sub('(?P<a>x)', r'\g<१>', 'xx') 3637db96d56Sopenharmony_ci 3647db96d56Sopenharmony_ci def test_re_subn(self): 3657db96d56Sopenharmony_ci self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 3667db96d56Sopenharmony_ci self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 3677db96d56Sopenharmony_ci self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) 3687db96d56Sopenharmony_ci self.assertEqual(re.subn("b*", "x", "xyz"), 
('xxxyxzx', 4)) 3697db96d56Sopenharmony_ci self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 3707db96d56Sopenharmony_ci self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2)) 3717db96d56Sopenharmony_ci 3727db96d56Sopenharmony_ci def test_re_split(self): 3737db96d56Sopenharmony_ci for string in ":a:b::c", S(":a:b::c"): 3747db96d56Sopenharmony_ci self.assertTypedEqual(re.split(":", string), 3757db96d56Sopenharmony_ci ['', 'a', 'b', '', 'c']) 3767db96d56Sopenharmony_ci self.assertTypedEqual(re.split(":+", string), 3777db96d56Sopenharmony_ci ['', 'a', 'b', 'c']) 3787db96d56Sopenharmony_ci self.assertTypedEqual(re.split("(:+)", string), 3797db96d56Sopenharmony_ci ['', ':', 'a', ':', 'b', '::', 'c']) 3807db96d56Sopenharmony_ci for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"), 3817db96d56Sopenharmony_ci memoryview(b":a:b::c")): 3827db96d56Sopenharmony_ci self.assertTypedEqual(re.split(b":", string), 3837db96d56Sopenharmony_ci [b'', b'a', b'b', b'', b'c']) 3847db96d56Sopenharmony_ci self.assertTypedEqual(re.split(b":+", string), 3857db96d56Sopenharmony_ci [b'', b'a', b'b', b'c']) 3867db96d56Sopenharmony_ci self.assertTypedEqual(re.split(b"(:+)", string), 3877db96d56Sopenharmony_ci [b'', b':', b'a', b':', b'b', b'::', b'c']) 3887db96d56Sopenharmony_ci for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432", 3897db96d56Sopenharmony_ci "\U0001d49c\U0001d49e\U0001d4b5"): 3907db96d56Sopenharmony_ci string = ":%s:%s::%s" % (a, b, c) 3917db96d56Sopenharmony_ci self.assertEqual(re.split(":", string), ['', a, b, '', c]) 3927db96d56Sopenharmony_ci self.assertEqual(re.split(":+", string), ['', a, b, c]) 3937db96d56Sopenharmony_ci self.assertEqual(re.split("(:+)", string), 3947db96d56Sopenharmony_ci ['', ':', a, ':', b, '::', c]) 3957db96d56Sopenharmony_ci 3967db96d56Sopenharmony_ci self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c']) 3977db96d56Sopenharmony_ci self.assertEqual(re.split("(:)+", ":a:b::c"), 3987db96d56Sopenharmony_ci 
['', ':', 'a', ':', 'b', ':', 'c']) 3997db96d56Sopenharmony_ci self.assertEqual(re.split("([b:]+)", ":a:b::c"), 4007db96d56Sopenharmony_ci ['', ':', 'a', ':b::', 'c']) 4017db96d56Sopenharmony_ci self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), 4027db96d56Sopenharmony_ci ['', None, ':', 'a', None, ':', '', 'b', None, '', 4037db96d56Sopenharmony_ci None, '::', 'c']) 4047db96d56Sopenharmony_ci self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), 4057db96d56Sopenharmony_ci ['', 'a', '', '', 'c']) 4067db96d56Sopenharmony_ci 4077db96d56Sopenharmony_ci for sep, expected in [ 4087db96d56Sopenharmony_ci (':*', ['', '', 'a', '', 'b', '', 'c', '']), 4097db96d56Sopenharmony_ci ('(?::*)', ['', '', 'a', '', 'b', '', 'c', '']), 4107db96d56Sopenharmony_ci ('(:*)', ['', ':', '', '', 'a', ':', '', '', 'b', '::', '', '', 'c', '', '']), 4117db96d56Sopenharmony_ci ('(:)*', ['', ':', '', None, 'a', ':', '', None, 'b', ':', '', None, 'c', None, '']), 4127db96d56Sopenharmony_ci ]: 4137db96d56Sopenharmony_ci with self.subTest(sep=sep): 4147db96d56Sopenharmony_ci self.assertTypedEqual(re.split(sep, ':a:b::c'), expected) 4157db96d56Sopenharmony_ci 4167db96d56Sopenharmony_ci for sep, expected in [ 4177db96d56Sopenharmony_ci ('', ['', ':', 'a', ':', 'b', ':', ':', 'c', '']), 4187db96d56Sopenharmony_ci (r'\b', [':', 'a', ':', 'b', '::', 'c', '']), 4197db96d56Sopenharmony_ci (r'(?=:)', ['', ':a', ':b', ':', ':c']), 4207db96d56Sopenharmony_ci (r'(?<=:)', [':', 'a:', 'b:', ':', 'c']), 4217db96d56Sopenharmony_ci ]: 4227db96d56Sopenharmony_ci with self.subTest(sep=sep): 4237db96d56Sopenharmony_ci self.assertTypedEqual(re.split(sep, ':a:b::c'), expected) 4247db96d56Sopenharmony_ci 4257db96d56Sopenharmony_ci def test_qualified_re_split(self): 4267db96d56Sopenharmony_ci self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 4277db96d56Sopenharmony_ci self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c']) 4287db96d56Sopenharmony_ci self.assertEqual(re.split(':', 'a:b:c:d', 
maxsplit=2), ['a', 'b', 'c:d']) 4297db96d56Sopenharmony_ci self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2), 4307db96d56Sopenharmony_ci ['', ':', 'a', ':', 'b::c']) 4317db96d56Sopenharmony_ci self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2), 4327db96d56Sopenharmony_ci ['', ':', 'a', ':', 'b::c']) 4337db96d56Sopenharmony_ci self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2), 4347db96d56Sopenharmony_ci ['', ':', '', '', 'a:b::c']) 4357db96d56Sopenharmony_ci 4367db96d56Sopenharmony_ci def test_re_findall(self): 4377db96d56Sopenharmony_ci self.assertEqual(re.findall(":+", "abc"), []) 4387db96d56Sopenharmony_ci for string in "a:b::c:::d", S("a:b::c:::d"): 4397db96d56Sopenharmony_ci self.assertTypedEqual(re.findall(":+", string), 4407db96d56Sopenharmony_ci [":", "::", ":::"]) 4417db96d56Sopenharmony_ci self.assertTypedEqual(re.findall("(:+)", string), 4427db96d56Sopenharmony_ci [":", "::", ":::"]) 4437db96d56Sopenharmony_ci self.assertTypedEqual(re.findall("(:)(:*)", string), 4447db96d56Sopenharmony_ci [(":", ""), (":", ":"), (":", "::")]) 4457db96d56Sopenharmony_ci for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"), 4467db96d56Sopenharmony_ci memoryview(b"a:b::c:::d")): 4477db96d56Sopenharmony_ci self.assertTypedEqual(re.findall(b":+", string), 4487db96d56Sopenharmony_ci [b":", b"::", b":::"]) 4497db96d56Sopenharmony_ci self.assertTypedEqual(re.findall(b"(:+)", string), 4507db96d56Sopenharmony_ci [b":", b"::", b":::"]) 4517db96d56Sopenharmony_ci self.assertTypedEqual(re.findall(b"(:)(:*)", string), 4527db96d56Sopenharmony_ci [(b":", b""), (b":", b":"), (b":", b"::")]) 4537db96d56Sopenharmony_ci for x in ("\xe0", "\u0430", "\U0001d49c"): 4547db96d56Sopenharmony_ci xx = x * 2 4557db96d56Sopenharmony_ci xxx = x * 3 4567db96d56Sopenharmony_ci string = "a%sb%sc%sd" % (x, xx, xxx) 4577db96d56Sopenharmony_ci self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx]) 4587db96d56Sopenharmony_ci self.assertEqual(re.findall("(%s+)" % 
x, string), [x, xx, xxx]) 4597db96d56Sopenharmony_ci self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string), 4607db96d56Sopenharmony_ci [(x, ""), (x, x), (x, xx)]) 4617db96d56Sopenharmony_ci 4627db96d56Sopenharmony_ci def test_bug_117612(self): 4637db96d56Sopenharmony_ci self.assertEqual(re.findall(r"(a|(b))", "aba"), 4647db96d56Sopenharmony_ci [("a", ""),("b", "b"),("a", "")]) 4657db96d56Sopenharmony_ci 4667db96d56Sopenharmony_ci def test_re_match(self): 4677db96d56Sopenharmony_ci for string in 'a', S('a'): 4687db96d56Sopenharmony_ci self.assertEqual(re.match('a', string).groups(), ()) 4697db96d56Sopenharmony_ci self.assertEqual(re.match('(a)', string).groups(), ('a',)) 4707db96d56Sopenharmony_ci self.assertEqual(re.match('(a)', string).group(0), 'a') 4717db96d56Sopenharmony_ci self.assertEqual(re.match('(a)', string).group(1), 'a') 4727db96d56Sopenharmony_ci self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a')) 4737db96d56Sopenharmony_ci for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'): 4747db96d56Sopenharmony_ci self.assertEqual(re.match(b'a', string).groups(), ()) 4757db96d56Sopenharmony_ci self.assertEqual(re.match(b'(a)', string).groups(), (b'a',)) 4767db96d56Sopenharmony_ci self.assertEqual(re.match(b'(a)', string).group(0), b'a') 4777db96d56Sopenharmony_ci self.assertEqual(re.match(b'(a)', string).group(1), b'a') 4787db96d56Sopenharmony_ci self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a')) 4797db96d56Sopenharmony_ci for a in ("\xe0", "\u0430", "\U0001d49c"): 4807db96d56Sopenharmony_ci self.assertEqual(re.match(a, a).groups(), ()) 4817db96d56Sopenharmony_ci self.assertEqual(re.match('(%s)' % a, a).groups(), (a,)) 4827db96d56Sopenharmony_ci self.assertEqual(re.match('(%s)' % a, a).group(0), a) 4837db96d56Sopenharmony_ci self.assertEqual(re.match('(%s)' % a, a).group(1), a) 4847db96d56Sopenharmony_ci self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a)) 4857db96d56Sopenharmony_ci 4867db96d56Sopenharmony_ci 
pat = re.compile('((a)|(b))(c)?') 4877db96d56Sopenharmony_ci self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 4887db96d56Sopenharmony_ci self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 4897db96d56Sopenharmony_ci self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 4907db96d56Sopenharmony_ci self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 4917db96d56Sopenharmony_ci self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 4927db96d56Sopenharmony_ci 4937db96d56Sopenharmony_ci pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 4947db96d56Sopenharmony_ci self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 4957db96d56Sopenharmony_ci self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 4967db96d56Sopenharmony_ci (None, 'b', None)) 4977db96d56Sopenharmony_ci self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 4987db96d56Sopenharmony_ci 4997db96d56Sopenharmony_ci def test_group(self): 5007db96d56Sopenharmony_ci class Index: 5017db96d56Sopenharmony_ci def __init__(self, value): 5027db96d56Sopenharmony_ci self.value = value 5037db96d56Sopenharmony_ci def __index__(self): 5047db96d56Sopenharmony_ci return self.value 5057db96d56Sopenharmony_ci # A single group 5067db96d56Sopenharmony_ci m = re.match('(a)(b)', 'ab') 5077db96d56Sopenharmony_ci self.assertEqual(m.group(), 'ab') 5087db96d56Sopenharmony_ci self.assertEqual(m.group(0), 'ab') 5097db96d56Sopenharmony_ci self.assertEqual(m.group(1), 'a') 5107db96d56Sopenharmony_ci self.assertEqual(m.group(Index(1)), 'a') 5117db96d56Sopenharmony_ci self.assertRaises(IndexError, m.group, -1) 5127db96d56Sopenharmony_ci self.assertRaises(IndexError, m.group, 3) 5137db96d56Sopenharmony_ci self.assertRaises(IndexError, m.group, 1<<1000) 5147db96d56Sopenharmony_ci self.assertRaises(IndexError, m.group, Index(1<<1000)) 5157db96d56Sopenharmony_ci self.assertRaises(IndexError, m.group, 'x') 5167db96d56Sopenharmony_ci # Multiple 
groups 5177db96d56Sopenharmony_ci self.assertEqual(m.group(2, 1), ('b', 'a')) 5187db96d56Sopenharmony_ci self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a')) 5197db96d56Sopenharmony_ci 5207db96d56Sopenharmony_ci def test_match_getitem(self): 5217db96d56Sopenharmony_ci pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 5227db96d56Sopenharmony_ci 5237db96d56Sopenharmony_ci m = pat.match('a') 5247db96d56Sopenharmony_ci self.assertEqual(m['a1'], 'a') 5257db96d56Sopenharmony_ci self.assertEqual(m['b2'], None) 5267db96d56Sopenharmony_ci self.assertEqual(m['c3'], None) 5277db96d56Sopenharmony_ci self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None') 5287db96d56Sopenharmony_ci self.assertEqual(m[0], 'a') 5297db96d56Sopenharmony_ci self.assertEqual(m[1], 'a') 5307db96d56Sopenharmony_ci self.assertEqual(m[2], None) 5317db96d56Sopenharmony_ci self.assertEqual(m[3], None) 5327db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5337db96d56Sopenharmony_ci m['X'] 5347db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5357db96d56Sopenharmony_ci m[-1] 5367db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5377db96d56Sopenharmony_ci m[4] 5387db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5397db96d56Sopenharmony_ci m[0, 1] 5407db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5417db96d56Sopenharmony_ci m[(0,)] 5427db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5437db96d56Sopenharmony_ci m[(0, 1)] 5447db96d56Sopenharmony_ci with self.assertRaisesRegex(IndexError, 'no such group'): 5457db96d56Sopenharmony_ci 'a1={a2}'.format_map(m) 5467db96d56Sopenharmony_ci 5477db96d56Sopenharmony_ci m = pat.match('ac') 5487db96d56Sopenharmony_ci self.assertEqual(m['a1'], 'a') 5497db96d56Sopenharmony_ci self.assertEqual(m['b2'], None) 5507db96d56Sopenharmony_ci 
self.assertEqual(m['c3'], 'c') 5517db96d56Sopenharmony_ci self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c') 5527db96d56Sopenharmony_ci self.assertEqual(m[0], 'ac') 5537db96d56Sopenharmony_ci self.assertEqual(m[1], 'a') 5547db96d56Sopenharmony_ci self.assertEqual(m[2], None) 5557db96d56Sopenharmony_ci self.assertEqual(m[3], 'c') 5567db96d56Sopenharmony_ci 5577db96d56Sopenharmony_ci # Cannot assign. 5587db96d56Sopenharmony_ci with self.assertRaises(TypeError): 5597db96d56Sopenharmony_ci m[0] = 1 5607db96d56Sopenharmony_ci 5617db96d56Sopenharmony_ci # No len(). 5627db96d56Sopenharmony_ci self.assertRaises(TypeError, len, m) 5637db96d56Sopenharmony_ci 5647db96d56Sopenharmony_ci def test_re_fullmatch(self): 5657db96d56Sopenharmony_ci # Issue 16203: Proposal: add re.fullmatch() method. 5667db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1)) 5677db96d56Sopenharmony_ci for string in "ab", S("ab"): 5687db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2)) 5697db96d56Sopenharmony_ci for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"): 5707db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2)) 5717db96d56Sopenharmony_ci for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e": 5727db96d56Sopenharmony_ci r = r"%s|%s" % (a, a + b) 5737db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2)) 5747db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3)) 5757db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3)) 5767db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2)) 5777db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3)) 5787db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4)) 5797db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r"a+", "ab")) 
5807db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r"abc$", "abc\n")) 5817db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n")) 5827db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n")) 5837db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4)) 5847db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4)) 5857db96d56Sopenharmony_ci self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2)) 5867db96d56Sopenharmony_ci 5877db96d56Sopenharmony_ci self.assertEqual( 5887db96d56Sopenharmony_ci re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3)) 5897db96d56Sopenharmony_ci self.assertEqual( 5907db96d56Sopenharmony_ci re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3)) 5917db96d56Sopenharmony_ci self.assertEqual( 5927db96d56Sopenharmony_ci re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3)) 5937db96d56Sopenharmony_ci 5947db96d56Sopenharmony_ci def test_re_groupref_exists(self): 5957db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 5967db96d56Sopenharmony_ci ('(', 'a')) 5977db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(), 5987db96d56Sopenharmony_ci (None, 'a')) 5997db96d56Sopenharmony_ci self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)')) 6007db96d56Sopenharmony_ci self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a')) 6017db96d56Sopenharmony_ci self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 6027db96d56Sopenharmony_ci ('a', 'b')) 6037db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), 6047db96d56Sopenharmony_ci (None, 'd')) 6057db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'cd').groups(), 6067db96d56Sopenharmony_ci (None, 'd')) 6077db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 
'a').groups(), 6087db96d56Sopenharmony_ci ('a', '')) 6097db96d56Sopenharmony_ci 6107db96d56Sopenharmony_ci # Tests for bug #1177831: exercise groups other than the first group 6117db96d56Sopenharmony_ci p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))') 6127db96d56Sopenharmony_ci self.assertEqual(p.match('abc').groups(), 6137db96d56Sopenharmony_ci ('a', 'b', 'c')) 6147db96d56Sopenharmony_ci self.assertEqual(p.match('ad').groups(), 6157db96d56Sopenharmony_ci ('a', None, 'd')) 6167db96d56Sopenharmony_ci self.assertIsNone(p.match('abd')) 6177db96d56Sopenharmony_ci self.assertIsNone(p.match('ac')) 6187db96d56Sopenharmony_ci 6197db96d56Sopenharmony_ci # Support > 100 groups. 6207db96d56Sopenharmony_ci pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) 6217db96d56Sopenharmony_ci pat = '(?:%s)(?(200)z)' % pat 6227db96d56Sopenharmony_ci self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) 6237db96d56Sopenharmony_ci 6247db96d56Sopenharmony_ci def test_re_groupref_exists_errors(self): 6257db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a>)(?(0)a|b)', 'bad group number', 10) 6267db96d56Sopenharmony_ci self.checkPatternError(r'()(?(-1)a|b)', 6277db96d56Sopenharmony_ci "bad character in group name '-1'", 5) 6287db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 6297db96d56Sopenharmony_ci r"bad character in group name '\+1' " 6307db96d56Sopenharmony_ci r"at position 5") as w: 6317db96d56Sopenharmony_ci re.compile(r'()(?(+1)a|b)') 6327db96d56Sopenharmony_ci self.assertEqual(w.filename, __file__) 6337db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 6347db96d56Sopenharmony_ci r"bad character in group name '1_0' " 6357db96d56Sopenharmony_ci r"at position 23"): 6367db96d56Sopenharmony_ci re.compile(r'()'*10 + r'(?(1_0)a|b)') 6377db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 6387db96d56Sopenharmony_ci r"bad character in group name ' 1 ' " 6397db96d56Sopenharmony_ci r"at position 5"): 
6407db96d56Sopenharmony_ci re.compile(r'()(?( 1 )a|b)') 6417db96d56Sopenharmony_ci self.checkPatternError(r'()(?(㊀)a|b)', 6427db96d56Sopenharmony_ci "bad character in group name '㊀'", 5) 6437db96d56Sopenharmony_ci self.checkPatternError(r'()(?(¹)a|b)', 6447db96d56Sopenharmony_ci "bad character in group name '¹'", 5) 6457db96d56Sopenharmony_ci with self.assertWarnsRegex(DeprecationWarning, 6467db96d56Sopenharmony_ci r"bad character in group name '१' " 6477db96d56Sopenharmony_ci r"at position 5"): 6487db96d56Sopenharmony_ci re.compile(r'()(?(१)a|b)') 6497db96d56Sopenharmony_ci self.checkPatternError(r'()(?(1', 6507db96d56Sopenharmony_ci "missing ), unterminated name", 5) 6517db96d56Sopenharmony_ci self.checkPatternError(r'()(?(1)a', 6527db96d56Sopenharmony_ci "missing ), unterminated subpattern", 2) 6537db96d56Sopenharmony_ci self.checkPatternError(r'()(?(1)a|b', 6547db96d56Sopenharmony_ci 'missing ), unterminated subpattern', 2) 6557db96d56Sopenharmony_ci self.checkPatternError(r'()(?(1)a|b|c', 6567db96d56Sopenharmony_ci 'conditional backref with more than ' 6577db96d56Sopenharmony_ci 'two branches', 10) 6587db96d56Sopenharmony_ci self.checkPatternError(r'()(?(1)a|b|c)', 6597db96d56Sopenharmony_ci 'conditional backref with more than ' 6607db96d56Sopenharmony_ci 'two branches', 10) 6617db96d56Sopenharmony_ci self.checkPatternError(r'()(?(2)a)', 6627db96d56Sopenharmony_ci "invalid group reference 2", 5) 6637db96d56Sopenharmony_ci 6647db96d56Sopenharmony_ci def test_re_groupref_exists_validation_bug(self): 6657db96d56Sopenharmony_ci for i in range(256): 6667db96d56Sopenharmony_ci with self.subTest(code=i): 6677db96d56Sopenharmony_ci re.compile(r'()(?(1)\x%02x?)' % i) 6687db96d56Sopenharmony_ci 6697db96d56Sopenharmony_ci def test_re_groupref_overflow(self): 6707db96d56Sopenharmony_ci from re._constants import MAXGROUPS 6717db96d56Sopenharmony_ci self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx', 6727db96d56Sopenharmony_ci 'invalid group reference %d' % 
MAXGROUPS, 3) 6737db96d56Sopenharmony_ci self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS, 6747db96d56Sopenharmony_ci 'invalid group reference %d' % MAXGROUPS, 10) 6757db96d56Sopenharmony_ci 6767db96d56Sopenharmony_ci def test_re_groupref(self): 6777db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), 6787db96d56Sopenharmony_ci ('|', 'a')) 6797db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(), 6807db96d56Sopenharmony_ci (None, 'a')) 6817db96d56Sopenharmony_ci self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|')) 6827db96d56Sopenharmony_ci self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a')) 6837db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(), 6847db96d56Sopenharmony_ci ('a', 'a')) 6857db96d56Sopenharmony_ci self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(), 6867db96d56Sopenharmony_ci (None, None)) 6877db96d56Sopenharmony_ci 6887db96d56Sopenharmony_ci self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4) 6897db96d56Sopenharmony_ci 6907db96d56Sopenharmony_ci def test_groupdict(self): 6917db96d56Sopenharmony_ci self.assertEqual(re.match('(?P<first>first) (?P<second>second)', 6927db96d56Sopenharmony_ci 'first second').groupdict(), 6937db96d56Sopenharmony_ci {'first':'first', 'second':'second'}) 6947db96d56Sopenharmony_ci 6957db96d56Sopenharmony_ci def test_expand(self): 6967db96d56Sopenharmony_ci self.assertEqual(re.match("(?P<first>first) (?P<second>second)", 6977db96d56Sopenharmony_ci "first second") 6987db96d56Sopenharmony_ci .expand(r"\2 \1 \g<second> \g<first>"), 6997db96d56Sopenharmony_ci "second first second first") 7007db96d56Sopenharmony_ci self.assertEqual(re.match("(?P<first>first)|(?P<second>second)", 7017db96d56Sopenharmony_ci "first") 7027db96d56Sopenharmony_ci .expand(r"\2 \g<second>"), 7037db96d56Sopenharmony_ci " ") 7047db96d56Sopenharmony_ci 7057db96d56Sopenharmony_ci def test_repeat_minmax(self): 
7067db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1}$", "abc")) 7077db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1}?$", "abc")) 7087db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1,2}$", "abc")) 7097db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1,2}?$", "abc")) 7107db96d56Sopenharmony_ci 7117db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c") 7127db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c") 7137db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c") 7147db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c") 7157db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c") 7167db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c") 7177db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c") 7187db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c") 7197db96d56Sopenharmony_ci 7207db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^x{1}$", "xxx")) 7217db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^x{1}?$", "xxx")) 7227db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^x{1,2}$", "xxx")) 7237db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^x{1,2}?$", "xxx")) 7247db96d56Sopenharmony_ci 7257db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{3}$", "xxx")) 7267db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{1,3}$", "xxx")) 7277db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{3,3}$", "xxx")) 7287db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{1,4}$", "xxx")) 7297db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{3,4}?$", "xxx")) 7307db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{3}?$", "xxx")) 7317db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{1,3}?$", "xxx")) 7327db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{1,4}?$", 
"xxx")) 7337db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{3,4}?$", "xxx")) 7347db96d56Sopenharmony_ci 7357db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^x{}$", "xxx")) 7367db96d56Sopenharmony_ci self.assertTrue(re.match(r"^x{}$", "x{}")) 7377db96d56Sopenharmony_ci 7387db96d56Sopenharmony_ci self.checkPatternError(r'x{2,1}', 7397db96d56Sopenharmony_ci 'min repeat greater than max repeat', 2) 7407db96d56Sopenharmony_ci 7417db96d56Sopenharmony_ci def test_getattr(self): 7427db96d56Sopenharmony_ci self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)") 7437db96d56Sopenharmony_ci self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U) 7447db96d56Sopenharmony_ci self.assertEqual(re.compile("(?i)(a)(b)").groups, 2) 7457db96d56Sopenharmony_ci self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {}) 7467db96d56Sopenharmony_ci self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex, 7477db96d56Sopenharmony_ci {'first': 1, 'other': 2}) 7487db96d56Sopenharmony_ci 7497db96d56Sopenharmony_ci self.assertEqual(re.match("(a)", "a").pos, 0) 7507db96d56Sopenharmony_ci self.assertEqual(re.match("(a)", "a").endpos, 1) 7517db96d56Sopenharmony_ci self.assertEqual(re.match("(a)", "a").string, "a") 7527db96d56Sopenharmony_ci self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1))) 7537db96d56Sopenharmony_ci self.assertTrue(re.match("(a)", "a").re) 7547db96d56Sopenharmony_ci 7557db96d56Sopenharmony_ci # Issue 14260. groupindex should be non-modifiable mapping. 
7567db96d56Sopenharmony_ci p = re.compile(r'(?i)(?P<first>a)(?P<other>b)') 7577db96d56Sopenharmony_ci self.assertEqual(sorted(p.groupindex), ['first', 'other']) 7587db96d56Sopenharmony_ci self.assertEqual(p.groupindex['other'], 2) 7597db96d56Sopenharmony_ci with self.assertRaises(TypeError): 7607db96d56Sopenharmony_ci p.groupindex['other'] = 0 7617db96d56Sopenharmony_ci self.assertEqual(p.groupindex['other'], 2) 7627db96d56Sopenharmony_ci 7637db96d56Sopenharmony_ci def test_special_escapes(self): 7647db96d56Sopenharmony_ci self.assertEqual(re.search(r"\b(b.)\b", 7657db96d56Sopenharmony_ci "abcd abc bcd bx").group(1), "bx") 7667db96d56Sopenharmony_ci self.assertEqual(re.search(r"\B(b.)\B", 7677db96d56Sopenharmony_ci "abc bcd bc abxd").group(1), "bx") 7687db96d56Sopenharmony_ci self.assertEqual(re.search(r"\b(b.)\b", 7697db96d56Sopenharmony_ci "abcd abc bcd bx", re.ASCII).group(1), "bx") 7707db96d56Sopenharmony_ci self.assertEqual(re.search(r"\B(b.)\B", 7717db96d56Sopenharmony_ci "abc bcd bc abxd", re.ASCII).group(1), "bx") 7727db96d56Sopenharmony_ci self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") 7737db96d56Sopenharmony_ci self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") 7747db96d56Sopenharmony_ci self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) 7757db96d56Sopenharmony_ci self.assertEqual(re.search(br"\b(b.)\b", 7767db96d56Sopenharmony_ci b"abcd abc bcd bx").group(1), b"bx") 7777db96d56Sopenharmony_ci self.assertEqual(re.search(br"\B(b.)\B", 7787db96d56Sopenharmony_ci b"abc bcd bc abxd").group(1), b"bx") 7797db96d56Sopenharmony_ci self.assertEqual(re.search(br"\b(b.)\b", 7807db96d56Sopenharmony_ci b"abcd abc bcd bx", re.LOCALE).group(1), b"bx") 7817db96d56Sopenharmony_ci self.assertEqual(re.search(br"\B(b.)\B", 7827db96d56Sopenharmony_ci b"abc bcd bc abxd", re.LOCALE).group(1), b"bx") 7837db96d56Sopenharmony_ci self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc") 7847db96d56Sopenharmony_ci 
self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc") 7857db96d56Sopenharmony_ci self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M)) 7867db96d56Sopenharmony_ci self.assertEqual(re.search(r"\d\D\w\W\s\S", 7877db96d56Sopenharmony_ci "1aa! a").group(0), "1aa! a") 7887db96d56Sopenharmony_ci self.assertEqual(re.search(br"\d\D\w\W\s\S", 7897db96d56Sopenharmony_ci b"1aa! a").group(0), b"1aa! a") 7907db96d56Sopenharmony_ci self.assertEqual(re.search(r"\d\D\w\W\s\S", 7917db96d56Sopenharmony_ci "1aa! a", re.ASCII).group(0), "1aa! a") 7927db96d56Sopenharmony_ci self.assertEqual(re.search(br"\d\D\w\W\s\S", 7937db96d56Sopenharmony_ci b"1aa! a", re.LOCALE).group(0), b"1aa! a") 7947db96d56Sopenharmony_ci 7957db96d56Sopenharmony_ci def test_other_escapes(self): 7967db96d56Sopenharmony_ci self.checkPatternError("\\", 'bad escape (end of pattern)', 0) 7977db96d56Sopenharmony_ci self.assertEqual(re.match(r"\(", '(').group(), '(') 7987db96d56Sopenharmony_ci self.assertIsNone(re.match(r"\(", ')')) 7997db96d56Sopenharmony_ci self.assertEqual(re.match(r"\\", '\\').group(), '\\') 8007db96d56Sopenharmony_ci self.assertEqual(re.match(r"[\]]", ']').group(), ']') 8017db96d56Sopenharmony_ci self.assertIsNone(re.match(r"[\]]", '[')) 8027db96d56Sopenharmony_ci self.assertEqual(re.match(r"[a\-c]", '-').group(), '-') 8037db96d56Sopenharmony_ci self.assertIsNone(re.match(r"[a\-c]", 'b')) 8047db96d56Sopenharmony_ci self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^') 8057db96d56Sopenharmony_ci self.assertIsNone(re.match(r"[\^a]+", 'b')) 8067db96d56Sopenharmony_ci re.purge() # for warnings 8077db96d56Sopenharmony_ci for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': 8087db96d56Sopenharmony_ci with self.subTest(c): 8097db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, '\\%c' % c) 8107db96d56Sopenharmony_ci for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': 8117db96d56Sopenharmony_ci with self.subTest(c): 8127db96d56Sopenharmony_ci self.assertRaises(re.error, 
re.compile, '[\\%c]' % c) 8137db96d56Sopenharmony_ci 8147db96d56Sopenharmony_ci def test_named_unicode_escapes(self): 8157db96d56Sopenharmony_ci # test individual Unicode named escapes 8167db96d56Sopenharmony_ci self.assertTrue(re.match(r'\N{LESS-THAN SIGN}', '<')) 8177db96d56Sopenharmony_ci self.assertTrue(re.match(r'\N{less-than sign}', '<')) 8187db96d56Sopenharmony_ci self.assertIsNone(re.match(r'\N{LESS-THAN SIGN}', '>')) 8197db96d56Sopenharmony_ci self.assertTrue(re.match(r'\N{SNAKE}', '\U0001f40d')) 8207db96d56Sopenharmony_ci self.assertTrue(re.match(r'\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH ' 8217db96d56Sopenharmony_ci r'HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}', 8227db96d56Sopenharmony_ci '\ufbf9')) 8237db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]', 8247db96d56Sopenharmony_ci '=')) 8257db96d56Sopenharmony_ci self.assertIsNone(re.match(r'[\N{LESS-THAN SIGN}-\N{GREATER-THAN SIGN}]', 8267db96d56Sopenharmony_ci ';')) 8277db96d56Sopenharmony_ci 8287db96d56Sopenharmony_ci # test errors in \N{name} handling - only valid names should pass 8297db96d56Sopenharmony_ci self.checkPatternError(r'\N', 'missing {', 2) 8307db96d56Sopenharmony_ci self.checkPatternError(r'[\N]', 'missing {', 3) 8317db96d56Sopenharmony_ci self.checkPatternError(r'\N{', 'missing character name', 3) 8327db96d56Sopenharmony_ci self.checkPatternError(r'[\N{', 'missing character name', 4) 8337db96d56Sopenharmony_ci self.checkPatternError(r'\N{}', 'missing character name', 3) 8347db96d56Sopenharmony_ci self.checkPatternError(r'[\N{}]', 'missing character name', 4) 8357db96d56Sopenharmony_ci self.checkPatternError(r'\NSNAKE}', 'missing {', 2) 8367db96d56Sopenharmony_ci self.checkPatternError(r'[\NSNAKE}]', 'missing {', 3) 8377db96d56Sopenharmony_ci self.checkPatternError(r'\N{SNAKE', 8387db96d56Sopenharmony_ci 'missing }, unterminated name', 3) 8397db96d56Sopenharmony_ci self.checkPatternError(r'[\N{SNAKE]', 8407db96d56Sopenharmony_ci 'missing }, 
unterminated name', 4) 8417db96d56Sopenharmony_ci self.checkPatternError(r'[\N{SNAKE]}', 8427db96d56Sopenharmony_ci "undefined character name 'SNAKE]'", 1) 8437db96d56Sopenharmony_ci self.checkPatternError(r'\N{SPAM}', 8447db96d56Sopenharmony_ci "undefined character name 'SPAM'", 0) 8457db96d56Sopenharmony_ci self.checkPatternError(r'[\N{SPAM}]', 8467db96d56Sopenharmony_ci "undefined character name 'SPAM'", 1) 8477db96d56Sopenharmony_ci self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}', 8487db96d56Sopenharmony_ci "undefined character name 'KEYCAP NUMBER SIGN'", 0) 8497db96d56Sopenharmony_ci self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]', 8507db96d56Sopenharmony_ci "undefined character name 'KEYCAP NUMBER SIGN'", 1) 8517db96d56Sopenharmony_ci self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0) 8527db96d56Sopenharmony_ci self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1) 8537db96d56Sopenharmony_ci 8547db96d56Sopenharmony_ci def test_string_boundaries(self): 8557db96d56Sopenharmony_ci # See http://bugs.python.org/issue10713 8567db96d56Sopenharmony_ci self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), 8577db96d56Sopenharmony_ci "abc") 8587db96d56Sopenharmony_ci # There's a word boundary at the start of a string. 8597db96d56Sopenharmony_ci self.assertTrue(re.match(r"\b", "abc")) 8607db96d56Sopenharmony_ci # A non-empty string includes a non-boundary zero-length match. 8617db96d56Sopenharmony_ci self.assertTrue(re.search(r"\B", "abc")) 8627db96d56Sopenharmony_ci # There is no non-boundary match at the start of a string. 8637db96d56Sopenharmony_ci self.assertFalse(re.match(r"\B", "abc")) 8647db96d56Sopenharmony_ci # However, an empty string contains no word boundaries, and also no 8657db96d56Sopenharmony_ci # non-boundaries. 
8667db96d56Sopenharmony_ci self.assertIsNone(re.search(r"\B", "")) 8677db96d56Sopenharmony_ci # This one is questionable and different from the perlre behaviour, 8687db96d56Sopenharmony_ci # but describes current behavior. 8697db96d56Sopenharmony_ci self.assertIsNone(re.search(r"\b", "")) 8707db96d56Sopenharmony_ci # A single word-character string has two boundaries, but no 8717db96d56Sopenharmony_ci # non-boundary gaps. 8727db96d56Sopenharmony_ci self.assertEqual(len(re.findall(r"\b", "a")), 2) 8737db96d56Sopenharmony_ci self.assertEqual(len(re.findall(r"\B", "a")), 0) 8747db96d56Sopenharmony_ci # If there are no words, there are no boundaries 8757db96d56Sopenharmony_ci self.assertEqual(len(re.findall(r"\b", " ")), 0) 8767db96d56Sopenharmony_ci self.assertEqual(len(re.findall(r"\b", " ")), 0) 8777db96d56Sopenharmony_ci # Can match around the whitespace. 8787db96d56Sopenharmony_ci self.assertEqual(len(re.findall(r"\B", " ")), 2) 8797db96d56Sopenharmony_ci 8807db96d56Sopenharmony_ci def test_bigcharset(self): 8817db96d56Sopenharmony_ci self.assertEqual(re.match("([\u2222\u2223])", 8827db96d56Sopenharmony_ci "\u2222").group(1), "\u2222") 8837db96d56Sopenharmony_ci r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255))) 8847db96d56Sopenharmony_ci self.assertEqual(re.match(r, "\uff01").group(), "\uff01") 8857db96d56Sopenharmony_ci 8867db96d56Sopenharmony_ci def test_big_codesize(self): 8877db96d56Sopenharmony_ci # Issue #1160 8887db96d56Sopenharmony_ci r = re.compile('|'.join(('%d'%x for x in range(10000)))) 8897db96d56Sopenharmony_ci self.assertTrue(r.match('1000')) 8907db96d56Sopenharmony_ci self.assertTrue(r.match('9999')) 8917db96d56Sopenharmony_ci 8927db96d56Sopenharmony_ci def test_anyall(self): 8937db96d56Sopenharmony_ci self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0), 8947db96d56Sopenharmony_ci "a\nb") 8957db96d56Sopenharmony_ci self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), 8967db96d56Sopenharmony_ci "a\n\nb") 
8977db96d56Sopenharmony_ci 8987db96d56Sopenharmony_ci def test_lookahead(self): 8997db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a") 9007db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a") 9017db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a") 9027db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a") 9037db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a") 9047db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") 9057db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") 9067db96d56Sopenharmony_ci 9077db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a") 9087db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a") 9097db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") 9107db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a") 9117db96d56Sopenharmony_ci 9127db96d56Sopenharmony_ci # Group reference. 9137db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba')) 9147db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac')) 9157db96d56Sopenharmony_ci # Conditional group reference. 9167db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc')) 9177db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc')) 9187db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc')) 9197db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc')) 9207db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc')) 9217db96d56Sopenharmony_ci # Group used before defined. 
9227db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc')) 9237db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc')) 9247db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc')) 9257db96d56Sopenharmony_ci 9267db96d56Sopenharmony_ci def test_lookbehind(self): 9277db96d56Sopenharmony_ci self.assertTrue(re.match(r'ab(?<=b)c', 'abc')) 9287db96d56Sopenharmony_ci self.assertIsNone(re.match(r'ab(?<=c)c', 'abc')) 9297db96d56Sopenharmony_ci self.assertIsNone(re.match(r'ab(?<!b)c', 'abc')) 9307db96d56Sopenharmony_ci self.assertTrue(re.match(r'ab(?<!c)c', 'abc')) 9317db96d56Sopenharmony_ci # Group reference. 9327db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac')) 9337db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa')) 9347db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac')) 9357db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa')) 9367db96d56Sopenharmony_ci # Conditional group reference. 9377db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc')) 9387db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc')) 9397db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc')) 9407db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc')) 9417db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc')) 9427db96d56Sopenharmony_ci # Group used before defined. 
9437db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)') 9447db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc')) 9457db96d56Sopenharmony_ci self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc')) 9467db96d56Sopenharmony_ci # Group defined in the same lookbehind pattern 9477db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)') 9487db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)') 9497db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)') 9507db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)') 9517db96d56Sopenharmony_ci 9527db96d56Sopenharmony_ci def test_ignore_case(self): 9537db96d56Sopenharmony_ci self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 9547db96d56Sopenharmony_ci self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") 9557db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b") 9567db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb") 9577db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b") 9587db96d56Sopenharmony_ci self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb") 9597db96d56Sopenharmony_ci self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a") 9607db96d56Sopenharmony_ci self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa") 9617db96d56Sopenharmony_ci self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a") 9627db96d56Sopenharmony_ci self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa") 9637db96d56Sopenharmony_ci 9647db96d56Sopenharmony_ci # Two different characters have the same lowercase. 
9657db96d56Sopenharmony_ci assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' 9667db96d56Sopenharmony_ci self.assertTrue(re.match(r'K', '\u212a', re.I)) 9677db96d56Sopenharmony_ci self.assertTrue(re.match(r'k', '\u212a', re.I)) 9687db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u212a', 'K', re.I)) 9697db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u212a', 'k', re.I)) 9707db96d56Sopenharmony_ci 9717db96d56Sopenharmony_ci # Two different characters have the same uppercase. 9727db96d56Sopenharmony_ci assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' 9737db96d56Sopenharmony_ci self.assertTrue(re.match(r'S', '\u017f', re.I)) 9747db96d56Sopenharmony_ci self.assertTrue(re.match(r's', '\u017f', re.I)) 9757db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u017f', 'S', re.I)) 9767db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u017f', 's', re.I)) 9777db96d56Sopenharmony_ci 9787db96d56Sopenharmony_ci # Two different characters have the same uppercase. Unicode 9.0+. 9797db96d56Sopenharmony_ci assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' 9807db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u0412', '\u0432', re.I)) 9817db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u0412', '\u1c80', re.I)) 9827db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u0432', '\u0412', re.I)) 9837db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u0432', '\u1c80', re.I)) 9847db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u1c80', '\u0412', re.I)) 9857db96d56Sopenharmony_ci self.assertTrue(re.match(r'\u1c80', '\u0432', re.I)) 9867db96d56Sopenharmony_ci 9877db96d56Sopenharmony_ci # Two different characters have the same multicharacter uppercase. 
9887db96d56Sopenharmony_ci assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st' 9897db96d56Sopenharmony_ci self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I)) 9907db96d56Sopenharmony_ci self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I)) 9917db96d56Sopenharmony_ci 9927db96d56Sopenharmony_ci def test_ignore_case_set(self): 9937db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19A]', 'A', re.I)) 9947db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19a]', 'a', re.I)) 9957db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19a]', 'A', re.I)) 9967db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19A]', 'a', re.I)) 9977db96d56Sopenharmony_ci self.assertTrue(re.match(br'[19A]', b'A', re.I)) 9987db96d56Sopenharmony_ci self.assertTrue(re.match(br'[19a]', b'a', re.I)) 9997db96d56Sopenharmony_ci self.assertTrue(re.match(br'[19a]', b'A', re.I)) 10007db96d56Sopenharmony_ci self.assertTrue(re.match(br'[19A]', b'a', re.I)) 10017db96d56Sopenharmony_ci 10027db96d56Sopenharmony_ci # Two different characters have the same lowercase. 10037db96d56Sopenharmony_ci assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' 10047db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19K]', '\u212a', re.I)) 10057db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19k]', '\u212a', re.I)) 10067db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u212a]', 'K', re.I)) 10077db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u212a]', 'k', re.I)) 10087db96d56Sopenharmony_ci 10097db96d56Sopenharmony_ci # Two different characters have the same uppercase. 
10107db96d56Sopenharmony_ci assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' 10117db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19S]', '\u017f', re.I)) 10127db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19s]', '\u017f', re.I)) 10137db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u017f]', 'S', re.I)) 10147db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u017f]', 's', re.I)) 10157db96d56Sopenharmony_ci 10167db96d56Sopenharmony_ci # Two different characters have the same uppercase. Unicode 9.0+. 10177db96d56Sopenharmony_ci assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' 10187db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u0412]', '\u0432', re.I)) 10197db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u0412]', '\u1c80', re.I)) 10207db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u0432]', '\u0412', re.I)) 10217db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u0432]', '\u1c80', re.I)) 10227db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u1c80]', '\u0412', re.I)) 10237db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\u1c80]', '\u0432', re.I)) 10247db96d56Sopenharmony_ci 10257db96d56Sopenharmony_ci # Two different characters have the same multicharacter uppercase. 10267db96d56Sopenharmony_ci assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st' 10277db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I)) 10287db96d56Sopenharmony_ci self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I)) 10297db96d56Sopenharmony_ci 10307db96d56Sopenharmony_ci def test_ignore_case_range(self): 10317db96d56Sopenharmony_ci # Issues #3511, #17381. 
10327db96d56Sopenharmony_ci self.assertTrue(re.match(r'[9-a]', '_', re.I)) 10337db96d56Sopenharmony_ci self.assertIsNone(re.match(r'[9-A]', '_', re.I)) 10347db96d56Sopenharmony_ci self.assertTrue(re.match(br'[9-a]', b'_', re.I)) 10357db96d56Sopenharmony_ci self.assertIsNone(re.match(br'[9-A]', b'_', re.I)) 10367db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I)) 10377db96d56Sopenharmony_ci self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I)) 10387db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I)) 10397db96d56Sopenharmony_ci self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I)) 10407db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I)) 10417db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I)) 10427db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I)) 10437db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I)) 10447db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I)) 10457db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I)) 10467db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I)) 10477db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I)) 10487db96d56Sopenharmony_ci 10497db96d56Sopenharmony_ci # Two different characters have the same lowercase. 
10507db96d56Sopenharmony_ci assert 'K'.lower() == '\u212a'.lower() == 'k' # 'K' 10517db96d56Sopenharmony_ci self.assertTrue(re.match(r'[J-M]', '\u212a', re.I)) 10527db96d56Sopenharmony_ci self.assertTrue(re.match(r'[j-m]', '\u212a', re.I)) 10537db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I)) 10547db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I)) 10557db96d56Sopenharmony_ci 10567db96d56Sopenharmony_ci # Two different characters have the same uppercase. 10577db96d56Sopenharmony_ci assert 's'.upper() == '\u017f'.upper() == 'S' # 'ſ' 10587db96d56Sopenharmony_ci self.assertTrue(re.match(r'[R-T]', '\u017f', re.I)) 10597db96d56Sopenharmony_ci self.assertTrue(re.match(r'[r-t]', '\u017f', re.I)) 10607db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I)) 10617db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I)) 10627db96d56Sopenharmony_ci 10637db96d56Sopenharmony_ci # Two different characters have the same uppercase. Unicode 9.0+. 10647db96d56Sopenharmony_ci assert '\u0432'.upper() == '\u1c80'.upper() == '\u0412' # 'в', 'ᲀ', 'В' 10657db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0411-\u0413]', '\u0432', re.I)) 10667db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0411-\u0413]', '\u1c80', re.I)) 10677db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0431-\u0433]', '\u0412', re.I)) 10687db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u0431-\u0433]', '\u1c80', re.I)) 10697db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0412', re.I)) 10707db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\u1c80-\u1c82]', '\u0432', re.I)) 10717db96d56Sopenharmony_ci 10727db96d56Sopenharmony_ci # Two different characters have the same multicharacter uppercase. 
10737db96d56Sopenharmony_ci assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST' # 'ſt', 'st' 10747db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I)) 10757db96d56Sopenharmony_ci self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I)) 10767db96d56Sopenharmony_ci 10777db96d56Sopenharmony_ci def test_category(self): 10787db96d56Sopenharmony_ci self.assertEqual(re.match(r"(\s)", " ").group(1), " ") 10797db96d56Sopenharmony_ci 10807db96d56Sopenharmony_ci @cpython_only 10817db96d56Sopenharmony_ci def test_case_helpers(self): 10827db96d56Sopenharmony_ci import _sre 10837db96d56Sopenharmony_ci for i in range(128): 10847db96d56Sopenharmony_ci c = chr(i) 10857db96d56Sopenharmony_ci lo = ord(c.lower()) 10867db96d56Sopenharmony_ci self.assertEqual(_sre.ascii_tolower(i), lo) 10877db96d56Sopenharmony_ci self.assertEqual(_sre.unicode_tolower(i), lo) 10887db96d56Sopenharmony_ci iscased = c in string.ascii_letters 10897db96d56Sopenharmony_ci self.assertEqual(_sre.ascii_iscased(i), iscased) 10907db96d56Sopenharmony_ci self.assertEqual(_sre.unicode_iscased(i), iscased) 10917db96d56Sopenharmony_ci 10927db96d56Sopenharmony_ci for i in list(range(128, 0x1000)) + [0x10400, 0x10428]: 10937db96d56Sopenharmony_ci c = chr(i) 10947db96d56Sopenharmony_ci self.assertEqual(_sre.ascii_tolower(i), i) 10957db96d56Sopenharmony_ci if i != 0x0130: 10967db96d56Sopenharmony_ci self.assertEqual(_sre.unicode_tolower(i), ord(c.lower())) 10977db96d56Sopenharmony_ci iscased = c != c.lower() or c != c.upper() 10987db96d56Sopenharmony_ci self.assertFalse(_sre.ascii_iscased(i)) 10997db96d56Sopenharmony_ci self.assertEqual(_sre.unicode_iscased(i), 11007db96d56Sopenharmony_ci c != c.lower() or c != c.upper()) 11017db96d56Sopenharmony_ci 11027db96d56Sopenharmony_ci self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130) 11037db96d56Sopenharmony_ci self.assertEqual(_sre.unicode_tolower(0x0130), ord('i')) 11047db96d56Sopenharmony_ci self.assertFalse(_sre.ascii_iscased(0x0130)) 
11057db96d56Sopenharmony_ci self.assertTrue(_sre.unicode_iscased(0x0130)) 11067db96d56Sopenharmony_ci 11077db96d56Sopenharmony_ci def test_not_literal(self): 11087db96d56Sopenharmony_ci self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b") 11097db96d56Sopenharmony_ci self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb") 11107db96d56Sopenharmony_ci 11117db96d56Sopenharmony_ci def test_possible_set_operations(self): 11127db96d56Sopenharmony_ci s = bytes(range(128)).decode() 11137db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11147db96d56Sopenharmony_ci p = re.compile(r'[0-9--1]') 11157db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('-./0123456789')) 11167db96d56Sopenharmony_ci self.assertEqual(re.findall(r'[--1]', s), list('-./01')) 11177db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11187db96d56Sopenharmony_ci p = re.compile(r'[%--1]') 11197db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list("%&'()*+,-1")) 11207db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11217db96d56Sopenharmony_ci p = re.compile(r'[%--]') 11227db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list("%&'()*+,-")) 11237db96d56Sopenharmony_ci 11247db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11257db96d56Sopenharmony_ci p = re.compile(r'[0-9&&1]') 11267db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('&0123456789')) 11277db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11287db96d56Sopenharmony_ci p = re.compile(r'[\d&&1]') 11297db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('&0123456789')) 11307db96d56Sopenharmony_ci self.assertEqual(re.findall(r'[&&1]', s), list('&1')) 11317db96d56Sopenharmony_ci 11327db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11337db96d56Sopenharmony_ci p = re.compile(r'[0-9||a]') 11347db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('0123456789a|')) 11357db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 
11367db96d56Sopenharmony_ci p = re.compile(r'[\d||a]') 11377db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('0123456789a|')) 11387db96d56Sopenharmony_ci self.assertEqual(re.findall(r'[||1]', s), list('1|')) 11397db96d56Sopenharmony_ci 11407db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11417db96d56Sopenharmony_ci p = re.compile(r'[0-9~~1]') 11427db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('0123456789~')) 11437db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11447db96d56Sopenharmony_ci p = re.compile(r'[\d~~1]') 11457db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('0123456789~')) 11467db96d56Sopenharmony_ci self.assertEqual(re.findall(r'[~~1]', s), list('1~')) 11477db96d56Sopenharmony_ci 11487db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11497db96d56Sopenharmony_ci p = re.compile(r'[[0-9]|]') 11507db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list('0123456789[]')) 11517db96d56Sopenharmony_ci 11527db96d56Sopenharmony_ci with self.assertWarns(FutureWarning): 11537db96d56Sopenharmony_ci p = re.compile(r'[[:digit:]|]') 11547db96d56Sopenharmony_ci self.assertEqual(p.findall(s), list(':[]dgit')) 11557db96d56Sopenharmony_ci 11567db96d56Sopenharmony_ci def test_search_coverage(self): 11577db96d56Sopenharmony_ci self.assertEqual(re.search(r"\s(b)", " b").group(1), "b") 11587db96d56Sopenharmony_ci self.assertEqual(re.search(r"a\s", "a ").group(0), "a ") 11597db96d56Sopenharmony_ci 11607db96d56Sopenharmony_ci def assertMatch(self, pattern, text, match=None, span=None, 11617db96d56Sopenharmony_ci matcher=re.fullmatch): 11627db96d56Sopenharmony_ci if match is None and span is None: 11637db96d56Sopenharmony_ci # the pattern matches the whole text 11647db96d56Sopenharmony_ci match = text 11657db96d56Sopenharmony_ci span = (0, len(text)) 11667db96d56Sopenharmony_ci elif match is None or span is None: 11677db96d56Sopenharmony_ci raise ValueError('If match is not None, span should be specified ' 
11687db96d56Sopenharmony_ci '(and vice versa).') 11697db96d56Sopenharmony_ci m = matcher(pattern, text) 11707db96d56Sopenharmony_ci self.assertTrue(m) 11717db96d56Sopenharmony_ci self.assertEqual(m.group(), match) 11727db96d56Sopenharmony_ci self.assertEqual(m.span(), span) 11737db96d56Sopenharmony_ci 11747db96d56Sopenharmony_ci LITERAL_CHARS = string.ascii_letters + string.digits + '!"%\',/:;<=>@_`' 11757db96d56Sopenharmony_ci 11767db96d56Sopenharmony_ci def test_re_escape(self): 11777db96d56Sopenharmony_ci p = ''.join(chr(i) for i in range(256)) 11787db96d56Sopenharmony_ci for c in p: 11797db96d56Sopenharmony_ci self.assertMatch(re.escape(c), c) 11807db96d56Sopenharmony_ci self.assertMatch('[' + re.escape(c) + ']', c) 11817db96d56Sopenharmony_ci self.assertMatch('(?x)' + re.escape(c), c) 11827db96d56Sopenharmony_ci self.assertMatch(re.escape(p), p) 11837db96d56Sopenharmony_ci for c in '-.]{}': 11847db96d56Sopenharmony_ci self.assertEqual(re.escape(c)[:1], '\\') 11857db96d56Sopenharmony_ci literal_chars = self.LITERAL_CHARS 11867db96d56Sopenharmony_ci self.assertEqual(re.escape(literal_chars), literal_chars) 11877db96d56Sopenharmony_ci 11887db96d56Sopenharmony_ci def test_re_escape_bytes(self): 11897db96d56Sopenharmony_ci p = bytes(range(256)) 11907db96d56Sopenharmony_ci for i in p: 11917db96d56Sopenharmony_ci b = bytes([i]) 11927db96d56Sopenharmony_ci self.assertMatch(re.escape(b), b) 11937db96d56Sopenharmony_ci self.assertMatch(b'[' + re.escape(b) + b']', b) 11947db96d56Sopenharmony_ci self.assertMatch(b'(?x)' + re.escape(b), b) 11957db96d56Sopenharmony_ci self.assertMatch(re.escape(p), p) 11967db96d56Sopenharmony_ci for i in b'-.]{}': 11977db96d56Sopenharmony_ci b = bytes([i]) 11987db96d56Sopenharmony_ci self.assertEqual(re.escape(b)[:1], b'\\') 11997db96d56Sopenharmony_ci literal_chars = self.LITERAL_CHARS.encode('ascii') 12007db96d56Sopenharmony_ci self.assertEqual(re.escape(literal_chars), literal_chars) 12017db96d56Sopenharmony_ci 
12027db96d56Sopenharmony_ci def test_re_escape_non_ascii(self): 12037db96d56Sopenharmony_ci s = 'xxx\u2620\u2620\u2620xxx' 12047db96d56Sopenharmony_ci s_escaped = re.escape(s) 12057db96d56Sopenharmony_ci self.assertEqual(s_escaped, s) 12067db96d56Sopenharmony_ci self.assertMatch(s_escaped, s) 12077db96d56Sopenharmony_ci self.assertMatch('.%s+.' % re.escape('\u2620'), s, 12087db96d56Sopenharmony_ci 'x\u2620\u2620\u2620x', (2, 7), re.search) 12097db96d56Sopenharmony_ci 12107db96d56Sopenharmony_ci def test_re_escape_non_ascii_bytes(self): 12117db96d56Sopenharmony_ci b = 'y\u2620y\u2620y'.encode('utf-8') 12127db96d56Sopenharmony_ci b_escaped = re.escape(b) 12137db96d56Sopenharmony_ci self.assertEqual(b_escaped, b) 12147db96d56Sopenharmony_ci self.assertMatch(b_escaped, b) 12157db96d56Sopenharmony_ci res = re.findall(re.escape('\u2620'.encode('utf-8')), b) 12167db96d56Sopenharmony_ci self.assertEqual(len(res), 2) 12177db96d56Sopenharmony_ci 12187db96d56Sopenharmony_ci def test_pickling(self): 12197db96d56Sopenharmony_ci import pickle 12207db96d56Sopenharmony_ci oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE) 12217db96d56Sopenharmony_ci for proto in range(pickle.HIGHEST_PROTOCOL + 1): 12227db96d56Sopenharmony_ci pickled = pickle.dumps(oldpat, proto) 12237db96d56Sopenharmony_ci newpat = pickle.loads(pickled) 12247db96d56Sopenharmony_ci self.assertEqual(newpat, oldpat) 12257db96d56Sopenharmony_ci # current pickle expects the _compile() reconstructor in re module 12267db96d56Sopenharmony_ci from re import _compile 12277db96d56Sopenharmony_ci 12287db96d56Sopenharmony_ci def test_copying(self): 12297db96d56Sopenharmony_ci import copy 12307db96d56Sopenharmony_ci p = re.compile(r'(?P<int>\d+)(?:\.(?P<frac>\d*))?') 12317db96d56Sopenharmony_ci self.assertIs(copy.copy(p), p) 12327db96d56Sopenharmony_ci self.assertIs(copy.deepcopy(p), p) 12337db96d56Sopenharmony_ci m = p.match('12.34') 12347db96d56Sopenharmony_ci self.assertIs(copy.copy(m), m) 
12357db96d56Sopenharmony_ci self.assertIs(copy.deepcopy(m), m) 12367db96d56Sopenharmony_ci 12377db96d56Sopenharmony_ci def test_constants(self): 12387db96d56Sopenharmony_ci self.assertEqual(re.I, re.IGNORECASE) 12397db96d56Sopenharmony_ci self.assertEqual(re.L, re.LOCALE) 12407db96d56Sopenharmony_ci self.assertEqual(re.M, re.MULTILINE) 12417db96d56Sopenharmony_ci self.assertEqual(re.S, re.DOTALL) 12427db96d56Sopenharmony_ci self.assertEqual(re.X, re.VERBOSE) 12437db96d56Sopenharmony_ci 12447db96d56Sopenharmony_ci def test_flags(self): 12457db96d56Sopenharmony_ci for flag in [re.I, re.M, re.X, re.S, re.A, re.U]: 12467db96d56Sopenharmony_ci self.assertTrue(re.compile('^pattern$', flag)) 12477db96d56Sopenharmony_ci for flag in [re.I, re.M, re.X, re.S, re.A, re.L]: 12487db96d56Sopenharmony_ci self.assertTrue(re.compile(b'^pattern$', flag)) 12497db96d56Sopenharmony_ci 12507db96d56Sopenharmony_ci def test_sre_character_literals(self): 12517db96d56Sopenharmony_ci for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]: 12527db96d56Sopenharmony_ci if i < 256: 12537db96d56Sopenharmony_ci self.assertTrue(re.match(r"\%03o" % i, chr(i))) 12547db96d56Sopenharmony_ci self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0")) 12557db96d56Sopenharmony_ci self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8")) 12567db96d56Sopenharmony_ci self.assertTrue(re.match(r"\x%02x" % i, chr(i))) 12577db96d56Sopenharmony_ci self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0")) 12587db96d56Sopenharmony_ci self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z")) 12597db96d56Sopenharmony_ci if i < 0x10000: 12607db96d56Sopenharmony_ci self.assertTrue(re.match(r"\u%04x" % i, chr(i))) 12617db96d56Sopenharmony_ci self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0")) 12627db96d56Sopenharmony_ci self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z")) 12637db96d56Sopenharmony_ci self.assertTrue(re.match(r"\U%08x" % i, chr(i))) 12647db96d56Sopenharmony_ci self.assertTrue(re.match(r"\U%08x0" % i, 
chr(i)+"0")) 12657db96d56Sopenharmony_ci self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z")) 12667db96d56Sopenharmony_ci self.assertTrue(re.match(r"\0", "\000")) 12677db96d56Sopenharmony_ci self.assertTrue(re.match(r"\08", "\0008")) 12687db96d56Sopenharmony_ci self.assertTrue(re.match(r"\01", "\001")) 12697db96d56Sopenharmony_ci self.assertTrue(re.match(r"\018", "\0018")) 12707db96d56Sopenharmony_ci self.checkPatternError(r"\567", 12717db96d56Sopenharmony_ci r'octal escape value \567 outside of ' 12727db96d56Sopenharmony_ci r'range 0-0o377', 0) 12737db96d56Sopenharmony_ci self.checkPatternError(r"\911", 'invalid group reference 91', 1) 12747db96d56Sopenharmony_ci self.checkPatternError(r"\x1", r'incomplete escape \x1', 0) 12757db96d56Sopenharmony_ci self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0) 12767db96d56Sopenharmony_ci self.checkPatternError(r"\u123", r'incomplete escape \u123', 0) 12777db96d56Sopenharmony_ci self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0) 12787db96d56Sopenharmony_ci self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0) 12797db96d56Sopenharmony_ci self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0) 12807db96d56Sopenharmony_ci self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0) 12817db96d56Sopenharmony_ci 12827db96d56Sopenharmony_ci def test_sre_character_class_literals(self): 12837db96d56Sopenharmony_ci for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]: 12847db96d56Sopenharmony_ci if i < 256: 12857db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\%o]" % i, chr(i))) 12867db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\%o8]" % i, chr(i))) 12877db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\%03o]" % i, chr(i))) 12887db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\%03o0]" % i, chr(i))) 12897db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\%03o8]" % i, chr(i))) 12907db96d56Sopenharmony_ci 
self.assertTrue(re.match(r"[\x%02x]" % i, chr(i))) 12917db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i))) 12927db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i))) 12937db96d56Sopenharmony_ci if i < 0x10000: 12947db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\u%04x]" % i, chr(i))) 12957db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i))) 12967db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i))) 12977db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\U%08x]" % i, chr(i))) 12987db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0")) 12997db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z")) 13007db96d56Sopenharmony_ci self.checkPatternError(r"[\567]", 13017db96d56Sopenharmony_ci r'octal escape value \567 outside of ' 13027db96d56Sopenharmony_ci r'range 0-0o377', 1) 13037db96d56Sopenharmony_ci self.checkPatternError(r"[\911]", r'bad escape \9', 1) 13047db96d56Sopenharmony_ci self.checkPatternError(r"[\x1z]", r'incomplete escape \x1', 1) 13057db96d56Sopenharmony_ci self.checkPatternError(r"[\u123z]", r'incomplete escape \u123', 1) 13067db96d56Sopenharmony_ci self.checkPatternError(r"[\U0001234z]", r'incomplete escape \U0001234', 1) 13077db96d56Sopenharmony_ci self.checkPatternError(r"[\U00110000]", r'bad escape \U00110000', 1) 13087db96d56Sopenharmony_ci self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e")) 13097db96d56Sopenharmony_ci 13107db96d56Sopenharmony_ci def test_sre_byte_literals(self): 13117db96d56Sopenharmony_ci for i in [0, 8, 16, 32, 64, 127, 128, 255]: 13127db96d56Sopenharmony_ci self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i]))) 13137db96d56Sopenharmony_ci self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0")) 13147db96d56Sopenharmony_ci self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8")) 13157db96d56Sopenharmony_ci self.assertTrue(re.match((r"\x%02x" % 
i).encode(), bytes([i]))) 13167db96d56Sopenharmony_ci self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0")) 13177db96d56Sopenharmony_ci self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z")) 13187db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, br"\u1234") 13197db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, br"\U00012345") 13207db96d56Sopenharmony_ci self.assertTrue(re.match(br"\0", b"\000")) 13217db96d56Sopenharmony_ci self.assertTrue(re.match(br"\08", b"\0008")) 13227db96d56Sopenharmony_ci self.assertTrue(re.match(br"\01", b"\001")) 13237db96d56Sopenharmony_ci self.assertTrue(re.match(br"\018", b"\0018")) 13247db96d56Sopenharmony_ci self.checkPatternError(br"\567", 13257db96d56Sopenharmony_ci r'octal escape value \567 outside of ' 13267db96d56Sopenharmony_ci r'range 0-0o377', 0) 13277db96d56Sopenharmony_ci self.checkPatternError(br"\911", 'invalid group reference 91', 1) 13287db96d56Sopenharmony_ci self.checkPatternError(br"\x1", r'incomplete escape \x1', 0) 13297db96d56Sopenharmony_ci self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0) 13307db96d56Sopenharmony_ci 13317db96d56Sopenharmony_ci def test_sre_byte_class_literals(self): 13327db96d56Sopenharmony_ci for i in [0, 8, 16, 32, 64, 127, 128, 255]: 13337db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\%o]" % i).encode(), bytes([i]))) 13347db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\%o8]" % i).encode(), bytes([i]))) 13357db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\%03o]" % i).encode(), bytes([i]))) 13367db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\%03o0]" % i).encode(), bytes([i]))) 13377db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\%03o8]" % i).encode(), bytes([i]))) 13387db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i]))) 13397db96d56Sopenharmony_ci self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i]))) 13407db96d56Sopenharmony_ci 
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i]))) 13417db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, br"[\u1234]") 13427db96d56Sopenharmony_ci self.assertRaises(re.error, re.compile, br"[\U00012345]") 13437db96d56Sopenharmony_ci self.checkPatternError(br"[\567]", 13447db96d56Sopenharmony_ci r'octal escape value \567 outside of ' 13457db96d56Sopenharmony_ci r'range 0-0o377', 1) 13467db96d56Sopenharmony_ci self.checkPatternError(br"[\911]", r'bad escape \9', 1) 13477db96d56Sopenharmony_ci self.checkPatternError(br"[\x1z]", r'incomplete escape \x1', 1) 13487db96d56Sopenharmony_ci 13497db96d56Sopenharmony_ci def test_character_set_errors(self): 13507db96d56Sopenharmony_ci self.checkPatternError(r'[', 'unterminated character set', 0) 13517db96d56Sopenharmony_ci self.checkPatternError(r'[^', 'unterminated character set', 0) 13527db96d56Sopenharmony_ci self.checkPatternError(r'[a', 'unterminated character set', 0) 13537db96d56Sopenharmony_ci # bug 545855 -- This pattern failed to cause a compile error as it 13547db96d56Sopenharmony_ci # should, instead provoking a TypeError. 
13557db96d56Sopenharmony_ci self.checkPatternError(r"[a-", 'unterminated character set', 0) 13567db96d56Sopenharmony_ci self.checkPatternError(r"[\w-b]", r'bad character range \w-b', 1) 13577db96d56Sopenharmony_ci self.checkPatternError(r"[a-\w]", r'bad character range a-\w', 1) 13587db96d56Sopenharmony_ci self.checkPatternError(r"[b-a]", 'bad character range b-a', 1) 13597db96d56Sopenharmony_ci 13607db96d56Sopenharmony_ci def test_bug_113254(self): 13617db96d56Sopenharmony_ci self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) 13627db96d56Sopenharmony_ci self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) 13637db96d56Sopenharmony_ci self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) 13647db96d56Sopenharmony_ci 13657db96d56Sopenharmony_ci def test_bug_527371(self): 13667db96d56Sopenharmony_ci # bug described in patches 527371/672491 13677db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(a)?a','a').lastindex) 13687db96d56Sopenharmony_ci self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1) 13697db96d56Sopenharmony_ci self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') 13707db96d56Sopenharmony_ci self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a') 13717db96d56Sopenharmony_ci self.assertEqual(re.match(r"((a))", "a").lastindex, 1) 13727db96d56Sopenharmony_ci 13737db96d56Sopenharmony_ci def test_bug_418626(self): 13747db96d56Sopenharmony_ci # bugs 418626 at al. -- Testing Greg Chapman's addition of op code 13757db96d56Sopenharmony_ci # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of 13767db96d56Sopenharmony_ci # pattern '*?' on a long string. 
13777db96d56Sopenharmony_ci self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) 13787db96d56Sopenharmony_ci self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 13797db96d56Sopenharmony_ci 20003) 13807db96d56Sopenharmony_ci self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) 13817db96d56Sopenharmony_ci # non-simple '*?' still used to hit the recursion limit, before the 13827db96d56Sopenharmony_ci # non-recursive scheme was implemented. 13837db96d56Sopenharmony_ci self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) 13847db96d56Sopenharmony_ci 13857db96d56Sopenharmony_ci def test_bug_612074(self): 13867db96d56Sopenharmony_ci pat="["+re.escape("\u2039")+"]" 13877db96d56Sopenharmony_ci self.assertEqual(re.compile(pat) and 1, 1) 13887db96d56Sopenharmony_ci 13897db96d56Sopenharmony_ci def test_stack_overflow(self): 13907db96d56Sopenharmony_ci # nasty cases that used to overflow the straightforward recursive 13917db96d56Sopenharmony_ci # implementation of repeated groups. 
# Closing assertions of a test whose ``def`` line precedes this chunk;
# reproduced unchanged.
self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')

def test_nothing_to_repeat(self):
    # A quantifier with nothing in front of it must be rejected, both at
    # the very start of the pattern (pos 0) and right after "(?:" (pos 3).
    for quantifier in ('*', '+', '?', '{1,2}'):
        for suffix in ('', '?'):
            bare = quantifier + suffix
            self.checkPatternError(bare, 'nothing to repeat', 0)
            self.checkPatternError('(?:' + bare + ')',
                                   'nothing to repeat', 3)

def test_multiple_repeat(self):
    # Stacking one repeat operator directly on another is an error that
    # is reported at the position where the outer operator starts.
    quantifiers = ('*', '+', '?', '{1,2}')
    suffixes = ('', '?', '+')
    for outer_reps in quantifiers:
        for outer_mod in suffixes:
            outer_op = outer_reps + outer_mod
            for inner_reps in quantifiers:
                for inner_mod in suffixes:
                    # Combinations where inner_mod + outer_reps is just
                    # '?' or '+' are skipped (presumably because the outer
                    # operator would then read as the inner one's suffix).
                    if inner_mod + outer_reps not in ('?', '+'):
                        inner_op = inner_reps + inner_mod
                        self.checkPatternError(
                            'x' + inner_op + outer_op,
                            'multiple repeat', 1 + len(inner_op))

def test_unlimited_zero_width_repeat(self):
    # Issue #9669
    # Repeating a group that can match the empty string must still
    # terminate and report "no match".
    for pattern in (r'(?:a?)*y', r'(?:a?)+y', r'(?:a?){2,}y',
                    r'(?:a?)*?y', r'(?:a?)+?y', r'(?:a?){2,}?y'):
        self.assertIsNone(re.match(pattern, 'z'))

def test_scanner(self):
    # Token callbacks: each receives the scanner and the matched text.
    def s_ident(scanner, token):
        return token

    def s_operator(scanner, token):
        return "op%s" % token

    def s_float(scanner, token):
        return float(token)

    def s_int(scanner, token):
        return int(token)

    lexicon = [
        (r"[a-zA-Z_]\w*", s_ident),
        (r"\d+\.\d*", s_float),
        (r"\d+", s_int),
        (r"=|\+|-|\*|/", s_operator),
        (r"\s+", None),     # whitespace produces no token
    ]
    scanner = Scanner(lexicon)

    self.assertTrue(scanner.scanner.scanner("").pattern)

    expected_tokens = ['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                       'op+', 'bar']
    self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                     (expected_tokens, ''))

def test_bug_448951(self):
    # bug 448951 (similar to 429357, but with single char match)
    # (Also test greedy matches.)
    for op in ('', '?', '*'):
        pattern = r'((.%s):)?z' % op
        self.assertEqual(re.match(pattern, 'z').groups(),
                         (None, None))
        self.assertEqual(re.match(pattern, 'a:z').groups(),
                         ('a:', 'a'))

def test_bug_725106(self):
    # capturing groups in alternatives in repeats
    cases = (
        ('^((a)|b)*', 'abc', ('b', 'a')),
        ('^(([ab])|c)*', 'abc', ('c', 'b')),
        ('^((d)|[ab])*', 'abc', ('b', None)),
        ('^((a)c|[ab])*', 'abc', ('b', None)),
        ('^((a)|b)*?c', 'abc', ('b', 'a')),
        ('^(([ab])|c)*?d', 'abcd', ('c', 'b')),
        ('^((d)|[ab])*?c', 'abc', ('b', None)),
        ('^((a)c|[ab])*?c', 'abc', ('b', None)),
    )
    for pattern, subject, groups in cases:
        self.assertEqual(re.match(pattern, subject).groups(), groups)

def test_bug_725149(self):
    # mark_stack_base restoring before restoring marks
    first = re.match('(a)(?:(?=(b)*)c)*', 'abb')
    self.assertEqual(first.groups(), ('a', None))
    second = re.match('(a)((?!(b)*))*', 'abb')
    self.assertEqual(second.groups(), ('a', None, None))

def test_bug_764548(self):
    # bug 764548, re.compile() barfs on str/unicode subclasses
    class my_unicode(str):
        pass

    compiled = re.compile(my_unicode("abc"))
    self.assertIsNone(compiled.match("xyz"))

def test_finditer(self):
    # finditer() through the module function and through a compiled
    # pattern, with pos/endpos given positionally and by keyword.
    subject = "a:b::c:::d"

    def texts(matches):
        return [item.group(0) for item in matches]

    self.assertEqual(texts(re.finditer(r":+", subject)),
                     [":", "::", ":::"])

    pat = re.compile(r":+")
    self.assertEqual(texts(pat.finditer(subject, 1, 10)),
                     [":", "::", ":::"])

    pat = re.compile(r":+")
    self.assertEqual(texts(pat.finditer(subject, pos=1, endpos=10)),
                     [":", "::", ":::"])

    pat = re.compile(r":+")
    self.assertEqual(texts(pat.finditer(subject, endpos=10, pos=1)),
                     [":", "::", ":::"])

    pat = re.compile(r":+")
    self.assertEqual(texts(pat.finditer(subject, pos=3, endpos=8)),
                     ["::", "::"])

def test_bug_926075(self):
    # The str and bytes patterns with the same text are distinct objects.
    str_pattern = re.compile('bug_926075')
    bytes_pattern = re.compile(b'bug_926075')
    self.assertIsNot(str_pattern, bytes_pattern)

def test_bug_931848(self):
    # A character class holding non-ASCII dots still splits on '.'.
    pattern = "[\u002E\u3002\uFF0E\uFF61]"
    pieces = re.compile(pattern).split("a.b.c")
    self.assertEqual(pieces, ['a','b','c'])

def test_bug_581080(self):
    # The iterator and the scanner both stop after the single match.
    matches = re.finditer(r"\s", "a b")
    self.assertEqual(next(matches).span(), (1,2))
    self.assertRaises(StopIteration, next, matches)

    scanner = re.compile(r"\s").scanner("a b")
    self.assertEqual(scanner.search().span(), (1, 2))
    self.assertIsNone(scanner.search())

def test_bug_817234(self):
    # ".*" yields the whole string, then one empty match at the end.
    matches = re.finditer(r".*", "asdf")
    for span in ((0, 4), (4, 4)):
        self.assertEqual(next(matches).span(), span)
    self.assertRaises(StopIteration, next, matches)

def test_bug_6561(self):
    # '\d' should match characters in Unicode category 'Nd'
    # (Number, Decimal Digit), but not those in 'Nl' (Number,
    # Letter) or 'No' (Number, Other).
    decimal_digits = [
        '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
        '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
        '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
    ]
    not_decimal_digits = [
        '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
        '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
        '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
        '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
    ]
    for char in decimal_digits:
        self.assertEqual(re.match(r'^\d$', char).group(0), char)
    for char in not_decimal_digits:
        self.assertIsNone(re.match(r'^\d$', char))

def test_empty_array(self):
    # SF bug 1647541
    # Bytes patterns must accept empty arrays of every listed typecode.
    import array
    for typecode in 'bBuhHiIlLfd':
        empty = array.array(typecode)
        self.assertIsNone(re.compile(b"bla").match(empty))
        self.assertEqual(re.compile(b"").match(empty).groups(), ())

def test_inline_flags(self):
    # Bug #1700
    upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
    lower_char = '\u1ea1' # Latin Small Letter A with Dot Below

    # IGNORECASE and DOTALL given as arguments, inline, or a mix of both
    # must behave the same for either letter case in the pattern.
    for prefix, flags in (
            ('.', re.I | re.S),
            ('(?i).', re.S),
            ('(?is).', 0),
            ('(?s)(?i).', 0),
    ):
        for in_pattern, in_subject in ((upper_char, lower_char),
                                       (lower_char, upper_char)):
            compiled = re.compile(prefix + in_pattern, flags)
            self.assertTrue(compiled.match('\n' + in_subject))

    # Inline flags combined with verbose mode.
    self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
    self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
    self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
    self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
    self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))

    msg = "global flags not at the start of the expression"
    self.checkPatternError(upper_char + '(?i)', msg, 1)

    # bpo-30605: Compiling a bytes instance regex was throwing a BytesWarning
    with warnings.catch_warnings():
        warnings.simplefilter('error', BytesWarning)
        self.checkPatternError(b'A(?i)', msg, 1)

    misplaced = (
        ('(?s).(?i)' + upper_char, 5),
        ('(?i) ' + upper_char + ' (?x)', 7),
        (' (?x) (?i) ' + upper_char, 1),
        ('^(?i)' + upper_char, 1),
        ('$|(?i)' + upper_char, 2),
        ('(?:(?i)' + upper_char + ')', 3),
        ('(^)?(?(1)(?i)' + upper_char + ')', 9),
        ('($)?(?(1)|(?i)' + upper_char + ')', 10),
    )
    for pattern, pos in misplaced:
        self.checkPatternError(pattern, msg, pos)


def test_dollar_matches_twice(self):
    r"""Test that $ does not include \n
    $ matches the end of string, and just before the terminating \n"""
    end_only = re.compile('$')
    self.assertEqual(end_only.sub('#', 'a\nb\n'), 'a\nb#\n#')
    self.assertEqual(end_only.sub('#', 'a\nb\nc'), 'a\nb\nc#')
    self.assertEqual(end_only.sub('#', '\n'), '#\n#')

    every_line = re.compile('$', re.MULTILINE)
    self.assertEqual(every_line.sub('#', 'a\nb\n'), 'a#\nb#\n#')
    self.assertEqual(every_line.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
    self.assertEqual(every_line.sub('#', '\n'), '#\n#')

def test_bytes_str_mixing(self):
    # Mixing str and bytes is disallowed
    pat = re.compile('.')
    bpat = re.compile(b'.')
    self.assertRaises(TypeError, pat.match, b'b')
    self.assertRaises(TypeError, bpat.match, 'b')
    for repl, subject in ((b'b', 'c'), ('b', b'c'), (b'b', b'c')):
        self.assertRaises(TypeError, pat.sub, repl, subject)
    for repl, subject in ((b'b', 'c'), ('b', b'c'), ('b', 'c')):
        self.assertRaises(TypeError, bpat.sub, repl, subject)

def test_ascii_and_unicode_flag(self):
    # String patterns
    for flags in (0, re.UNICODE):
        compiled = re.compile('\xc0', flags | re.IGNORECASE)
        self.assertTrue(compiled.match('\xe0'))
        compiled = re.compile(r'\w', flags)
        self.assertTrue(compiled.match('\xe0'))
    ascii_only = (
        re.compile('\xc0', re.ASCII | re.IGNORECASE),
        re.compile('(?a)\xc0', re.IGNORECASE),
        re.compile(r'\w', re.ASCII),
        re.compile(r'(?a)\w'),
    )
    for compiled in ascii_only:
        self.assertIsNone(compiled.match('\xe0'))
    # Bytes patterns
    for flags in (0, re.ASCII):
        compiled = re.compile(b'\xc0', flags | re.IGNORECASE)
        self.assertIsNone(compiled.match(b'\xe0'))
        compiled = re.compile(br'\w', flags)
        self.assertIsNone(compiled.match(b'\xe0'))
    # Incompatibilities
    self.assertRaises(ValueError, re.compile, br'\w', re.UNICODE)
    self.assertRaises(re.error, re.compile, br'(?u)\w')
    self.assertRaises(ValueError, re.compile, r'\w', re.UNICODE | re.ASCII)
    self.assertRaises(ValueError, re.compile, r'(?u)\w', re.ASCII)
    self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE)
    self.assertRaises(re.error, re.compile, r'(?au)\w')

def test_locale_flag(self):
    enc = locale.getpreferredencoding()
    # Search non-ASCII letter
    # Look for a byte >= 128 that decodes to a letter whose lowercase form
    # round-trips through ``enc`` as a single, different byte.
    for code in range(128, 256):
        try:
            char = bytes([code]).decode(enc)
            lowered = char.lower()
            if lowered == char:
                continue
            bletter = lowered.encode(enc)
            if len(bletter) != 1:
                continue
            if bletter.decode(enc) != lowered:
                continue
            bpat = re.escape(bytes([code]))
            break
        except (UnicodeError, TypeError):
            pass
    else:
        # No suitable byte found: only the compile calls below are checked.
        bletter = None
        bpat = b'A'
    # Bytes patterns
    compiled = re.compile(bpat, re.LOCALE | re.IGNORECASE)
    if bletter:
        self.assertTrue(compiled.match(bletter))
    compiled = re.compile(b'(?L)' + bpat, re.IGNORECASE)
    if bletter:
        self.assertTrue(compiled.match(bletter))
    compiled = re.compile(bpat, re.IGNORECASE)
    if bletter:
        self.assertIsNone(compiled.match(bletter))
    compiled = re.compile(br'\w', re.LOCALE)
    if bletter:
        self.assertTrue(compiled.match(bletter))
    compiled = re.compile(br'(?L)\w')
    if bletter:
        self.assertTrue(compiled.match(bletter))
    compiled = re.compile(br'\w')
    if bletter:
        self.assertIsNone(compiled.match(bletter))
    # Incompatibilities
    self.assertRaises(ValueError, re.compile, '', re.LOCALE)
    self.assertRaises(re.error, re.compile, '(?L)')
    self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
    self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
    self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
    self.assertRaises(re.error, re.compile, b'(?aL)')

def test_scoped_flags(self):
    # Flags switched on or off for a group apply inside that group only.
    self.assertTrue(re.match(r'(?i:a)b', 'Ab'))
    self.assertIsNone(re.match(r'(?i:a)b', 'aB'))
    self.assertIsNone(re.match(r'(?-i:a)b', 'Ab', re.IGNORECASE))
    self.assertTrue(re.match(r'(?-i:a)b', 'aB', re.IGNORECASE))
    self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
    self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))

    self.assertTrue(re.match(r'\w(?a:\W)\w', '\xe0\xe0\xe0'))
    self.assertTrue(re.match(r'(?a:\W(?u:\w)\W)', '\xe0\xe0\xe0'))
    self.assertTrue(re.match(r'\W(?u:\w)\W', '\xe0\xe0\xe0', re.ASCII))

    bad_flag_sets = (
        (r'(?a)(?-a:\w)',
         "bad inline flags: cannot turn off flags 'a', 'u' and 'L'", 8),
        (r'(?i-i:a)',
         'bad inline flags: flag turned on and off', 5),
        (r'(?au:a)',
         "bad inline flags: flags 'a', 'u' and 'L' are incompatible", 4),
        (br'(?aL:a)',
         "bad inline flags: flags 'a', 'u' and 'L' are incompatible", 4),
    )
    for pattern, errmsg, pos in bad_flag_sets:
        self.checkPatternError(pattern, errmsg, pos)

    malformed = (
        (r'(?-', 'missing flag', 3),
        (r'(?-+', 'missing flag', 3),
        (r'(?-z', 'unknown flag', 3),
        (r'(?-i', 'missing :', 4),
        (r'(?-i)', 'missing :', 4),
        (r'(?-i+', 'missing :', 4),
        (r'(?-iz', 'unknown flag', 4),
        (r'(?i:', 'missing ), unterminated subpattern', 0),
        (r'(?i', 'missing -, : or )', 3),
        (r'(?i+', 'missing -, : or )', 3),
        (r'(?iz', 'unknown flag', 3),
    )
    for pattern, errmsg, pos in malformed:
        self.checkPatternError(pattern, errmsg, pos)

def test_ignore_spaces(self):
    # In verbose mode every kind of whitespace in the pattern is skipped.
    for space in " \t\n\r\v\f":
        self.assertTrue(re.fullmatch(space + 'a', 'a', re.VERBOSE))
    for space in (b" ", b"\t", b"\n", b"\r", b"\v", b"\f"):
        self.assertTrue(re.fullmatch(space + b'a', b'a', re.VERBOSE))
    cases = (
        ('(?x) a', 'a', 0),
        (' (?x) a', 'a', re.VERBOSE),
        ('(?x) (?x) a', 'a', 0),
        (' a(?x: b) c', ' ab c', 0),
        (' a(?-x: b) c', 'a bc', re.VERBOSE),
        ('(?x) a(?-x: b) c', 'a bc', 0),
        ('(?x) a| b', 'a', 0),
        ('(?x) a| b', 'b', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.fullmatch(pattern, subject, flags))

def test_comments(self):
    # In verbose mode "#..." up to the end of the line is a comment.
    self.assertTrue(re.fullmatch('#x\na', 'a', re.VERBOSE))
    self.assertTrue(re.fullmatch(b'#x\na', b'a', re.VERBOSE))
    cases = (
        ('(?x)#x\na', 'a', 0),
        ('#x\n(?x)#y\na', 'a', re.VERBOSE),
        ('(?x)#x\n(?x)#y\na', 'a', 0),
        ('#x\na(?x:#y\nb)#z\nc', '#x\nab#z\nc', 0),
        ('#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc', re.VERBOSE),
        ('(?x)#x\na(?-x:#y\nb)#z\nc', 'a#y\nbc', 0),
        ('(?x)#x\na|#y\nb', 'a', 0),
        ('(?x)#x\na|#y\nb', 'b', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.fullmatch(pattern, subject, flags))

def test_bug_6509(self):
    # Replacement strings of both types must parse properly.
    # all strings
    compiled = re.compile(r'a(\w)')
    self.assertEqual(compiled.sub('b\\1', 'ac'), 'bc')
    compiled = re.compile('a(.)')
    self.assertEqual(compiled.sub('b\\1', 'a\u1234'), 'b\u1234')
    compiled = re.compile('..')
    self.assertEqual(compiled.sub(lambda m: 'str', 'a5'), 'str')

    # all bytes
    compiled = re.compile(br'a(\w)')
    self.assertEqual(compiled.sub(b'b\\1', b'ac'), b'bc')
    compiled = re.compile(b'a(.)')
    self.assertEqual(compiled.sub(b'b\\1', b'a\xCD'), b'b\xCD')
    compiled = re.compile(b'..')
    self.assertEqual(compiled.sub(lambda m: b'bytes', b'a5'), b'bytes')

def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both.  For smaller values
    # a RuntimeError is raised instead of OverflowError.
    long_overflow = 2**128
    self.assertRaises(TypeError, re.finditer, "a", {})
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, [long_overflow], 0, {}, ())
    with self.assertRaises(TypeError):
        _sre.compile({}, 0, [], 0, [], [])

def test_search_dot_unicode(self):
    # ".*" must step over characters of every width.
    for subject in ('123abc-',
                    '123\xe9-',
                    '123\u20ac-',
                    '123\U0010ffff-',
                    '123\xe9\u20ac\U0010ffff-'):
        self.assertTrue(re.search("123.*-", subject))

def test_compile(self):
    # Test return value when given string and pattern as parameter
    pattern = re.compile('random pattern')
    self.assertIsInstance(pattern, re.Pattern)
    same_pattern = re.compile(pattern)
    self.assertIsInstance(same_pattern, re.Pattern)
    self.assertIs(same_pattern, pattern)
    # Test behaviour when not given a string or pattern as parameter
    self.assertRaises(TypeError, re.compile, 0)

@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    subject = 'a' * size
    found = re.search('$', subject)
    self.assertIsNotNone(found)
    self.assertEqual(found.start(), size)
    self.assertEqual(found.end(), size)

# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    subject = 'a' * size
    result, count = re.subn('', '', subject)
    self.assertEqual(result, subject)
    self.assertEqual(count, size + 1)

def test_bug_16688(self):
    # Issue 16688: Backreferences make case-insensitive regex fail on
    # non-ASCII strings.
    pairs = re.findall(r"(?i)(a)\1", "aa \u0100")
    self.assertEqual(pairs, ['a'])
    span = re.match(r"(?s).{1,3}", "\u0100\u0100").span()
    self.assertEqual(span, (0, 2))

def test_repeat_minmax_overflow(self):
    # Issue #13169
    string = "x" * 100000
    # Bounds on either side of 2**16 must be honoured exactly.
    for bound in (65535, 65536):
        for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
            self.assertEqual(re.match(template % bound, string).span(),
                             (0, bound))
    # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
    huge = 2**128
    self.assertRaises(OverflowError, re.compile, r".{%d}" % huge)
    self.assertRaises(OverflowError, re.compile, r".{,%d}" % huge)
    self.assertRaises(OverflowError, re.compile, r".{%d,}?" % huge)
    self.assertRaises(OverflowError, re.compile,
                      r".{%d,%d}" % (2**129, huge))

@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # MAXREPEAT - 1 is still accepted as a bound; MAXREPEAT itself is not.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    string = "x" * 100000
    largest = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % largest, string))
    self.assertEqual(re.match(r".{,%d}" % largest, string).span(),
                     (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest, string))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)

def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    expected = "bad character in group name '<foo>'"
    self.checkPatternError('(?P=<foo>)', expected, 4)

def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    expected = "bad character in group name '?foo'"
    self.checkPatternError('(?P<?foo>)', expected, 4)

def test_issue17998(self):
    # Every repeat of '.' followed by 'yz' must find 'xyz', for str and
    # for the same pattern encoded to bytes.
    for reps in ('*', '+', '?', '{1}'):
        for mod in ('', '?'):
            pattern = '.' + reps + mod + 'yz'
            found = re.compile(pattern, re.S).findall('xyz')
            self.assertEqual(found, ['xyz'], msg=pattern)
            pattern = pattern.encode()
            found = re.compile(pattern, re.S).findall(b'xyz')
            self.assertEqual(found, [b'xyz'], msg=pattern)

def test_match_repr(self):
    # repr() of a match shows the type, the span and the matched text;
    # the module prefix of the type name is optional in the check.
    for string in ('[abracadabra]', S('[abracadabra]')):
        found = re.search(r'(.+)(.*?)\1', string)
        pattern = r"<(%s\.)?%s object; span=\(1, 12\), match='abracadabra'>" % (
            type(found).__module__, type(found).__qualname__
        )
        self.assertRegex(repr(found), pattern)
    bytes_like = (b'[abracadabra]', B(b'[abracadabra]'),
                  bytearray(b'[abracadabra]'),
                  memoryview(b'[abracadabra]'))
    for string in bytes_like:
        found = re.search(br'(.+)(.*?)\1', string)
        pattern = r"<(%s\.)?%s object; span=\(1, 12\), match=b'abracadabra'>" % (
            type(found).__module__, type(found).__qualname__
        )
        self.assertRegex(repr(found), pattern)

    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    # Both expected patterns are built from type(second), as in the
    # original test.
    type_names = (type(second).__module__, type(second).__qualname__)
    pattern = r"<(%s\.)?%s object; span=\(0, 2\), match='aa'>" % type_names
    self.assertRegex(repr(first), pattern)
    pattern = r"<(%s\.)?%s object; span=\(3, 5\), match='bb'>" % type_names
    self.assertRegex(repr(second), pattern)

def test_zerowidth(self):
    # Issues 852532, 1647489, 3262, 25054.
    text = "a::bc"
    self.assertEqual(re.split(r"\b", text), ['', 'a', '::', 'bc', ''])
    self.assertEqual(re.split(r"\b|:+", text), ['', 'a', '', '', 'bc', ''])
    self.assertEqual(re.split(r"(?<!\w)(?=\w)|:+", text), ['', 'a', '', 'bc'])
    self.assertEqual(re.split(r"(?<=\w)(?!\w)|:+", text), ['a', '', 'bc', ''])

    self.assertEqual(re.sub(r"\b", "-", text), '-a-::-bc-')
    self.assertEqual(re.sub(r"\b|:+", "-", text), '-a---bc-')
    self.assertEqual(re.sub(r"(\b|:+)", r"[\1]", text), '[]a[][::][]bc[]')

    self.assertEqual(re.findall(r"\b|:+", text), ['', '', '::', '', ''])
    self.assertEqual(re.findall(r"\b|\w+", text),
                     ['', 'a', '', '', 'bc', ''])

    spans = [m.span() for m in re.finditer(r"\b|:+", text)]
    self.assertEqual(spans, [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
    spans = [m.span() for m in re.finditer(r"\b|\w+", text)]
    self.assertEqual(spans,
                     [(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])

def test_bug_2537(self):
    # issue 2537: empty submatches
    for outer_op in ('{0,}', '*', '+', '{1,187}'):
        for inner_op in ('{0,}', '*', '?'):
            compiled = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
            found = compiled.match("xyyzy")
            self.assertEqual(found.group(0), "xyy")
            self.assertEqual(found.group(1), "")
            self.assertEqual(found.group(2), "y")

19657db96d56Sopenharmony_ci def test_keyword_parameters(self): 19667db96d56Sopenharmony_ci # Issue #20283: Accepting the string keyword parameter. 19677db96d56Sopenharmony_ci pat = re.compile(r'(ab)') 19687db96d56Sopenharmony_ci self.assertEqual( 19697db96d56Sopenharmony_ci pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9)) 19707db96d56Sopenharmony_ci self.assertEqual( 19717db96d56Sopenharmony_ci pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9)) 19727db96d56Sopenharmony_ci self.assertEqual( 19737db96d56Sopenharmony_ci pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9)) 19747db96d56Sopenharmony_ci self.assertEqual( 19757db96d56Sopenharmony_ci pat.findall(string='abracadabra', pos=3, endpos=10), ['ab']) 19767db96d56Sopenharmony_ci self.assertEqual( 19777db96d56Sopenharmony_ci pat.split(string='abracadabra', maxsplit=1), 19787db96d56Sopenharmony_ci ['', 'ab', 'racadabra']) 19797db96d56Sopenharmony_ci self.assertEqual( 19807db96d56Sopenharmony_ci pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(), 19817db96d56Sopenharmony_ci (7, 9)) 19827db96d56Sopenharmony_ci 19837db96d56Sopenharmony_ci def test_bug_20998(self): 19847db96d56Sopenharmony_ci # Issue #20998: Fullmatch of repeated single character pattern 19857db96d56Sopenharmony_ci # with ignore case. 
19867db96d56Sopenharmony_ci self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) 19877db96d56Sopenharmony_ci 19887db96d56Sopenharmony_ci @unittest.skipIf( 19897db96d56Sopenharmony_ci is_emscripten or is_wasi, 19907db96d56Sopenharmony_ci "musl libc issue on Emscripten/WASI, bpo-46390" 19917db96d56Sopenharmony_ci ) 19927db96d56Sopenharmony_ci def test_locale_caching(self): 19937db96d56Sopenharmony_ci # Issue #22410 19947db96d56Sopenharmony_ci oldlocale = locale.setlocale(locale.LC_CTYPE) 19957db96d56Sopenharmony_ci self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) 19967db96d56Sopenharmony_ci for loc in 'en_US.iso88591', 'en_US.utf8': 19977db96d56Sopenharmony_ci try: 19987db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, loc) 19997db96d56Sopenharmony_ci except locale.Error: 20007db96d56Sopenharmony_ci # Unsupported locale on this system 20017db96d56Sopenharmony_ci self.skipTest('test needs %s locale' % loc) 20027db96d56Sopenharmony_ci 20037db96d56Sopenharmony_ci re.purge() 20047db96d56Sopenharmony_ci self.check_en_US_iso88591() 20057db96d56Sopenharmony_ci self.check_en_US_utf8() 20067db96d56Sopenharmony_ci re.purge() 20077db96d56Sopenharmony_ci self.check_en_US_utf8() 20087db96d56Sopenharmony_ci self.check_en_US_iso88591() 20097db96d56Sopenharmony_ci 20107db96d56Sopenharmony_ci def check_en_US_iso88591(self): 20117db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') 20127db96d56Sopenharmony_ci self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) 20137db96d56Sopenharmony_ci self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I)) 20147db96d56Sopenharmony_ci self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I)) 20157db96d56Sopenharmony_ci self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) 20167db96d56Sopenharmony_ci self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5')) 20177db96d56Sopenharmony_ci self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5')) 20187db96d56Sopenharmony_ci 20197db96d56Sopenharmony_ci def 
check_en_US_utf8(self): 20207db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') 20217db96d56Sopenharmony_ci self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I)) 20227db96d56Sopenharmony_ci self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I)) 20237db96d56Sopenharmony_ci self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I)) 20247db96d56Sopenharmony_ci self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5')) 20257db96d56Sopenharmony_ci self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) 20267db96d56Sopenharmony_ci self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) 20277db96d56Sopenharmony_ci 20287db96d56Sopenharmony_ci @unittest.skipIf( 20297db96d56Sopenharmony_ci is_emscripten or is_wasi, 20307db96d56Sopenharmony_ci "musl libc issue on Emscripten/WASI, bpo-46390" 20317db96d56Sopenharmony_ci ) 20327db96d56Sopenharmony_ci def test_locale_compiled(self): 20337db96d56Sopenharmony_ci oldlocale = locale.setlocale(locale.LC_CTYPE) 20347db96d56Sopenharmony_ci self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) 20357db96d56Sopenharmony_ci for loc in 'en_US.iso88591', 'en_US.utf8': 20367db96d56Sopenharmony_ci try: 20377db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, loc) 20387db96d56Sopenharmony_ci except locale.Error: 20397db96d56Sopenharmony_ci # Unsupported locale on this system 20407db96d56Sopenharmony_ci self.skipTest('test needs %s locale' % loc) 20417db96d56Sopenharmony_ci 20427db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') 20437db96d56Sopenharmony_ci p1 = re.compile(b'\xc5\xe5', re.L|re.I) 20447db96d56Sopenharmony_ci p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I) 20457db96d56Sopenharmony_ci p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I) 20467db96d56Sopenharmony_ci p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I) 20477db96d56Sopenharmony_ci for p in p1, p2, p3: 20487db96d56Sopenharmony_ci self.assertTrue(p.match(b'\xc5\xe5')) 20497db96d56Sopenharmony_ci self.assertTrue(p.match(b'\xe5\xe5')) 
20507db96d56Sopenharmony_ci self.assertTrue(p.match(b'\xc5\xc5')) 20517db96d56Sopenharmony_ci self.assertIsNone(p4.match(b'\xe5\xc5')) 20527db96d56Sopenharmony_ci self.assertIsNone(p4.match(b'\xe5\xe5')) 20537db96d56Sopenharmony_ci self.assertIsNone(p4.match(b'\xc5\xc5')) 20547db96d56Sopenharmony_ci 20557db96d56Sopenharmony_ci locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') 20567db96d56Sopenharmony_ci for p in p1, p2, p3: 20577db96d56Sopenharmony_ci self.assertTrue(p.match(b'\xc5\xe5')) 20587db96d56Sopenharmony_ci self.assertIsNone(p.match(b'\xe5\xe5')) 20597db96d56Sopenharmony_ci self.assertIsNone(p.match(b'\xc5\xc5')) 20607db96d56Sopenharmony_ci self.assertTrue(p4.match(b'\xe5\xc5')) 20617db96d56Sopenharmony_ci self.assertIsNone(p4.match(b'\xe5\xe5')) 20627db96d56Sopenharmony_ci self.assertIsNone(p4.match(b'\xc5\xc5')) 20637db96d56Sopenharmony_ci 20647db96d56Sopenharmony_ci def test_error(self): 20657db96d56Sopenharmony_ci with self.assertRaises(re.error) as cm: 20667db96d56Sopenharmony_ci re.compile('(\u20ac))') 20677db96d56Sopenharmony_ci err = cm.exception 20687db96d56Sopenharmony_ci self.assertIsInstance(err.pattern, str) 20697db96d56Sopenharmony_ci self.assertEqual(err.pattern, '(\u20ac))') 20707db96d56Sopenharmony_ci self.assertEqual(err.pos, 3) 20717db96d56Sopenharmony_ci self.assertEqual(err.lineno, 1) 20727db96d56Sopenharmony_ci self.assertEqual(err.colno, 4) 20737db96d56Sopenharmony_ci self.assertIn(err.msg, str(err)) 20747db96d56Sopenharmony_ci self.assertIn(' at position 3', str(err)) 20757db96d56Sopenharmony_ci self.assertNotIn(' at position 3', err.msg) 20767db96d56Sopenharmony_ci # Bytes pattern 20777db96d56Sopenharmony_ci with self.assertRaises(re.error) as cm: 20787db96d56Sopenharmony_ci re.compile(b'(\xa4))') 20797db96d56Sopenharmony_ci err = cm.exception 20807db96d56Sopenharmony_ci self.assertIsInstance(err.pattern, bytes) 20817db96d56Sopenharmony_ci self.assertEqual(err.pattern, b'(\xa4))') 20827db96d56Sopenharmony_ci self.assertEqual(err.pos, 
3) 20837db96d56Sopenharmony_ci # Multiline pattern 20847db96d56Sopenharmony_ci with self.assertRaises(re.error) as cm: 20857db96d56Sopenharmony_ci re.compile(""" 20867db96d56Sopenharmony_ci ( 20877db96d56Sopenharmony_ci abc 20887db96d56Sopenharmony_ci ) 20897db96d56Sopenharmony_ci ) 20907db96d56Sopenharmony_ci ( 20917db96d56Sopenharmony_ci """, re.VERBOSE) 20927db96d56Sopenharmony_ci err = cm.exception 20937db96d56Sopenharmony_ci self.assertEqual(err.pos, 77) 20947db96d56Sopenharmony_ci self.assertEqual(err.lineno, 5) 20957db96d56Sopenharmony_ci self.assertEqual(err.colno, 17) 20967db96d56Sopenharmony_ci self.assertIn(err.msg, str(err)) 20977db96d56Sopenharmony_ci self.assertIn(' at position 77', str(err)) 20987db96d56Sopenharmony_ci self.assertIn('(line 5, column 17)', str(err)) 20997db96d56Sopenharmony_ci 21007db96d56Sopenharmony_ci def test_misc_errors(self): 21017db96d56Sopenharmony_ci self.checkPatternError(r'(', 'missing ), unterminated subpattern', 0) 21027db96d56Sopenharmony_ci self.checkPatternError(r'((a|b)', 'missing ), unterminated subpattern', 0) 21037db96d56Sopenharmony_ci self.checkPatternError(r'(a|b))', 'unbalanced parenthesis', 5) 21047db96d56Sopenharmony_ci self.checkPatternError(r'(?P', 'unexpected end of pattern', 3) 21057db96d56Sopenharmony_ci self.checkPatternError(r'(?z)', 'unknown extension ?z', 1) 21067db96d56Sopenharmony_ci self.checkPatternError(r'(?iz)', 'unknown flag', 3) 21077db96d56Sopenharmony_ci self.checkPatternError(r'(?i', 'missing -, : or )', 3) 21087db96d56Sopenharmony_ci self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0) 21097db96d56Sopenharmony_ci self.checkPatternError(r'(?<', 'unexpected end of pattern', 3) 21107db96d56Sopenharmony_ci self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1) 21117db96d56Sopenharmony_ci self.checkPatternError(r'(?', 'unexpected end of pattern', 2) 21127db96d56Sopenharmony_ci 21137db96d56Sopenharmony_ci def test_enum(self): 21147db96d56Sopenharmony_ci # Issue #28082: 
Check that str(flag) returns a human readable string 21157db96d56Sopenharmony_ci # instead of an integer 21167db96d56Sopenharmony_ci self.assertIn('ASCII', str(re.A)) 21177db96d56Sopenharmony_ci self.assertIn('DOTALL', str(re.S)) 21187db96d56Sopenharmony_ci 21197db96d56Sopenharmony_ci def test_pattern_compare(self): 21207db96d56Sopenharmony_ci pattern1 = re.compile('abc', re.IGNORECASE) 21217db96d56Sopenharmony_ci 21227db96d56Sopenharmony_ci # equal to itself 21237db96d56Sopenharmony_ci self.assertEqual(pattern1, pattern1) 21247db96d56Sopenharmony_ci self.assertFalse(pattern1 != pattern1) 21257db96d56Sopenharmony_ci 21267db96d56Sopenharmony_ci # equal 21277db96d56Sopenharmony_ci re.purge() 21287db96d56Sopenharmony_ci pattern2 = re.compile('abc', re.IGNORECASE) 21297db96d56Sopenharmony_ci self.assertEqual(hash(pattern2), hash(pattern1)) 21307db96d56Sopenharmony_ci self.assertEqual(pattern2, pattern1) 21317db96d56Sopenharmony_ci 21327db96d56Sopenharmony_ci # not equal: different pattern 21337db96d56Sopenharmony_ci re.purge() 21347db96d56Sopenharmony_ci pattern3 = re.compile('XYZ', re.IGNORECASE) 21357db96d56Sopenharmony_ci # Don't test hash(pattern3) != hash(pattern1) because there is no 21367db96d56Sopenharmony_ci # warranty that hash values are different 21377db96d56Sopenharmony_ci self.assertNotEqual(pattern3, pattern1) 21387db96d56Sopenharmony_ci 21397db96d56Sopenharmony_ci # not equal: different flag (flags=0) 21407db96d56Sopenharmony_ci re.purge() 21417db96d56Sopenharmony_ci pattern4 = re.compile('abc') 21427db96d56Sopenharmony_ci self.assertNotEqual(pattern4, pattern1) 21437db96d56Sopenharmony_ci 21447db96d56Sopenharmony_ci # only == and != comparison operators are supported 21457db96d56Sopenharmony_ci with self.assertRaises(TypeError): 21467db96d56Sopenharmony_ci pattern1 < pattern2 21477db96d56Sopenharmony_ci 21487db96d56Sopenharmony_ci def test_pattern_compare_bytes(self): 21497db96d56Sopenharmony_ci pattern1 = re.compile(b'abc') 21507db96d56Sopenharmony_ci 
21517db96d56Sopenharmony_ci # equal: test bytes patterns 21527db96d56Sopenharmony_ci re.purge() 21537db96d56Sopenharmony_ci pattern2 = re.compile(b'abc') 21547db96d56Sopenharmony_ci self.assertEqual(hash(pattern2), hash(pattern1)) 21557db96d56Sopenharmony_ci self.assertEqual(pattern2, pattern1) 21567db96d56Sopenharmony_ci 21577db96d56Sopenharmony_ci # not equal: pattern of a different types (str vs bytes), 21587db96d56Sopenharmony_ci # comparison must not raise a BytesWarning 21597db96d56Sopenharmony_ci re.purge() 21607db96d56Sopenharmony_ci pattern3 = re.compile('abc') 21617db96d56Sopenharmony_ci with warnings.catch_warnings(): 21627db96d56Sopenharmony_ci warnings.simplefilter('error', BytesWarning) 21637db96d56Sopenharmony_ci self.assertNotEqual(pattern3, pattern1) 21647db96d56Sopenharmony_ci 21657db96d56Sopenharmony_ci def test_bug_29444(self): 21667db96d56Sopenharmony_ci s = bytearray(b'abcdefgh') 21677db96d56Sopenharmony_ci m = re.search(b'[a-h]+', s) 21687db96d56Sopenharmony_ci m2 = re.search(b'[e-h]+', s) 21697db96d56Sopenharmony_ci self.assertEqual(m.group(), b'abcdefgh') 21707db96d56Sopenharmony_ci self.assertEqual(m2.group(), b'efgh') 21717db96d56Sopenharmony_ci s[:] = b'xyz' 21727db96d56Sopenharmony_ci self.assertEqual(m.group(), b'xyz') 21737db96d56Sopenharmony_ci self.assertEqual(m2.group(), b'') 21747db96d56Sopenharmony_ci 21757db96d56Sopenharmony_ci def test_bug_34294(self): 21767db96d56Sopenharmony_ci # Issue 34294: wrong capturing groups 21777db96d56Sopenharmony_ci 21787db96d56Sopenharmony_ci # exists since Python 2 21797db96d56Sopenharmony_ci s = "a\tx" 21807db96d56Sopenharmony_ci p = r"\b(?=(\t)|(x))x" 21817db96d56Sopenharmony_ci self.assertEqual(re.search(p, s).groups(), (None, 'x')) 21827db96d56Sopenharmony_ci 21837db96d56Sopenharmony_ci # introduced in Python 3.7.0 21847db96d56Sopenharmony_ci s = "ab" 21857db96d56Sopenharmony_ci p = r"(?=(.)(.)?)" 21867db96d56Sopenharmony_ci self.assertEqual(re.findall(p, s), 21877db96d56Sopenharmony_ci [('a', 
'b'), ('b', '')]) 21887db96d56Sopenharmony_ci self.assertEqual([m.groups() for m in re.finditer(p, s)], 21897db96d56Sopenharmony_ci [('a', 'b'), ('b', None)]) 21907db96d56Sopenharmony_ci 21917db96d56Sopenharmony_ci # test-cases provided by issue34294, introduced in Python 3.7.0 21927db96d56Sopenharmony_ci p = r"(?=<(?P<tag>\w+)/?>(?:(?P<text>.+?)</(?P=tag)>)?)" 21937db96d56Sopenharmony_ci s = "<test><foo2/></test>" 21947db96d56Sopenharmony_ci self.assertEqual(re.findall(p, s), 21957db96d56Sopenharmony_ci [('test', '<foo2/>'), ('foo2', '')]) 21967db96d56Sopenharmony_ci self.assertEqual([m.groupdict() for m in re.finditer(p, s)], 21977db96d56Sopenharmony_ci [{'tag': 'test', 'text': '<foo2/>'}, 21987db96d56Sopenharmony_ci {'tag': 'foo2', 'text': None}]) 21997db96d56Sopenharmony_ci s = "<test>Hello</test><foo/>" 22007db96d56Sopenharmony_ci self.assertEqual([m.groupdict() for m in re.finditer(p, s)], 22017db96d56Sopenharmony_ci [{'tag': 'test', 'text': 'Hello'}, 22027db96d56Sopenharmony_ci {'tag': 'foo', 'text': None}]) 22037db96d56Sopenharmony_ci s = "<test>Hello</test><foo/><foo/>" 22047db96d56Sopenharmony_ci self.assertEqual([m.groupdict() for m in re.finditer(p, s)], 22057db96d56Sopenharmony_ci [{'tag': 'test', 'text': 'Hello'}, 22067db96d56Sopenharmony_ci {'tag': 'foo', 'text': None}, 22077db96d56Sopenharmony_ci {'tag': 'foo', 'text': None}]) 22087db96d56Sopenharmony_ci 22097db96d56Sopenharmony_ci def test_MARK_PUSH_macro_bug(self): 22107db96d56Sopenharmony_ci # issue35859, MARK_PUSH() macro didn't protect MARK-0 if it 22117db96d56Sopenharmony_ci # was the only available mark. 
22127db96d56Sopenharmony_ci self.assertEqual(re.match(r'(ab|a)*?b', 'ab').groups(), ('a',)) 22137db96d56Sopenharmony_ci self.assertEqual(re.match(r'(ab|a)+?b', 'ab').groups(), ('a',)) 22147db96d56Sopenharmony_ci self.assertEqual(re.match(r'(ab|a){0,2}?b', 'ab').groups(), ('a',)) 22157db96d56Sopenharmony_ci self.assertEqual(re.match(r'(.b|a)*?b', 'ab').groups(), ('a',)) 22167db96d56Sopenharmony_ci 22177db96d56Sopenharmony_ci def test_MIN_UNTIL_mark_bug(self): 22187db96d56Sopenharmony_ci # Fixed in issue35859, reported in issue9134. 22197db96d56Sopenharmony_ci # JUMP_MIN_UNTIL_2 should MARK_PUSH() if in a repeat 22207db96d56Sopenharmony_ci s = 'axxzbcz' 22217db96d56Sopenharmony_ci p = r'(?:(?:a|bc)*?(xx)??z)*' 22227db96d56Sopenharmony_ci self.assertEqual(re.match(p, s).groups(), ('xx',)) 22237db96d56Sopenharmony_ci 22247db96d56Sopenharmony_ci # test-case provided by issue9134 22257db96d56Sopenharmony_ci s = 'xtcxyzxc' 22267db96d56Sopenharmony_ci p = r'((x|yz)+?(t)??c)*' 22277db96d56Sopenharmony_ci m = re.match(p, s) 22287db96d56Sopenharmony_ci self.assertEqual(m.span(), (0, 8)) 22297db96d56Sopenharmony_ci self.assertEqual(m.span(2), (6, 7)) 22307db96d56Sopenharmony_ci self.assertEqual(m.groups(), ('xyzxc', 'x', 't')) 22317db96d56Sopenharmony_ci 22327db96d56Sopenharmony_ci def test_REPEAT_ONE_mark_bug(self): 22337db96d56Sopenharmony_ci # issue35859 22347db96d56Sopenharmony_ci # JUMP_REPEAT_ONE_1 should MARK_PUSH() if in a repeat 22357db96d56Sopenharmony_ci s = 'aabaab' 22367db96d56Sopenharmony_ci p = r'(?:[^b]*a(?=(b)|(a))ab)*' 22377db96d56Sopenharmony_ci m = re.match(p, s) 22387db96d56Sopenharmony_ci self.assertEqual(m.span(), (0, 6)) 22397db96d56Sopenharmony_ci self.assertEqual(m.span(2), (4, 5)) 22407db96d56Sopenharmony_ci self.assertEqual(m.groups(), (None, 'a')) 22417db96d56Sopenharmony_ci 22427db96d56Sopenharmony_ci # JUMP_REPEAT_ONE_2 should MARK_PUSH() if in a repeat 22437db96d56Sopenharmony_ci s = 'abab' 22447db96d56Sopenharmony_ci p = 
r'(?:[^b]*(?=(b)|(a))ab)*' 22457db96d56Sopenharmony_ci m = re.match(p, s) 22467db96d56Sopenharmony_ci self.assertEqual(m.span(), (0, 4)) 22477db96d56Sopenharmony_ci self.assertEqual(m.span(2), (2, 3)) 22487db96d56Sopenharmony_ci self.assertEqual(m.groups(), (None, 'a')) 22497db96d56Sopenharmony_ci 22507db96d56Sopenharmony_ci self.assertEqual(re.match(r'(ab?)*?b', 'ab').groups(), ('a',)) 22517db96d56Sopenharmony_ci 22527db96d56Sopenharmony_ci def test_MIN_REPEAT_ONE_mark_bug(self): 22537db96d56Sopenharmony_ci # issue35859 22547db96d56Sopenharmony_ci # JUMP_MIN_REPEAT_ONE should MARK_PUSH() if in a repeat 22557db96d56Sopenharmony_ci s = 'abab' 22567db96d56Sopenharmony_ci p = r'(?:.*?(?=(a)|(b))b)*' 22577db96d56Sopenharmony_ci m = re.match(p, s) 22587db96d56Sopenharmony_ci self.assertEqual(m.span(), (0, 4)) 22597db96d56Sopenharmony_ci self.assertEqual(m.span(2), (3, 4)) 22607db96d56Sopenharmony_ci self.assertEqual(m.groups(), (None, 'b')) 22617db96d56Sopenharmony_ci 22627db96d56Sopenharmony_ci s = 'axxzaz' 22637db96d56Sopenharmony_ci p = r'(?:a*?(xx)??z)*' 22647db96d56Sopenharmony_ci self.assertEqual(re.match(p, s).groups(), ('xx',)) 22657db96d56Sopenharmony_ci 22667db96d56Sopenharmony_ci def test_ASSERT_NOT_mark_bug(self): 22677db96d56Sopenharmony_ci # Fixed in issue35859, reported in issue725149. 
22687db96d56Sopenharmony_ci # JUMP_ASSERT_NOT should LASTMARK_SAVE() 22697db96d56Sopenharmony_ci self.assertEqual(re.match(r'(?!(..)c)', 'ab').groups(), (None,)) 22707db96d56Sopenharmony_ci 22717db96d56Sopenharmony_ci # JUMP_ASSERT_NOT should MARK_PUSH() if in a repeat 22727db96d56Sopenharmony_ci m = re.match(r'((?!(ab)c)(.))*', 'abab') 22737db96d56Sopenharmony_ci self.assertEqual(m.span(), (0, 4)) 22747db96d56Sopenharmony_ci self.assertEqual(m.span(1), (3, 4)) 22757db96d56Sopenharmony_ci self.assertEqual(m.span(3), (3, 4)) 22767db96d56Sopenharmony_ci self.assertEqual(m.groups(), ('b', None, 'b')) 22777db96d56Sopenharmony_ci 22787db96d56Sopenharmony_ci def test_bug_40736(self): 22797db96d56Sopenharmony_ci with self.assertRaisesRegex(TypeError, "got 'int'"): 22807db96d56Sopenharmony_ci re.search("x*", 5) 22817db96d56Sopenharmony_ci with self.assertRaisesRegex(TypeError, "got 'type'"): 22827db96d56Sopenharmony_ci re.search("x*", type) 22837db96d56Sopenharmony_ci 22847db96d56Sopenharmony_ci def test_search_anchor_at_beginning(self): 22857db96d56Sopenharmony_ci s = 'x'*10**7 22867db96d56Sopenharmony_ci start = time.perf_counter() 22877db96d56Sopenharmony_ci for p in r'\Ay', r'^y': 22887db96d56Sopenharmony_ci self.assertIsNone(re.search(p, s)) 22897db96d56Sopenharmony_ci self.assertEqual(re.split(p, s), [s]) 22907db96d56Sopenharmony_ci self.assertEqual(re.findall(p, s), []) 22917db96d56Sopenharmony_ci self.assertEqual(list(re.finditer(p, s)), []) 22927db96d56Sopenharmony_ci self.assertEqual(re.sub(p, '', s), s) 22937db96d56Sopenharmony_ci t = time.perf_counter() - start 22947db96d56Sopenharmony_ci # Without optimization it takes 1 second on my computer. 22957db96d56Sopenharmony_ci # With optimization -- 0.0003 seconds. 
22967db96d56Sopenharmony_ci self.assertLess(t, 0.1) 22977db96d56Sopenharmony_ci 22987db96d56Sopenharmony_ci def test_possessive_quantifiers(self): 22997db96d56Sopenharmony_ci """Test Possessive Quantifiers 23007db96d56Sopenharmony_ci Test quantifiers of the form @+ for some repetition operator @, 23017db96d56Sopenharmony_ci e.g. x{3,5}+ meaning match from 3 to 5 greadily and proceed 23027db96d56Sopenharmony_ci without creating a stack frame for rolling the stack back and 23037db96d56Sopenharmony_ci trying 1 or more fewer matches.""" 23047db96d56Sopenharmony_ci self.assertIsNone(re.match('e*+e', 'eeee')) 23057db96d56Sopenharmony_ci self.assertEqual(re.match('e++a', 'eeea').group(0), 'eeea') 23067db96d56Sopenharmony_ci self.assertEqual(re.match('e?+a', 'ea').group(0), 'ea') 23077db96d56Sopenharmony_ci self.assertEqual(re.match('e{2,4}+a', 'eeea').group(0), 'eeea') 23087db96d56Sopenharmony_ci self.assertIsNone(re.match('(.)++.', 'ee')) 23097db96d56Sopenharmony_ci self.assertEqual(re.match('(ae)*+a', 'aea').groups(), ('ae',)) 23107db96d56Sopenharmony_ci self.assertEqual(re.match('([ae][ae])?+a', 'aea').groups(), 23117db96d56Sopenharmony_ci ('ae',)) 23127db96d56Sopenharmony_ci self.assertEqual(re.match('(e?){2,4}+a', 'eeea').groups(), 23137db96d56Sopenharmony_ci ('',)) 23147db96d56Sopenharmony_ci self.assertEqual(re.match('()*+a', 'a').groups(), ('',)) 23157db96d56Sopenharmony_ci self.assertEqual(re.search('x*+', 'axx').span(), (0, 0)) 23167db96d56Sopenharmony_ci self.assertEqual(re.search('x++', 'axx').span(), (1, 3)) 23177db96d56Sopenharmony_ci self.assertEqual(re.match('a*+', 'xxx').span(), (0, 0)) 23187db96d56Sopenharmony_ci self.assertEqual(re.match('x*+', 'xxxa').span(), (0, 3)) 23197db96d56Sopenharmony_ci self.assertIsNone(re.match('a++', 'xxx')) 23207db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1}+$", "abc")) 23217db96d56Sopenharmony_ci self.assertIsNone(re.match(r"^(\w){1,2}+$", "abc")) 23227db96d56Sopenharmony_ci 23237db96d56Sopenharmony_ci 
self.assertEqual(re.match(r"^(\w){3}+$", "abc").group(1), "c") 23247db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,3}+$", "abc").group(1), "c") 23257db96d56Sopenharmony_ci self.assertEqual(re.match(r"^(\w){1,4}+$", "abc").group(1), "c") 23267db96d56Sopenharmony_ci 23277db96d56Sopenharmony_ci self.assertIsNone(re.match("^x{1}+$", "xxx")) 23287db96d56Sopenharmony_ci self.assertIsNone(re.match("^x{1,2}+$", "xxx")) 23297db96d56Sopenharmony_ci 23307db96d56Sopenharmony_ci self.assertTrue(re.match("^x{3}+$", "xxx")) 23317db96d56Sopenharmony_ci self.assertTrue(re.match("^x{1,3}+$", "xxx")) 23327db96d56Sopenharmony_ci self.assertTrue(re.match("^x{1,4}+$", "xxx")) 23337db96d56Sopenharmony_ci 23347db96d56Sopenharmony_ci self.assertIsNone(re.match("^x{}+$", "xxx")) 23357db96d56Sopenharmony_ci self.assertTrue(re.match("^x{}+$", "x{}")) 23367db96d56Sopenharmony_ci 23377db96d56Sopenharmony_ci def test_fullmatch_possessive_quantifiers(self): 23387db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a++', 'a')) 23397db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a*+', 'a')) 23407db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a?+', 'a')) 23417db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a{1,3}+', 'a')) 23427db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'a++', 'ab')) 23437db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'a*+', 'ab')) 23447db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'a?+', 'ab')) 23457db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'a{1,3}+', 'ab')) 23467db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a++b', 'ab')) 23477db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a*+b', 'ab')) 23487db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a?+b', 'ab')) 23497db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'a{1,3}+b', 'ab')) 23507db96d56Sopenharmony_ci 23517db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab)++', 'ab')) 23527db96d56Sopenharmony_ci 
self.assertTrue(re.fullmatch(r'(?:ab)*+', 'ab')) 23537db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab)?+', 'ab')) 23547db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab){1,3}+', 'ab')) 23557db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?:ab)++', 'abc')) 23567db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?:ab)*+', 'abc')) 23577db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?:ab)?+', 'abc')) 23587db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?:ab){1,3}+', 'abc')) 23597db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab)++c', 'abc')) 23607db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab)*+c', 'abc')) 23617db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab)?+c', 'abc')) 23627db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?:ab){1,3}+c', 'abc')) 23637db96d56Sopenharmony_ci 23647db96d56Sopenharmony_ci def test_findall_possessive_quantifiers(self): 23657db96d56Sopenharmony_ci self.assertEqual(re.findall(r'a++', 'aab'), ['aa']) 23667db96d56Sopenharmony_ci self.assertEqual(re.findall(r'a*+', 'aab'), ['aa', '', '']) 23677db96d56Sopenharmony_ci self.assertEqual(re.findall(r'a?+', 'aab'), ['a', 'a', '', '']) 23687db96d56Sopenharmony_ci self.assertEqual(re.findall(r'a{1,3}+', 'aab'), ['aa']) 23697db96d56Sopenharmony_ci 23707db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?:ab)++', 'ababc'), ['abab']) 23717db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?:ab)*+', 'ababc'), ['abab', '', '']) 23727db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?:ab)?+', 'ababc'), ['ab', 'ab', '', '']) 23737db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?:ab){1,3}+', 'ababc'), ['abab']) 23747db96d56Sopenharmony_ci 23757db96d56Sopenharmony_ci def test_atomic_grouping(self): 23767db96d56Sopenharmony_ci """Test Atomic Grouping 23777db96d56Sopenharmony_ci Test non-capturing groups of the form (?>...), which does 23787db96d56Sopenharmony_ci not maintain any stack point created 
within the group once the 23797db96d56Sopenharmony_ci group is finished being evaluated.""" 23807db96d56Sopenharmony_ci pattern1 = re.compile(r'a(?>bc|b)c') 23817db96d56Sopenharmony_ci self.assertIsNone(pattern1.match('abc')) 23827db96d56Sopenharmony_ci self.assertTrue(pattern1.match('abcc')) 23837db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?>.*).', 'abc')) 23847db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?>x)++', 'xxx')) 23857db96d56Sopenharmony_ci self.assertTrue(re.match(r'(?>x++)', 'xxx')) 23867db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?>x)++x', 'xxx')) 23877db96d56Sopenharmony_ci self.assertIsNone(re.match(r'(?>x++)x', 'xxx')) 23887db96d56Sopenharmony_ci 23897db96d56Sopenharmony_ci def test_fullmatch_atomic_grouping(self): 23907db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a+)', 'a')) 23917db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a*)', 'a')) 23927db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a?)', 'a')) 23937db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a{1,3})', 'a')) 23947db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>a+)', 'ab')) 23957db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>a*)', 'ab')) 23967db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>a?)', 'ab')) 23977db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>a{1,3})', 'ab')) 23987db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a+)b', 'ab')) 23997db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a*)b', 'ab')) 24007db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a?)b', 'ab')) 24017db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>a{1,3})b', 'ab')) 24027db96d56Sopenharmony_ci 24037db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)+)', 'ab')) 24047db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)*)', 'ab')) 24057db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)?)', 'ab')) 24067db96d56Sopenharmony_ci 
self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})', 'ab')) 24077db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>(?:ab)+)', 'abc')) 24087db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>(?:ab)*)', 'abc')) 24097db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>(?:ab)?)', 'abc')) 24107db96d56Sopenharmony_ci self.assertIsNone(re.fullmatch(r'(?>(?:ab){1,3})', 'abc')) 24117db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)+)c', 'abc')) 24127db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)*)c', 'abc')) 24137db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab)?)c', 'abc')) 24147db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?>(?:ab){1,3})c', 'abc')) 24157db96d56Sopenharmony_ci 24167db96d56Sopenharmony_ci def test_findall_atomic_grouping(self): 24177db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>a+)', 'aab'), ['aa']) 24187db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>a*)', 'aab'), ['aa', '', '']) 24197db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>a?)', 'aab'), ['a', 'a', '', '']) 24207db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>a{1,3})', 'aab'), ['aa']) 24217db96d56Sopenharmony_ci 24227db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>(?:ab)+)', 'ababc'), ['abab']) 24237db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>(?:ab)*)', 'ababc'), ['abab', '', '']) 24247db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>(?:ab)?)', 'ababc'), ['ab', 'ab', '', '']) 24257db96d56Sopenharmony_ci self.assertEqual(re.findall(r'(?>(?:ab){1,3})', 'ababc'), ['abab']) 24267db96d56Sopenharmony_ci 24277db96d56Sopenharmony_ci def test_bug_gh91616(self): 24287db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer 24297db96d56Sopenharmony_ci self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt")) 24307db96d56Sopenharmony_ci 24317db96d56Sopenharmony_ci def 
test_template_function_and_flag_is_deprecated(self): 24327db96d56Sopenharmony_ci with self.assertWarns(DeprecationWarning) as cm: 24337db96d56Sopenharmony_ci template_re1 = re.template(r'a') 24347db96d56Sopenharmony_ci self.assertIn('re.template()', str(cm.warning)) 24357db96d56Sopenharmony_ci self.assertIn('is deprecated', str(cm.warning)) 24367db96d56Sopenharmony_ci self.assertIn('function', str(cm.warning)) 24377db96d56Sopenharmony_ci self.assertNotIn('flag', str(cm.warning)) 24387db96d56Sopenharmony_ci 24397db96d56Sopenharmony_ci with self.assertWarns(DeprecationWarning) as cm: 24407db96d56Sopenharmony_ci # we deliberately use more flags here to test that that still 24417db96d56Sopenharmony_ci # triggers the warning 24427db96d56Sopenharmony_ci # if paranoid, we could test multiple different combinations, 24437db96d56Sopenharmony_ci # but it's probably not worth it 24447db96d56Sopenharmony_ci template_re2 = re.compile(r'a', flags=re.TEMPLATE|re.UNICODE) 24457db96d56Sopenharmony_ci self.assertIn('re.TEMPLATE', str(cm.warning)) 24467db96d56Sopenharmony_ci self.assertIn('is deprecated', str(cm.warning)) 24477db96d56Sopenharmony_ci self.assertIn('flag', str(cm.warning)) 24487db96d56Sopenharmony_ci self.assertNotIn('function', str(cm.warning)) 24497db96d56Sopenharmony_ci 24507db96d56Sopenharmony_ci # while deprecated, is should still function 24517db96d56Sopenharmony_ci self.assertEqual(template_re1, template_re2) 24527db96d56Sopenharmony_ci self.assertTrue(template_re1.match('ahoy')) 24537db96d56Sopenharmony_ci self.assertFalse(template_re1.match('nope')) 24547db96d56Sopenharmony_ci 24557db96d56Sopenharmony_ci @unittest.skipIf(multiprocessing is None, 'test requires multiprocessing') 24567db96d56Sopenharmony_ci def test_regression_gh94675(self): 24577db96d56Sopenharmony_ci pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*' 24587db96d56Sopenharmony_ci r'((/[^/\[\n]*(([^\n]|(\[\n]*(]*)*\]))' 24597db96d56Sopenharmony_ci 
r'[^/\[]*)*/))((((//[^\n]*)?[\n])' 24607db96d56Sopenharmony_ci r'([\000-\040]|(/\*[^*]*\*+' 24617db96d56Sopenharmony_ci r'([^/*]\*+)*/))*)+(?=[^\000-\040);\]}]))') 24627db96d56Sopenharmony_ci input_js = '''a(function() { 24637db96d56Sopenharmony_ci /////////////////////////////////////////////////////////////////// 24647db96d56Sopenharmony_ci });''' 24657db96d56Sopenharmony_ci p = multiprocessing.Process(target=pattern.sub, args=('', input_js)) 24667db96d56Sopenharmony_ci p.start() 24677db96d56Sopenharmony_ci p.join(SHORT_TIMEOUT) 24687db96d56Sopenharmony_ci try: 24697db96d56Sopenharmony_ci self.assertFalse(p.is_alive(), 'pattern.sub() timed out') 24707db96d56Sopenharmony_ci finally: 24717db96d56Sopenharmony_ci if p.is_alive(): 24727db96d56Sopenharmony_ci p.terminate() 24737db96d56Sopenharmony_ci p.join() 24747db96d56Sopenharmony_ci 24757db96d56Sopenharmony_ci 24767db96d56Sopenharmony_cidef get_debug_out(pat): 24777db96d56Sopenharmony_ci with captured_stdout() as out: 24787db96d56Sopenharmony_ci re.compile(pat, re.DEBUG) 24797db96d56Sopenharmony_ci return out.getvalue() 24807db96d56Sopenharmony_ci 24817db96d56Sopenharmony_ci 24827db96d56Sopenharmony_ci@cpython_only 24837db96d56Sopenharmony_ciclass DebugTests(unittest.TestCase): 24847db96d56Sopenharmony_ci maxDiff = None 24857db96d56Sopenharmony_ci 24867db96d56Sopenharmony_ci def test_debug_flag(self): 24877db96d56Sopenharmony_ci pat = r'(\.)(?:[ch]|py)(?(1)$|: )' 24887db96d56Sopenharmony_ci dump = '''\ 24897db96d56Sopenharmony_ciSUBPATTERN 1 0 0 24907db96d56Sopenharmony_ci LITERAL 46 24917db96d56Sopenharmony_ciBRANCH 24927db96d56Sopenharmony_ci IN 24937db96d56Sopenharmony_ci LITERAL 99 24947db96d56Sopenharmony_ci LITERAL 104 24957db96d56Sopenharmony_ciOR 24967db96d56Sopenharmony_ci LITERAL 112 24977db96d56Sopenharmony_ci LITERAL 121 24987db96d56Sopenharmony_ciGROUPREF_EXISTS 1 24997db96d56Sopenharmony_ci AT AT_END 25007db96d56Sopenharmony_ciELSE 25017db96d56Sopenharmony_ci LITERAL 58 25027db96d56Sopenharmony_ci 
LITERAL 32 25037db96d56Sopenharmony_ci 25047db96d56Sopenharmony_ci 0. INFO 8 0b1 2 5 (to 9) 25057db96d56Sopenharmony_ci prefix_skip 0 25067db96d56Sopenharmony_ci prefix [0x2e] ('.') 25077db96d56Sopenharmony_ci overlap [0] 25087db96d56Sopenharmony_ci 9: MARK 0 25097db96d56Sopenharmony_ci11. LITERAL 0x2e ('.') 25107db96d56Sopenharmony_ci13. MARK 1 25117db96d56Sopenharmony_ci15. BRANCH 10 (to 26) 25127db96d56Sopenharmony_ci17. IN 6 (to 24) 25137db96d56Sopenharmony_ci19. LITERAL 0x63 ('c') 25147db96d56Sopenharmony_ci21. LITERAL 0x68 ('h') 25157db96d56Sopenharmony_ci23. FAILURE 25167db96d56Sopenharmony_ci24: JUMP 9 (to 34) 25177db96d56Sopenharmony_ci26: branch 7 (to 33) 25187db96d56Sopenharmony_ci27. LITERAL 0x70 ('p') 25197db96d56Sopenharmony_ci29. LITERAL 0x79 ('y') 25207db96d56Sopenharmony_ci31. JUMP 2 (to 34) 25217db96d56Sopenharmony_ci33: FAILURE 25227db96d56Sopenharmony_ci34: GROUPREF_EXISTS 0 6 (to 41) 25237db96d56Sopenharmony_ci37. AT END 25247db96d56Sopenharmony_ci39. JUMP 5 (to 45) 25257db96d56Sopenharmony_ci41: LITERAL 0x3a (':') 25267db96d56Sopenharmony_ci43. LITERAL 0x20 (' ') 25277db96d56Sopenharmony_ci45: SUCCESS 25287db96d56Sopenharmony_ci''' 25297db96d56Sopenharmony_ci self.assertEqual(get_debug_out(pat), dump) 25307db96d56Sopenharmony_ci # Debug output is output again even a second time (bypassing 25317db96d56Sopenharmony_ci # the cache -- issue #20426). 25327db96d56Sopenharmony_ci self.assertEqual(get_debug_out(pat), dump) 25337db96d56Sopenharmony_ci 25347db96d56Sopenharmony_ci def test_atomic_group(self): 25357db96d56Sopenharmony_ci self.assertEqual(get_debug_out(r'(?>ab?)'), '''\ 25367db96d56Sopenharmony_ciATOMIC_GROUP [(LITERAL, 97), (MAX_REPEAT, (0, 1, [(LITERAL, 98)]))] 25377db96d56Sopenharmony_ci 25387db96d56Sopenharmony_ci 0. INFO 4 0b0 1 2 (to 5) 25397db96d56Sopenharmony_ci 5: ATOMIC_GROUP 11 (to 17) 25407db96d56Sopenharmony_ci 7. LITERAL 0x61 ('a') 25417db96d56Sopenharmony_ci 9. REPEAT_ONE 6 0 1 (to 16) 25427db96d56Sopenharmony_ci13. 
LITERAL 0x62 ('b') 25437db96d56Sopenharmony_ci15. SUCCESS 25447db96d56Sopenharmony_ci16: SUCCESS 25457db96d56Sopenharmony_ci17: SUCCESS 25467db96d56Sopenharmony_ci''') 25477db96d56Sopenharmony_ci 25487db96d56Sopenharmony_ci def test_possesive_repeat_one(self): 25497db96d56Sopenharmony_ci self.assertEqual(get_debug_out(r'a?+'), '''\ 25507db96d56Sopenharmony_ciPOSSESSIVE_REPEAT 0 1 25517db96d56Sopenharmony_ci LITERAL 97 25527db96d56Sopenharmony_ci 25537db96d56Sopenharmony_ci 0. INFO 4 0b0 0 1 (to 5) 25547db96d56Sopenharmony_ci 5: POSSESSIVE_REPEAT_ONE 6 0 1 (to 12) 25557db96d56Sopenharmony_ci 9. LITERAL 0x61 ('a') 25567db96d56Sopenharmony_ci11. SUCCESS 25577db96d56Sopenharmony_ci12: SUCCESS 25587db96d56Sopenharmony_ci''') 25597db96d56Sopenharmony_ci 25607db96d56Sopenharmony_ci def test_possesive_repeat(self): 25617db96d56Sopenharmony_ci self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ 25627db96d56Sopenharmony_ciPOSSESSIVE_REPEAT 0 1 25637db96d56Sopenharmony_ci LITERAL 97 25647db96d56Sopenharmony_ci LITERAL 98 25657db96d56Sopenharmony_ci 25667db96d56Sopenharmony_ci 0. INFO 4 0b0 0 2 (to 5) 25677db96d56Sopenharmony_ci 5: POSSESSIVE_REPEAT 7 0 1 (to 13) 25687db96d56Sopenharmony_ci 9. LITERAL 0x61 ('a') 25697db96d56Sopenharmony_ci11. LITERAL 0x62 ('b') 25707db96d56Sopenharmony_ci13: SUCCESS 25717db96d56Sopenharmony_ci14. 
SUCCESS 25727db96d56Sopenharmony_ci''') 25737db96d56Sopenharmony_ci 25747db96d56Sopenharmony_ci 25757db96d56Sopenharmony_ciclass PatternReprTests(unittest.TestCase): 25767db96d56Sopenharmony_ci def check(self, pattern, expected): 25777db96d56Sopenharmony_ci self.assertEqual(repr(re.compile(pattern)), expected) 25787db96d56Sopenharmony_ci 25797db96d56Sopenharmony_ci def check_flags(self, pattern, flags, expected): 25807db96d56Sopenharmony_ci self.assertEqual(repr(re.compile(pattern, flags)), expected) 25817db96d56Sopenharmony_ci 25827db96d56Sopenharmony_ci def test_without_flags(self): 25837db96d56Sopenharmony_ci self.check('random pattern', 25847db96d56Sopenharmony_ci "re.compile('random pattern')") 25857db96d56Sopenharmony_ci 25867db96d56Sopenharmony_ci def test_single_flag(self): 25877db96d56Sopenharmony_ci self.check_flags('random pattern', re.IGNORECASE, 25887db96d56Sopenharmony_ci "re.compile('random pattern', re.IGNORECASE)") 25897db96d56Sopenharmony_ci 25907db96d56Sopenharmony_ci def test_multiple_flags(self): 25917db96d56Sopenharmony_ci self.check_flags('random pattern', re.I|re.S|re.X, 25927db96d56Sopenharmony_ci "re.compile('random pattern', " 25937db96d56Sopenharmony_ci "re.IGNORECASE|re.DOTALL|re.VERBOSE)") 25947db96d56Sopenharmony_ci 25957db96d56Sopenharmony_ci def test_unicode_flag(self): 25967db96d56Sopenharmony_ci self.check_flags('random pattern', re.U, 25977db96d56Sopenharmony_ci "re.compile('random pattern')") 25987db96d56Sopenharmony_ci self.check_flags('random pattern', re.I|re.S|re.U, 25997db96d56Sopenharmony_ci "re.compile('random pattern', " 26007db96d56Sopenharmony_ci "re.IGNORECASE|re.DOTALL)") 26017db96d56Sopenharmony_ci 26027db96d56Sopenharmony_ci def test_inline_flags(self): 26037db96d56Sopenharmony_ci self.check('(?i)pattern', 26047db96d56Sopenharmony_ci "re.compile('(?i)pattern', re.IGNORECASE)") 26057db96d56Sopenharmony_ci 26067db96d56Sopenharmony_ci def test_unknown_flags(self): 26077db96d56Sopenharmony_ci self.check_flags('random 
pattern', 0x123000, 26087db96d56Sopenharmony_ci "re.compile('random pattern', 0x123000)") 26097db96d56Sopenharmony_ci self.check_flags('random pattern', 0x123000|re.I, 26107db96d56Sopenharmony_ci "re.compile('random pattern', re.IGNORECASE|0x123000)") 26117db96d56Sopenharmony_ci 26127db96d56Sopenharmony_ci def test_bytes(self): 26137db96d56Sopenharmony_ci self.check(b'bytes pattern', 26147db96d56Sopenharmony_ci "re.compile(b'bytes pattern')") 26157db96d56Sopenharmony_ci self.check_flags(b'bytes pattern', re.A, 26167db96d56Sopenharmony_ci "re.compile(b'bytes pattern', re.ASCII)") 26177db96d56Sopenharmony_ci 26187db96d56Sopenharmony_ci def test_locale(self): 26197db96d56Sopenharmony_ci self.check_flags(b'bytes pattern', re.L, 26207db96d56Sopenharmony_ci "re.compile(b'bytes pattern', re.LOCALE)") 26217db96d56Sopenharmony_ci 26227db96d56Sopenharmony_ci def test_quotes(self): 26237db96d56Sopenharmony_ci self.check('random "double quoted" pattern', 26247db96d56Sopenharmony_ci '''re.compile('random "double quoted" pattern')''') 26257db96d56Sopenharmony_ci self.check("random 'single quoted' pattern", 26267db96d56Sopenharmony_ci '''re.compile("random 'single quoted' pattern")''') 26277db96d56Sopenharmony_ci self.check('''both 'single' and "double" quotes''', 26287db96d56Sopenharmony_ci '''re.compile('both \\'single\\' and "double" quotes')''') 26297db96d56Sopenharmony_ci 26307db96d56Sopenharmony_ci def test_long_pattern(self): 26317db96d56Sopenharmony_ci pattern = 'Very %spattern' % ('long ' * 1000) 26327db96d56Sopenharmony_ci r = repr(re.compile(pattern)) 26337db96d56Sopenharmony_ci self.assertLess(len(r), 300) 26347db96d56Sopenharmony_ci self.assertEqual(r[:30], "re.compile('Very long long lon") 26357db96d56Sopenharmony_ci r = repr(re.compile(pattern, re.I)) 26367db96d56Sopenharmony_ci self.assertLess(len(r), 300) 26377db96d56Sopenharmony_ci self.assertEqual(r[:30], "re.compile('Very long long lon") 26387db96d56Sopenharmony_ci self.assertEqual(r[-16:], ", re.IGNORECASE)") 
26397db96d56Sopenharmony_ci 26407db96d56Sopenharmony_ci def test_flags_repr(self): 26417db96d56Sopenharmony_ci self.assertEqual(repr(re.I), "re.IGNORECASE") 26427db96d56Sopenharmony_ci self.assertEqual(repr(re.I|re.S|re.X), 26437db96d56Sopenharmony_ci "re.IGNORECASE|re.DOTALL|re.VERBOSE") 26447db96d56Sopenharmony_ci self.assertEqual(repr(re.I|re.S|re.X|(1<<20)), 26457db96d56Sopenharmony_ci "re.IGNORECASE|re.DOTALL|re.VERBOSE|0x100000") 26467db96d56Sopenharmony_ci self.assertEqual( 26477db96d56Sopenharmony_ci repr(~re.I), 26487db96d56Sopenharmony_ci "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.DOTALL|re.VERBOSE|re.TEMPLATE|re.DEBUG") 26497db96d56Sopenharmony_ci self.assertEqual(repr(~(re.I|re.S|re.X)), 26507db96d56Sopenharmony_ci "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.TEMPLATE|re.DEBUG") 26517db96d56Sopenharmony_ci self.assertEqual(repr(~(re.I|re.S|re.X|(1<<20))), 26527db96d56Sopenharmony_ci "re.ASCII|re.LOCALE|re.UNICODE|re.MULTILINE|re.TEMPLATE|re.DEBUG|0xffe00") 26537db96d56Sopenharmony_ci 26547db96d56Sopenharmony_ci 26557db96d56Sopenharmony_ciclass ImplementationTest(unittest.TestCase): 26567db96d56Sopenharmony_ci """ 26577db96d56Sopenharmony_ci Test implementation details of the re module. 
26587db96d56Sopenharmony_ci """ 26597db96d56Sopenharmony_ci 26607db96d56Sopenharmony_ci @cpython_only 26617db96d56Sopenharmony_ci def test_immutable(self): 26627db96d56Sopenharmony_ci # bpo-43908: check that re types are immutable 26637db96d56Sopenharmony_ci with self.assertRaises(TypeError): 26647db96d56Sopenharmony_ci re.Match.foo = 1 26657db96d56Sopenharmony_ci with self.assertRaises(TypeError): 26667db96d56Sopenharmony_ci re.Pattern.foo = 1 26677db96d56Sopenharmony_ci with self.assertRaises(TypeError): 26687db96d56Sopenharmony_ci pat = re.compile("") 26697db96d56Sopenharmony_ci tp = type(pat.scanner("")) 26707db96d56Sopenharmony_ci tp.foo = 1 26717db96d56Sopenharmony_ci 26727db96d56Sopenharmony_ci def test_overlap_table(self): 26737db96d56Sopenharmony_ci f = re._compiler._generate_overlap_table 26747db96d56Sopenharmony_ci self.assertEqual(f(""), []) 26757db96d56Sopenharmony_ci self.assertEqual(f("a"), [0]) 26767db96d56Sopenharmony_ci self.assertEqual(f("abcd"), [0, 0, 0, 0]) 26777db96d56Sopenharmony_ci self.assertEqual(f("aaaa"), [0, 1, 2, 3]) 26787db96d56Sopenharmony_ci self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1]) 26797db96d56Sopenharmony_ci self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0]) 26807db96d56Sopenharmony_ci 26817db96d56Sopenharmony_ci def test_signedness(self): 26827db96d56Sopenharmony_ci self.assertGreaterEqual(re._compiler.MAXREPEAT, 0) 26837db96d56Sopenharmony_ci self.assertGreaterEqual(re._compiler.MAXGROUPS, 0) 26847db96d56Sopenharmony_ci 26857db96d56Sopenharmony_ci @cpython_only 26867db96d56Sopenharmony_ci def test_disallow_instantiation(self): 26877db96d56Sopenharmony_ci # Ensure that the type disallows instantiation (bpo-43916) 26887db96d56Sopenharmony_ci check_disallow_instantiation(self, re.Match) 26897db96d56Sopenharmony_ci check_disallow_instantiation(self, re.Pattern) 26907db96d56Sopenharmony_ci pat = re.compile("") 26917db96d56Sopenharmony_ci check_disallow_instantiation(self, type(pat.scanner(""))) 
26927db96d56Sopenharmony_ci 26937db96d56Sopenharmony_ci def test_deprecated_modules(self): 26947db96d56Sopenharmony_ci deprecated = { 26957db96d56Sopenharmony_ci 'sre_compile': ['compile', 'error', 26967db96d56Sopenharmony_ci 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', 26977db96d56Sopenharmony_ci '_compile_info'], 26987db96d56Sopenharmony_ci 'sre_constants': ['error', 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', 26997db96d56Sopenharmony_ci '_NamedIntConstant'], 27007db96d56Sopenharmony_ci 'sre_parse': ['SubPattern', 'parse', 27017db96d56Sopenharmony_ci 'SRE_FLAG_IGNORECASE', 'SUBPATTERN', 27027db96d56Sopenharmony_ci '_parse_sub'], 27037db96d56Sopenharmony_ci } 27047db96d56Sopenharmony_ci for name in deprecated: 27057db96d56Sopenharmony_ci with self.subTest(module=name): 27067db96d56Sopenharmony_ci sys.modules.pop(name, None) 27077db96d56Sopenharmony_ci with self.assertWarns(DeprecationWarning) as w: 27087db96d56Sopenharmony_ci __import__(name) 27097db96d56Sopenharmony_ci self.assertEqual(str(w.warning), 27107db96d56Sopenharmony_ci f"module {name!r} is deprecated") 27117db96d56Sopenharmony_ci self.assertEqual(w.filename, __file__) 27127db96d56Sopenharmony_ci self.assertIn(name, sys.modules) 27137db96d56Sopenharmony_ci mod = sys.modules[name] 27147db96d56Sopenharmony_ci self.assertEqual(mod.__name__, name) 27157db96d56Sopenharmony_ci self.assertEqual(mod.__package__, '') 27167db96d56Sopenharmony_ci for attr in deprecated[name]: 27177db96d56Sopenharmony_ci self.assertTrue(hasattr(mod, attr)) 27187db96d56Sopenharmony_ci del sys.modules[name] 27197db96d56Sopenharmony_ci 27207db96d56Sopenharmony_ciclass ExternalTests(unittest.TestCase): 27217db96d56Sopenharmony_ci 27227db96d56Sopenharmony_ci def test_re_benchmarks(self): 27237db96d56Sopenharmony_ci 're_tests benchmarks' 27247db96d56Sopenharmony_ci from test.re_tests import benchmarks 27257db96d56Sopenharmony_ci for pattern, s in benchmarks: 27267db96d56Sopenharmony_ci with self.subTest(pattern=pattern, string=s): 
27277db96d56Sopenharmony_ci p = re.compile(pattern) 27287db96d56Sopenharmony_ci self.assertTrue(p.search(s)) 27297db96d56Sopenharmony_ci self.assertTrue(p.match(s)) 27307db96d56Sopenharmony_ci self.assertTrue(p.fullmatch(s)) 27317db96d56Sopenharmony_ci s2 = ' '*10000 + s + ' '*10000 27327db96d56Sopenharmony_ci self.assertTrue(p.search(s2)) 27337db96d56Sopenharmony_ci self.assertTrue(p.match(s2, 10000)) 27347db96d56Sopenharmony_ci self.assertTrue(p.match(s2, 10000, 10000 + len(s))) 27357db96d56Sopenharmony_ci self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s))) 27367db96d56Sopenharmony_ci 27377db96d56Sopenharmony_ci def test_re_tests(self): 27387db96d56Sopenharmony_ci 're_tests test suite' 27397db96d56Sopenharmony_ci from test.re_tests import tests, FAIL, SYNTAX_ERROR 27407db96d56Sopenharmony_ci for t in tests: 27417db96d56Sopenharmony_ci pattern = s = outcome = repl = expected = None 27427db96d56Sopenharmony_ci if len(t) == 5: 27437db96d56Sopenharmony_ci pattern, s, outcome, repl, expected = t 27447db96d56Sopenharmony_ci elif len(t) == 3: 27457db96d56Sopenharmony_ci pattern, s, outcome = t 27467db96d56Sopenharmony_ci else: 27477db96d56Sopenharmony_ci raise ValueError('Test tuples should have 3 or 5 fields', t) 27487db96d56Sopenharmony_ci 27497db96d56Sopenharmony_ci with self.subTest(pattern=pattern, string=s): 27507db96d56Sopenharmony_ci if outcome == SYNTAX_ERROR: # Expected a syntax error 27517db96d56Sopenharmony_ci with self.assertRaises(re.error): 27527db96d56Sopenharmony_ci re.compile(pattern) 27537db96d56Sopenharmony_ci continue 27547db96d56Sopenharmony_ci 27557db96d56Sopenharmony_ci obj = re.compile(pattern) 27567db96d56Sopenharmony_ci result = obj.search(s) 27577db96d56Sopenharmony_ci if outcome == FAIL: 27587db96d56Sopenharmony_ci self.assertIsNone(result, 'Succeeded incorrectly') 27597db96d56Sopenharmony_ci continue 27607db96d56Sopenharmony_ci 27617db96d56Sopenharmony_ci with self.subTest(): 27627db96d56Sopenharmony_ci self.assertTrue(result, 'Failed 
incorrectly') 27637db96d56Sopenharmony_ci # Matched, as expected, so now we compute the 27647db96d56Sopenharmony_ci # result string and compare it to our expected result. 27657db96d56Sopenharmony_ci start, end = result.span(0) 27667db96d56Sopenharmony_ci vardict = {'found': result.group(0), 27677db96d56Sopenharmony_ci 'groups': result.group(), 27687db96d56Sopenharmony_ci 'flags': result.re.flags} 27697db96d56Sopenharmony_ci for i in range(1, 100): 27707db96d56Sopenharmony_ci try: 27717db96d56Sopenharmony_ci gi = result.group(i) 27727db96d56Sopenharmony_ci # Special hack because else the string concat fails: 27737db96d56Sopenharmony_ci if gi is None: 27747db96d56Sopenharmony_ci gi = "None" 27757db96d56Sopenharmony_ci except IndexError: 27767db96d56Sopenharmony_ci gi = "Error" 27777db96d56Sopenharmony_ci vardict['g%d' % i] = gi 27787db96d56Sopenharmony_ci for i in result.re.groupindex.keys(): 27797db96d56Sopenharmony_ci try: 27807db96d56Sopenharmony_ci gi = result.group(i) 27817db96d56Sopenharmony_ci if gi is None: 27827db96d56Sopenharmony_ci gi = "None" 27837db96d56Sopenharmony_ci except IndexError: 27847db96d56Sopenharmony_ci gi = "Error" 27857db96d56Sopenharmony_ci vardict[i] = gi 27867db96d56Sopenharmony_ci self.assertEqual(eval(repl, vardict), expected, 27877db96d56Sopenharmony_ci 'grouping error') 27887db96d56Sopenharmony_ci 27897db96d56Sopenharmony_ci # Try the match with both pattern and string converted to 27907db96d56Sopenharmony_ci # bytes, and check that it still succeeds. 
27917db96d56Sopenharmony_ci try: 27927db96d56Sopenharmony_ci bpat = bytes(pattern, "ascii") 27937db96d56Sopenharmony_ci bs = bytes(s, "ascii") 27947db96d56Sopenharmony_ci except UnicodeEncodeError: 27957db96d56Sopenharmony_ci # skip non-ascii tests 27967db96d56Sopenharmony_ci pass 27977db96d56Sopenharmony_ci else: 27987db96d56Sopenharmony_ci with self.subTest('bytes pattern match'): 27997db96d56Sopenharmony_ci obj = re.compile(bpat) 28007db96d56Sopenharmony_ci self.assertTrue(obj.search(bs)) 28017db96d56Sopenharmony_ci 28027db96d56Sopenharmony_ci # Try the match with LOCALE enabled, and check that it 28037db96d56Sopenharmony_ci # still succeeds. 28047db96d56Sopenharmony_ci with self.subTest('locale-sensitive match'): 28057db96d56Sopenharmony_ci obj = re.compile(bpat, re.LOCALE) 28067db96d56Sopenharmony_ci result = obj.search(bs) 28077db96d56Sopenharmony_ci if result is None: 28087db96d56Sopenharmony_ci print('=== Fails on locale-sensitive match', t) 28097db96d56Sopenharmony_ci 28107db96d56Sopenharmony_ci # Try the match with the search area limited to the extent 28117db96d56Sopenharmony_ci # of the match and see if it still succeeds. \B will 28127db96d56Sopenharmony_ci # break (because it won't match at the end or start of a 28137db96d56Sopenharmony_ci # string), so we'll ignore patterns that feature it. 28147db96d56Sopenharmony_ci if (pattern[:2] != r'\B' and pattern[-2:] != r'\B' 28157db96d56Sopenharmony_ci and result is not None): 28167db96d56Sopenharmony_ci with self.subTest('range-limited match'): 28177db96d56Sopenharmony_ci obj = re.compile(pattern) 28187db96d56Sopenharmony_ci self.assertTrue(obj.search(s, start, end + 1)) 28197db96d56Sopenharmony_ci 28207db96d56Sopenharmony_ci # Try the match with IGNORECASE enabled, and check that it 28217db96d56Sopenharmony_ci # still succeeds. 
28227db96d56Sopenharmony_ci with self.subTest('case-insensitive match'): 28237db96d56Sopenharmony_ci obj = re.compile(pattern, re.IGNORECASE) 28247db96d56Sopenharmony_ci self.assertTrue(obj.search(s)) 28257db96d56Sopenharmony_ci 28267db96d56Sopenharmony_ci # Try the match with UNICODE locale enabled, and check 28277db96d56Sopenharmony_ci # that it still succeeds. 28287db96d56Sopenharmony_ci with self.subTest('unicode-sensitive match'): 28297db96d56Sopenharmony_ci obj = re.compile(pattern, re.UNICODE) 28307db96d56Sopenharmony_ci self.assertTrue(obj.search(s)) 28317db96d56Sopenharmony_ci 28327db96d56Sopenharmony_ci 28337db96d56Sopenharmony_ciif __name__ == "__main__": 28347db96d56Sopenharmony_ci unittest.main() 2835