xref: /third_party/rust/crates/regex/tests/crazy.rs (revision c67d6573)
1mat!(ascii_literal, r"a", "a", Some((0, 1)));
2
3// Some crazy expressions from regular-expressions.info.
4mat!(
5    match_ranges,
6    r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
7    "num: 255",
8    Some((5, 8))
9);
10mat!(
11    match_ranges_not,
12    r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
13    "num: 256",
14    None
15);
16mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
17mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
18mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
19mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
20mat!(
21    match_email,
22    r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
23    "mine is jam.slam@gmail.com ",
24    Some((8, 26))
25);
26mat!(
27    match_email_not,
28    r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
29    "mine is jam.slam@gmail ",
30    None
31);
32mat!(
33    match_email_big,
34    r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
35    "mine is jam.slam@gmail.com ",
36    Some((8, 26))
37);
38mat!(
39    match_date1,
40    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
41    "1900-01-01",
42    Some((0, 10))
43);
44mat!(
45    match_date2,
46    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
47    "1900-00-01",
48    None
49);
50mat!(
51    match_date3,
52    r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
53    "1900-13-01",
54    None
55);
56
57// Do some crazy dancing with the start/end assertions.
58matiter!(match_start_end_empty, r"^$", "", (0, 0));
59matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
60matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
61matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
62matiter!(
63    match_start_end_empty_rep,
64    r"(?:^$)*",
65    "a\nb\nc",
66    (0, 0),
67    (1, 1),
68    (2, 2),
69    (3, 3),
70    (4, 4),
71    (5, 5)
72);
73matiter!(
74    match_start_end_empty_rep_rev,
75    r"(?:$^)*",
76    "a\nb\nc",
77    (0, 0),
78    (1, 1),
79    (2, 2),
80    (3, 3),
81    (4, 4),
82    (5, 5)
83);
84
85// Test negated character classes.
86mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
87mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
88mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
89mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
90mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
91mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
92mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
93mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
94
95// Test that repeated empty expressions don't loop forever.
96mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
97mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
98mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
99mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
100mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
101mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
102mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
103mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
104mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
105mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
106mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
107mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
108
109// Test that we handle various flavors of empty expressions.
110matiter!(match_empty1, r"", "", (0, 0));
111matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
112matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
113matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
114matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
115matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
116matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
117matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
118matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
119matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
120matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
121matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
122matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
123matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
124matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
125matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
126matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
127matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
128matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
129matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
130matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
131matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
132matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
133
// Test that the DFA can handle pathological cases.
// (This should result in the DFA's cache being flushed too frequently, which
// should cause it to quit and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    /// Returns a string of `count` characters, each of which is '1' or '0'
    /// as decided by one boolean drawn from an entropy-seeded `SmallRng`.
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut rng = SmallRng::from_entropy();
        // One draw per output character, in order.
        (0..count)
            .map(|_| if rng.gen::<bool>() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // The haystack is 100_000 random binary digits, then a '1', then 20
    // more random binary digits. Whatever the random content is, the tail
    // has the shape `1[01]{20}` and the prefix is all binary digits, so the
    // pattern is guaranteed to match.
    let mut text = ones_and_zeroes(100_000);
    text.push('1');
    text.push_str(&ones_and_zeroes(20));

    assert!(re.is_match(text!(&*text)));
}

// Checks that the large, deeply nested pattern below can be compiled with a
// `RegexBuilder` left at its default settings: `build()` must return `Ok`,
// otherwise `unwrap()` panics and the test fails. The compiled regex is not
// used for matching.
//
// NOTE(review): judging by the test name, this presumably guards the
// parser's default nesting limit -- confirm against the regex crate.
//
// The raw string must stay byte-for-byte as written: the pattern sets no `x`
// flag, so its internal newlines and indentation are part of the pattern
// text handed to the builder.
#[test]
fn nest_limit_makes_it_parse() {
    use regex::RegexBuilder;

    RegexBuilder::new(
        r#"(?-u)
        2(?:
          [45]\d{3}|
          7(?:
            1[0-267]|
            2[0-289]|
            3[0-29]|
            4[01]|
            5[1-3]|
            6[013]|
            7[0178]|
            91
          )|
          8(?:
            0[125]|
            [139][1-6]|
            2[0157-9]|
            41|
            6[1-35]|
            7[1-5]|
            8[1-8]|
            90
          )|
          9(?:
            0[0-2]|
            1[0-4]|
            2[568]|
            3[3-6]|
            5[5-7]|
            6[0167]|
            7[15]|
            8[0146-9]
          )
        )\d{4}|
        3(?:
          12?[5-7]\d{2}|
          0(?:
            2(?:
              [025-79]\d|
              [348]\d{1,2}
            )|
            3(?:
              [2-4]\d|
              [56]\d?
            )
          )|
          2(?:
            1\d{2}|
            2(?:
              [12]\d|
              [35]\d{1,2}|
              4\d?
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [2356]\d|
              4\d{1,2}
            )
          )|
          4(?:
            1\d{2}|
            2(?:
              2\d{1,2}|
              [47]|
              5\d{2}
            )
          )|
          5(?:
            1\d{2}|
            29
          )|
          [67]1\d{2}|
          8(?:
            1\d{2}|
            2(?:
              2\d{2}|
              3|
              4\d
            )
          )
        )\d{3}|
        4(?:
          0(?:
            2(?:
              [09]\d|
              7
            )|
            33\d{2}
          )|
          1\d{3}|
          2(?:
            1\d{2}|
            2(?:
              [25]\d?|
              [348]\d|
              [67]\d{1,2}
            )
          )|
          3(?:
            1\d{2}(?:
              \d{2}
            )?|
            2(?:
              [045]\d|
              [236-9]\d{1,2}
            )|
            32\d{2}
          )|
          4(?:
            [18]\d{2}|
            2(?:
              [2-46]\d{2}|
              3
            )|
            5[25]\d{2}
          )|
          5(?:
            1\d{2}|
            2(?:
              3\d|
              5
            )
          )|
          6(?:
            [18]\d{2}|
            2(?:
              3(?:
                \d{2}
              )?|
              [46]\d{1,2}|
              5\d{2}|
              7\d
            )|
            5(?:
              3\d?|
              4\d|
              [57]\d{1,2}|
              6\d{2}|
              8
            )
          )|
          71\d{2}|
          8(?:
            [18]\d{2}|
            23\d{2}|
            54\d{2}
          )|
          9(?:
            [18]\d{2}|
            2[2-5]\d{2}|
            53\d{1,2}
          )
        )\d{3}|
        5(?:
          02[03489]\d{2}|
          1\d{2}|
          2(?:
            1\d{2}|
            2(?:
              2(?:
                \d{2}
              )?|
              [457]\d{2}
            )
          )|
          3(?:
            1\d{2}|
            2(?:
              [37](?:
                \d{2}
              )?|
              [569]\d{2}
            )
          )|
          4(?:
            1\d{2}|
            2[46]\d{2}
          )|
          5(?:
            1\d{2}|
            26\d{1,2}
          )|
          6(?:
            [18]\d{2}|
            2|
            53\d{2}
          )|
          7(?:
            1|
            24
          )\d{2}|
          8(?:
            1|
            26
          )\d{2}|
          91\d{2}
        )\d{3}|
        6(?:
          0(?:
            1\d{2}|
            2(?:
              3\d{2}|
              4\d{1,2}
            )
          )|
          2(?:
            2[2-5]\d{2}|
            5(?:
              [3-5]\d{2}|
              7
            )|
            8\d{2}
          )|
          3(?:
            1|
            2[3478]
          )\d{2}|
          4(?:
            1|
            2[34]
          )\d{2}|
          5(?:
            1|
            2[47]
          )\d{2}|
          6(?:
            [18]\d{2}|
            6(?:
              2(?:
                2\d|
                [34]\d{2}
              )|
              5(?:
                [24]\d{2}|
                3\d|
                5\d{1,2}
              )
            )
          )|
          72[2-5]\d{2}|
          8(?:
            1\d{2}|
            2[2-5]\d{2}
          )|
          9(?:
            1\d{2}|
            2[2-6]\d{2}
          )
        )\d{3}|
        7(?:
          (?:
            02|
            [3-589]1|
            6[12]|
            72[24]
          )\d{2}|
          21\d{3}|
          32
        )\d{3}|
        8(?:
          (?:
            4[12]|
            [5-7]2|
            1\d?
          )|
          (?:
            0|
            3[12]|
            [5-7]1|
            217
          )\d
        )\d{4}|
        9(?:
          [35]1|
          (?:
            [024]2|
            81
          )\d|
          (?:
            1|
            [24]1
          )\d{2}
        )\d{3}
        "#,
    )
    .build()
    .unwrap();
}
