1c67d6573Sopenharmony_ci#![cfg_attr(feature = "pattern", feature(pattern))]
2c67d6573Sopenharmony_ci
3c67d6573Sopenharmony_ciuse regex;
4c67d6573Sopenharmony_ci
5c67d6573Sopenharmony_ci// Due to macro scoping rules, this definition only applies for the modules
6c67d6573Sopenharmony_ci// defined below. Effectively, it allows us to use the same tests for both
7c67d6573Sopenharmony_ci// native and dynamic regexes.
8c67d6573Sopenharmony_ci//
9c67d6573Sopenharmony_ci// This is also used to test the various matching engines. This one exercises
10c67d6573Sopenharmony_ci// the normal code path which automatically chooses the engine based on the
11c67d6573Sopenharmony_ci// regex and the input. Other dynamic tests explicitly set the engine to use.
macro_rules! regex_new {
    ($pattern:expr) => {{
        // The import is scoped to this block so that expanding the macro does
        // not add a `Regex` name to the module that invokes it.
        use regex::Regex;
        // Evaluates to the `Result` of compilation; the caller decides how to
        // handle a failure.
        Regex::new($pattern)
    }};
}
18c67d6573Sopenharmony_ci
// Compiles a pattern through `regex_new!` and unwraps the result, so an
// invalid pattern panics at the point of use.
macro_rules! regex {
    ($pattern:expr) => { regex_new!($pattern).unwrap() };
}
24c67d6573Sopenharmony_ci
// Builds a `RegexSet` from the given collection of patterns and evaluates to
// the `Result` of construction, leaving error handling to the caller.
macro_rules! regex_set_new {
    ($patterns:expr) => {{
        // The import is scoped to this block so that expanding the macro does
        // not add a `RegexSet` name to the module that invokes it.
        use regex::RegexSet;
        RegexSet::new($patterns)
    }};
}
31c67d6573Sopenharmony_ci
// Builds a set through `regex_set_new!` and unwraps the result, so an invalid
// pattern panics at the point of use.
macro_rules! regex_set {
    ($patterns:expr) => { regex_set_new!($patterns).unwrap() };
}
37c67d6573Sopenharmony_ci
38c67d6573Sopenharmony_ci// Must come before other module definitions.
39c67d6573Sopenharmony_ciinclude!("macros_str.rs");
40c67d6573Sopenharmony_ciinclude!("macros.rs");
41c67d6573Sopenharmony_ci
42c67d6573Sopenharmony_cimod api;
43c67d6573Sopenharmony_cimod api_str;
44c67d6573Sopenharmony_cimod crazy;
45c67d6573Sopenharmony_cimod flags;
46c67d6573Sopenharmony_cimod fowler;
47c67d6573Sopenharmony_cimod misc;
48c67d6573Sopenharmony_cimod multiline;
49c67d6573Sopenharmony_cimod noparse;
50c67d6573Sopenharmony_cimod regression;
51c67d6573Sopenharmony_cimod regression_fuzz;
52c67d6573Sopenharmony_cimod replace;
53c67d6573Sopenharmony_cimod searcher;
54c67d6573Sopenharmony_cimod set;
55c67d6573Sopenharmony_cimod shortest_match;
56c67d6573Sopenharmony_cimod suffix_reverse;
57c67d6573Sopenharmony_ci#[cfg(feature = "unicode")]
58c67d6573Sopenharmony_cimod unicode;
59c67d6573Sopenharmony_ci#[cfg(feature = "unicode-perl")]
60c67d6573Sopenharmony_cimod word_boundary;
61c67d6573Sopenharmony_ci#[cfg(feature = "unicode-perl")]
62c67d6573Sopenharmony_cimod word_boundary_unicode;
63c67d6573Sopenharmony_ci
// Every pattern below switches Unicode mode off with `(?-u)`. The test
// expects the string-based `Regex` to refuse to compile each of them.
#[test]
fn disallow_non_utf8() {
    let rejected = [r"(?-u)\xFF", r"(?-u).", r"(?-u)[\xFF]", r"(?-u)☃"];

    for &pattern in rejected.iter() {
        // Name the offending pattern, since the line number alone no longer
        // identifies which case failed.
        assert!(
            regex::Regex::new(pattern).is_err(),
            "pattern {:?} compiled but should have been rejected",
            pattern
        );
    }
}
71c67d6573Sopenharmony_ci
// With the default configuration, the escape `\0` must fail to compile.
#[test]
fn disallow_octal() {
    use regex::Regex;

    let compiled = Regex::new(r"\0");
    assert!(compiled.is_err());
}
76c67d6573Sopenharmony_ci
// Counterpart of `disallow_octal`: once octal syntax is enabled on the
// builder, the same `\0` pattern must compile.
#[test]
fn allow_octal() {
    use regex::RegexBuilder;

    let mut builder = RegexBuilder::new(r"\0");
    builder.octal(true);

    assert!(builder.build().is_ok());
}
81c67d6573Sopenharmony_ci
// Compile-time check that the public regex types and their builders, in both
// the string and `bytes` APIs, implement `Send`, `Sync`, `UnwindSafe` and
// `RefUnwindSafe`. ("oibits" is short for "opt-in built-in traits", an older
// name for auto traits.) Nothing is checked at runtime: if a type loses one
// of these impls, this function stops compiling.
#[test]
fn oibits() {
    use regex::bytes;
    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
    use std::panic::{RefUnwindSafe, UnwindSafe};

    // Only type-checks for a `T` that satisfies all four bounds at once.
    fn assert_oibits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

    // String API.
    assert_oibits::<Regex>();
    assert_oibits::<RegexBuilder>();
    assert_oibits::<RegexSet>();
    assert_oibits::<RegexSetBuilder>();

    // Byte-oriented API.
    assert_oibits::<bytes::Regex>();
    assert_oibits::<bytes::RegexBuilder>();
    assert_oibits::<bytes::RegexSet>();
    assert_oibits::<bytes::RegexSetBuilder>();
}
129c67d6573Sopenharmony_ci
130c67d6573Sopenharmony_ci// See: https://github.com/rust-lang/regex/issues/568
#[test]
fn oibits_regression() {
    use std::panic::catch_unwind;

    // Build a regex inside the unwind boundary. The outcome is deliberately
    // discarded: the test only requires that this compiles and runs.
    let build = || regex::Regex::new("a").unwrap();
    let _ = catch_unwind(build);
}
138c67d6573Sopenharmony_ci
139c67d6573Sopenharmony_ci// See: https://github.com/rust-lang/regex/issues/750
#[test]
#[cfg(target_pointer_width = "64")]
fn regex_is_reasonably_small() {
    use regex::{bytes, Regex, RegexSet};
    use std::mem::size_of;

    // Size in bytes that each public regex handle must have. The test is
    // only compiled on targets with 64-bit pointers.
    const EXPECTED_SIZE: usize = 16;

    assert_eq!(EXPECTED_SIZE, size_of::<Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<RegexSet>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::Regex>());
    assert_eq!(EXPECTED_SIZE, size_of::<bytes::RegexSet>());
}
153c67d6573Sopenharmony_ci
154c67d6573Sopenharmony_ci// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
155c67d6573Sopenharmony_ci// See: CVE-2022-24713
156c67d6573Sopenharmony_ci//
157c67d6573Sopenharmony_ci// We test that our regex compiler will correctly return a "too big" error when
158c67d6573Sopenharmony_ci// we try to use a very large repetition on an *empty* sub-expression.
159c67d6573Sopenharmony_ci//
160c67d6573Sopenharmony_ci// At the time this test was written, the regex compiler does not represent
161c67d6573Sopenharmony_ci// empty sub-expressions with any bytecode instructions. In effect, it's an
162c67d6573Sopenharmony_ci// "optimization" to leave them out, since they would otherwise correspond
163c67d6573Sopenharmony_ci// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
164c67d6573Sopenharmony_ci// epsilon transition in the NFA graph). Therefore, an empty sub-expression
165c67d6573Sopenharmony_ci// represents an interesting case for the compiler's size limits. Since it
166c67d6573Sopenharmony_ci// doesn't actually contribute any additional memory to the compiled regex
167c67d6573Sopenharmony_ci// instructions, the size limit machinery never detects it. Instead, it just
168c67d6573Sopenharmony_ci// dumbly tries to compile the empty sub-expression N times, where N is the
169c67d6573Sopenharmony_ci// repetition size.
170c67d6573Sopenharmony_ci//
171c67d6573Sopenharmony_ci// When N is very large, this will cause the compiler to essentially spin and
172c67d6573Sopenharmony_ci// do nothing for a decently large amount of time. It causes the regex to take
173c67d6573Sopenharmony_ci// quite a bit of time to compile, despite the concrete syntax of the regex
174c67d6573Sopenharmony_ci// being quite small.
175c67d6573Sopenharmony_ci//
176c67d6573Sopenharmony_ci// The degree to which this is actually a problem is somewhat of a judgment
177c67d6573Sopenharmony_ci// call. Some regexes simply take a long time to compile. But in general, you
178c67d6573Sopenharmony_ci// should be able to reasonably control this by setting lower or higher size
179c67d6573Sopenharmony_ci// limits on the compiled object size. But this mitigation doesn't work at all
180c67d6573Sopenharmony_ci// for this case.
181c67d6573Sopenharmony_ci//
182c67d6573Sopenharmony_ci// This particular test is somewhat narrow. It merely checks that regex
183c67d6573Sopenharmony_ci// compilation will, at some point, return a "too big" error. Before the
184c67d6573Sopenharmony_ci// fix landed, this test would eventually fail because the regex would be
185c67d6573Sopenharmony_ci// successfully compiled (after enough time elapsed). So while this test
186c67d6573Sopenharmony_ci// doesn't check that we exit in a reasonable amount of time, it does at least
187c67d6573Sopenharmony_ci// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    // An empty non-capturing group repeated 4294967295 (2^32 - 1) times.
    // Compilation must end in an error rather than succeed.
    let outcome = regex::Regex::new("(?:){4294967295}");
    assert!(outcome.is_err());
}
195c67d6573Sopenharmony_ci
196c67d6573Sopenharmony_ci// Below is a "billion laughs" variant of the previous test case.
#[test]
fn big_empty_reps_chain_regex_fails() {
    // Six stacked `{64}` repetitions of an empty group: every individual
    // count is small, but they multiply to 64^6 repetitions overall.
    let outcome = regex::Regex::new("(?:){64}{64}{64}{64}{64}{64}");
    assert!(outcome.is_err());
}
204c67d6573Sopenharmony_ci
205c67d6573Sopenharmony_ci// Below is another situation where a zero-length sub-expression can be
206c67d6573Sopenharmony_ci// introduced.
#[test]
fn big_zero_reps_regex_fails() {
    // `x{0}` matches only the empty string, so repeating it 4294967295 times
    // is another huge repetition of a zero-length sub-expression.
    let outcome = regex::Regex::new(r"x{0}{4294967295}");
    assert!(outcome.is_err());
}
214c67d6573Sopenharmony_ci
215c67d6573Sopenharmony_ci// Testing another case for completeness.
#[test]
fn empty_alt_regex_fails() {
    // An alternation whose two branches are both empty, repeated 4294967295
    // times. It must be rejected like the other huge empty repetitions.
    let outcome = regex::Regex::new(r"(?:|){4294967295}");
    assert!(outcome.is_err());
}
223