1#![cfg_attr(feature = "pattern", feature(pattern))]
2
3use regex;
4
// Due to macro scoping rules, this definition only applies to the modules
// defined below. In effect, it allows us to use the same tests for both
// native and dynamic regexes.
8//
9// This is also used to test the various matching engines. This one exercises
10// the normal code path which automatically chooses the engine based on the
11// regex and the input. Other dynamic tests explicitly set the engine to use.
12macro_rules! regex_new {
13    ($re:expr) => {{
14        use regex::Regex;
15        Regex::new($re)
16    }};
17}
18
19macro_rules! regex {
20    ($re:expr) => {
21        regex_new!($re).unwrap()
22    };
23}
24
25macro_rules! regex_set_new {
26    ($re:expr) => {{
27        use regex::RegexSet;
28        RegexSet::new($re)
29    }};
30}
31
32macro_rules! regex_set {
33    ($res:expr) => {
34        regex_set_new!($res).unwrap()
35    };
36}
37
38// Must come before other module definitions.
39include!("macros_str.rs");
40include!("macros.rs");
41
42mod api;
43mod api_str;
44mod crazy;
45mod flags;
46mod fowler;
47mod misc;
48mod multiline;
49mod noparse;
50mod regression;
51mod regression_fuzz;
52mod replace;
53mod searcher;
54mod set;
55mod shortest_match;
56mod suffix_reverse;
57#[cfg(feature = "unicode")]
58mod unicode;
59#[cfg(feature = "unicode-perl")]
60mod word_boundary;
61#[cfg(feature = "unicode-perl")]
62mod word_boundary_unicode;
63
64#[test]
65fn disallow_non_utf8() {
66    assert!(regex::Regex::new(r"(?-u)\xFF").is_err());
67    assert!(regex::Regex::new(r"(?-u).").is_err());
68    assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err());
69    assert!(regex::Regex::new(r"(?-u)☃").is_err());
70}
71
72#[test]
73fn disallow_octal() {
74    assert!(regex::Regex::new(r"\0").is_err());
75}
76
77#[test]
78fn allow_octal() {
79    assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok());
80}
81
82#[test]
83fn oibits() {
84    use regex::bytes;
85    use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder};
86    use std::panic::{RefUnwindSafe, UnwindSafe};
87
88    fn assert_send<T: Send>() {}
89    fn assert_sync<T: Sync>() {}
90    fn assert_unwind_safe<T: UnwindSafe>() {}
91    fn assert_ref_unwind_safe<T: RefUnwindSafe>() {}
92
93    assert_send::<Regex>();
94    assert_sync::<Regex>();
95    assert_unwind_safe::<Regex>();
96    assert_ref_unwind_safe::<Regex>();
97    assert_send::<RegexBuilder>();
98    assert_sync::<RegexBuilder>();
99    assert_unwind_safe::<RegexBuilder>();
100    assert_ref_unwind_safe::<RegexBuilder>();
101
102    assert_send::<bytes::Regex>();
103    assert_sync::<bytes::Regex>();
104    assert_unwind_safe::<bytes::Regex>();
105    assert_ref_unwind_safe::<bytes::Regex>();
106    assert_send::<bytes::RegexBuilder>();
107    assert_sync::<bytes::RegexBuilder>();
108    assert_unwind_safe::<bytes::RegexBuilder>();
109    assert_ref_unwind_safe::<bytes::RegexBuilder>();
110
111    assert_send::<RegexSet>();
112    assert_sync::<RegexSet>();
113    assert_unwind_safe::<RegexSet>();
114    assert_ref_unwind_safe::<RegexSet>();
115    assert_send::<RegexSetBuilder>();
116    assert_sync::<RegexSetBuilder>();
117    assert_unwind_safe::<RegexSetBuilder>();
118    assert_ref_unwind_safe::<RegexSetBuilder>();
119
120    assert_send::<bytes::RegexSet>();
121    assert_sync::<bytes::RegexSet>();
122    assert_unwind_safe::<bytes::RegexSet>();
123    assert_ref_unwind_safe::<bytes::RegexSet>();
124    assert_send::<bytes::RegexSetBuilder>();
125    assert_sync::<bytes::RegexSetBuilder>();
126    assert_unwind_safe::<bytes::RegexSetBuilder>();
127    assert_ref_unwind_safe::<bytes::RegexSetBuilder>();
128}
129
130// See: https://github.com/rust-lang/regex/issues/568
131#[test]
132fn oibits_regression() {
133    use regex::Regex;
134    use std::panic;
135
136    let _ = panic::catch_unwind(|| Regex::new("a").unwrap());
137}
138
139// See: https://github.com/rust-lang/regex/issues/750
140#[test]
141#[cfg(target_pointer_width = "64")]
142fn regex_is_reasonably_small() {
143    use std::mem::size_of;
144
145    use regex::bytes;
146    use regex::{Regex, RegexSet};
147
148    assert_eq!(16, size_of::<Regex>());
149    assert_eq!(16, size_of::<RegexSet>());
150    assert_eq!(16, size_of::<bytes::Regex>());
151    assert_eq!(16, size_of::<bytes::RegexSet>());
152}
153
154// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
155// See: CVE-2022-24713
156//
157// We test that our regex compiler will correctly return a "too big" error when
158// we try to use a very large repetition on an *empty* sub-expression.
159//
160// At the time this test was written, the regex compiler does not represent
161// empty sub-expressions with any bytecode instructions. In effect, it's an
162// "optimization" to leave them out, since they would otherwise correspond
163// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
164// epsilon transition in the NFA graph). Therefore, an empty sub-expression
165// represents an interesting case for the compiler's size limits. Since it
166// doesn't actually contribute any additional memory to the compiled regex
167// instructions, the size limit machinery never detects it. Instead, it just
168// dumbly tries to compile the empty sub-expression N times, where N is the
169// repetition size.
170//
171// When N is very large, this will cause the compiler to essentially spin and
172// do nothing for a decently large amount of time. It causes the regex to take
173// quite a bit of time to compile, despite the concrete syntax of the regex
174// being quite small.
175//
176// The degree to which this is actually a problem is somewhat of a judgment
177// call. Some regexes simply take a long time to compile. But in general, you
178// should be able to reasonably control this by setting lower or higher size
179// limits on the compiled object size. But this mitigation doesn't work at all
180// for this case.
181//
182// This particular test is somewhat narrow. It merely checks that regex
183// compilation will, at some point, return a "too big" error. Before the
184// fix landed, this test would eventually fail because the regex would be
185// successfully compiled (after enough time elapsed). So while this test
186// doesn't check that we exit in a reasonable amount of time, it does at least
187// check that we are properly returning an error at some point.
188#[test]
189fn big_empty_regex_fails() {
190    use regex::Regex;
191
192    let result = Regex::new("(?:){4294967295}");
193    assert!(result.is_err());
194}
195
196// Below is a "billion laughs" variant of the previous test case.
197#[test]
198fn big_empty_reps_chain_regex_fails() {
199    use regex::Regex;
200
201    let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}");
202    assert!(result.is_err());
203}
204
205// Below is another situation where a zero-length sub-expression can be
206// introduced.
207#[test]
208fn big_zero_reps_regex_fails() {
209    use regex::Regex;
210
211    let result = Regex::new(r"x{0}{4294967295}");
212    assert!(result.is_err());
213}
214
215// Testing another case for completeness.
216#[test]
217fn empty_alt_regex_fails() {
218    use regex::Regex;
219
220    let result = Regex::new(r"(?:|){4294967295}");
221    assert!(result.is_err());
222}
223