1#![cfg_attr(feature = "pattern", feature(pattern))] 2 3use regex; 4 5// Due to macro scoping rules, this definition only applies for the modules 6// defined below. Effectively, it allows us to use the same tests for both 7// native and dynamic regexes. 8// 9// This is also used to test the various matching engines. This one exercises 10// the normal code path which automatically chooses the engine based on the 11// regex and the input. Other dynamic tests explicitly set the engine to use. 12macro_rules! regex_new { 13 ($re:expr) => {{ 14 use regex::Regex; 15 Regex::new($re) 16 }}; 17} 18 19macro_rules! regex { 20 ($re:expr) => { 21 regex_new!($re).unwrap() 22 }; 23} 24 25macro_rules! regex_set_new { 26 ($re:expr) => {{ 27 use regex::RegexSet; 28 RegexSet::new($re) 29 }}; 30} 31 32macro_rules! regex_set { 33 ($res:expr) => { 34 regex_set_new!($res).unwrap() 35 }; 36} 37 38// Must come before other module definitions. 39include!("macros_str.rs"); 40include!("macros.rs"); 41 42mod api; 43mod api_str; 44mod crazy; 45mod flags; 46mod fowler; 47mod misc; 48mod multiline; 49mod noparse; 50mod regression; 51mod regression_fuzz; 52mod replace; 53mod searcher; 54mod set; 55mod shortest_match; 56mod suffix_reverse; 57#[cfg(feature = "unicode")] 58mod unicode; 59#[cfg(feature = "unicode-perl")] 60mod word_boundary; 61#[cfg(feature = "unicode-perl")] 62mod word_boundary_unicode; 63 64#[test] 65fn disallow_non_utf8() { 66 assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); 67 assert!(regex::Regex::new(r"(?-u).").is_err()); 68 assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); 69 assert!(regex::Regex::new(r"(?-u)☃").is_err()); 70} 71 72#[test] 73fn disallow_octal() { 74 assert!(regex::Regex::new(r"\0").is_err()); 75} 76 77#[test] 78fn allow_octal() { 79 assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); 80} 81 82#[test] 83fn oibits() { 84 use regex::bytes; 85 use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; 86 use std::panic::{RefUnwindSafe, UnwindSafe}; 87 88 fn assert_send<T: Send>() {} 89 fn assert_sync<T: Sync>() {} 90 fn assert_unwind_safe<T: UnwindSafe>() {} 91 fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} 92 93 assert_send::<Regex>(); 94 assert_sync::<Regex>(); 95 assert_unwind_safe::<Regex>(); 96 assert_ref_unwind_safe::<Regex>(); 97 assert_send::<RegexBuilder>(); 98 assert_sync::<RegexBuilder>(); 99 assert_unwind_safe::<RegexBuilder>(); 100 assert_ref_unwind_safe::<RegexBuilder>(); 101 102 assert_send::<bytes::Regex>(); 103 assert_sync::<bytes::Regex>(); 104 assert_unwind_safe::<bytes::Regex>(); 105 assert_ref_unwind_safe::<bytes::Regex>(); 106 assert_send::<bytes::RegexBuilder>(); 107 assert_sync::<bytes::RegexBuilder>(); 108 assert_unwind_safe::<bytes::RegexBuilder>(); 109 assert_ref_unwind_safe::<bytes::RegexBuilder>(); 110 111 assert_send::<RegexSet>(); 112 assert_sync::<RegexSet>(); 113 assert_unwind_safe::<RegexSet>(); 114 assert_ref_unwind_safe::<RegexSet>(); 115 assert_send::<RegexSetBuilder>(); 116 assert_sync::<RegexSetBuilder>(); 117 assert_unwind_safe::<RegexSetBuilder>(); 118 assert_ref_unwind_safe::<RegexSetBuilder>(); 119 120 assert_send::<bytes::RegexSet>(); 121 assert_sync::<bytes::RegexSet>(); 122 assert_unwind_safe::<bytes::RegexSet>(); 123 assert_ref_unwind_safe::<bytes::RegexSet>(); 124 assert_send::<bytes::RegexSetBuilder>(); 125 assert_sync::<bytes::RegexSetBuilder>(); 126 assert_unwind_safe::<bytes::RegexSetBuilder>(); 127 assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); 128} 129 130// See: https://github.com/rust-lang/regex/issues/568 131#[test] 132fn oibits_regression() { 133 use regex::Regex; 134 use std::panic; 135 136 let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); 137} 138 139// See: https://github.com/rust-lang/regex/issues/750 140#[test] 141#[cfg(target_pointer_width = "64")] 142fn regex_is_reasonably_small() { 143 use std::mem::size_of; 144 145 use regex::bytes; 146 use regex::{Regex, RegexSet}; 147 148 assert_eq!(16, size_of::<Regex>()); 149 assert_eq!(16, size_of::<RegexSet>()); 150 assert_eq!(16, size_of::<bytes::Regex>()); 151 assert_eq!(16, size_of::<bytes::RegexSet>()); 152} 153 154// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 155// See: CVE-2022-24713 156// 157// We test that our regex compiler will correctly return a "too big" error when 158// we try to use a very large repetition on an *empty* sub-expression. 159// 160// At the time this test was written, the regex compiler does not represent 161// empty sub-expressions with any bytecode instructions. In effect, it's an 162// "optimization" to leave them out, since they would otherwise correspond 163// to an unconditional JUMP in the regex bytecode (i.e., an unconditional 164// epsilon transition in the NFA graph). Therefore, an empty sub-expression 165// represents an interesting case for the compiler's size limits. Since it 166// doesn't actually contribute any additional memory to the compiled regex 167// instructions, the size limit machinery never detects it. Instead, it just 168// dumbly tries to compile the empty sub-expression N times, where N is the 169// repetition size. 170// 171// When N is very large, this will cause the compiler to essentially spin and 172// do nothing for a decently large amount of time. It causes the regex to take 173// quite a bit of time to compile, despite the concrete syntax of the regex 174// being quite small. 175// 176// The degree to which this is actually a problem is somewhat of a judgment 177// call. Some regexes simply take a long time to compile. But in general, you 178// should be able to reasonably control this by setting lower or higher size 179// limits on the compiled object size. But this mitigation doesn't work at all 180// for this case. 181// 182// This particular test is somewhat narrow. It merely checks that regex 183// compilation will, at some point, return a "too big" error. Before the 184// fix landed, this test would eventually fail because the regex would be 185// successfully compiled (after enough time elapsed). So while this test 186// doesn't check that we exit in a reasonable amount of time, it does at least 187// check that we are properly returning an error at some point. 188#[test] 189fn big_empty_regex_fails() { 190 use regex::Regex; 191 192 let result = Regex::new("(?:){4294967295}"); 193 assert!(result.is_err()); 194} 195 196// Below is a "billion laughs" variant of the previous test case. 197#[test] 198fn big_empty_reps_chain_regex_fails() { 199 use regex::Regex; 200 201 let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); 202 assert!(result.is_err()); 203} 204 205// Below is another situation where a zero-length sub-expression can be 206// introduced. 207#[test] 208fn big_zero_reps_regex_fails() { 209 use regex::Regex; 210 211 let result = Regex::new(r"x{0}{4294967295}"); 212 assert!(result.is_err()); 213} 214 215// Testing another case for completeness. 216#[test] 217fn empty_alt_regex_fails() { 218 use regex::Regex; 219 220 let result = Regex::new(r"(?:|){4294967295}"); 221 assert!(result.is_err()); 222} 223