1c67d6573Sopenharmony_ci#![cfg_attr(feature = "pattern", feature(pattern))] 2c67d6573Sopenharmony_ci 3c67d6573Sopenharmony_ciuse regex; 4c67d6573Sopenharmony_ci 5c67d6573Sopenharmony_ci// Due to macro scoping rules, this definition only applies for the modules 6c67d6573Sopenharmony_ci// defined below. Effectively, it allows us to use the same tests for both 7c67d6573Sopenharmony_ci// native and dynamic regexes. 8c67d6573Sopenharmony_ci// 9c67d6573Sopenharmony_ci// This is also used to test the various matching engines. This one exercises 10c67d6573Sopenharmony_ci// the normal code path which automatically chooses the engine based on the 11c67d6573Sopenharmony_ci// regex and the input. Other dynamic tests explicitly set the engine to use. 12c67d6573Sopenharmony_cimacro_rules! regex_new { 13c67d6573Sopenharmony_ci ($re:expr) => {{ 14c67d6573Sopenharmony_ci use regex::Regex; 15c67d6573Sopenharmony_ci Regex::new($re) 16c67d6573Sopenharmony_ci }}; 17c67d6573Sopenharmony_ci} 18c67d6573Sopenharmony_ci 19c67d6573Sopenharmony_cimacro_rules! regex { 20c67d6573Sopenharmony_ci ($re:expr) => { 21c67d6573Sopenharmony_ci regex_new!($re).unwrap() 22c67d6573Sopenharmony_ci }; 23c67d6573Sopenharmony_ci} 24c67d6573Sopenharmony_ci 25c67d6573Sopenharmony_cimacro_rules! regex_set_new { 26c67d6573Sopenharmony_ci ($re:expr) => {{ 27c67d6573Sopenharmony_ci use regex::RegexSet; 28c67d6573Sopenharmony_ci RegexSet::new($re) 29c67d6573Sopenharmony_ci }}; 30c67d6573Sopenharmony_ci} 31c67d6573Sopenharmony_ci 32c67d6573Sopenharmony_cimacro_rules! regex_set { 33c67d6573Sopenharmony_ci ($res:expr) => { 34c67d6573Sopenharmony_ci regex_set_new!($res).unwrap() 35c67d6573Sopenharmony_ci }; 36c67d6573Sopenharmony_ci} 37c67d6573Sopenharmony_ci 38c67d6573Sopenharmony_ci// Must come before other module definitions. 
39c67d6573Sopenharmony_ciinclude!("macros_str.rs"); 40c67d6573Sopenharmony_ciinclude!("macros.rs"); 41c67d6573Sopenharmony_ci 42c67d6573Sopenharmony_cimod api; 43c67d6573Sopenharmony_cimod api_str; 44c67d6573Sopenharmony_cimod crazy; 45c67d6573Sopenharmony_cimod flags; 46c67d6573Sopenharmony_cimod fowler; 47c67d6573Sopenharmony_cimod misc; 48c67d6573Sopenharmony_cimod multiline; 49c67d6573Sopenharmony_cimod noparse; 50c67d6573Sopenharmony_cimod regression; 51c67d6573Sopenharmony_cimod regression_fuzz; 52c67d6573Sopenharmony_cimod replace; 53c67d6573Sopenharmony_cimod searcher; 54c67d6573Sopenharmony_cimod set; 55c67d6573Sopenharmony_cimod shortest_match; 56c67d6573Sopenharmony_cimod suffix_reverse; 57c67d6573Sopenharmony_ci#[cfg(feature = "unicode")] 58c67d6573Sopenharmony_cimod unicode; 59c67d6573Sopenharmony_ci#[cfg(feature = "unicode-perl")] 60c67d6573Sopenharmony_cimod word_boundary; 61c67d6573Sopenharmony_ci#[cfg(feature = "unicode-perl")] 62c67d6573Sopenharmony_cimod word_boundary_unicode; 63c67d6573Sopenharmony_ci 64c67d6573Sopenharmony_ci#[test] 65c67d6573Sopenharmony_cifn disallow_non_utf8() { 66c67d6573Sopenharmony_ci assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); 67c67d6573Sopenharmony_ci assert!(regex::Regex::new(r"(?-u).").is_err()); 68c67d6573Sopenharmony_ci assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); 69c67d6573Sopenharmony_ci assert!(regex::Regex::new(r"(?-u)☃").is_err()); 70c67d6573Sopenharmony_ci} 71c67d6573Sopenharmony_ci 72c67d6573Sopenharmony_ci#[test] 73c67d6573Sopenharmony_cifn disallow_octal() { 74c67d6573Sopenharmony_ci assert!(regex::Regex::new(r"\0").is_err()); 75c67d6573Sopenharmony_ci} 76c67d6573Sopenharmony_ci 77c67d6573Sopenharmony_ci#[test] 78c67d6573Sopenharmony_cifn allow_octal() { 79c67d6573Sopenharmony_ci assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); 80c67d6573Sopenharmony_ci} 81c67d6573Sopenharmony_ci 82c67d6573Sopenharmony_ci#[test] 83c67d6573Sopenharmony_cifn oibits() { 
84c67d6573Sopenharmony_ci use regex::bytes; 85c67d6573Sopenharmony_ci use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; 86c67d6573Sopenharmony_ci use std::panic::{RefUnwindSafe, UnwindSafe}; 87c67d6573Sopenharmony_ci 88c67d6573Sopenharmony_ci fn assert_send<T: Send>() {} 89c67d6573Sopenharmony_ci fn assert_sync<T: Sync>() {} 90c67d6573Sopenharmony_ci fn assert_unwind_safe<T: UnwindSafe>() {} 91c67d6573Sopenharmony_ci fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} 92c67d6573Sopenharmony_ci 93c67d6573Sopenharmony_ci assert_send::<Regex>(); 94c67d6573Sopenharmony_ci assert_sync::<Regex>(); 95c67d6573Sopenharmony_ci assert_unwind_safe::<Regex>(); 96c67d6573Sopenharmony_ci assert_ref_unwind_safe::<Regex>(); 97c67d6573Sopenharmony_ci assert_send::<RegexBuilder>(); 98c67d6573Sopenharmony_ci assert_sync::<RegexBuilder>(); 99c67d6573Sopenharmony_ci assert_unwind_safe::<RegexBuilder>(); 100c67d6573Sopenharmony_ci assert_ref_unwind_safe::<RegexBuilder>(); 101c67d6573Sopenharmony_ci 102c67d6573Sopenharmony_ci assert_send::<bytes::Regex>(); 103c67d6573Sopenharmony_ci assert_sync::<bytes::Regex>(); 104c67d6573Sopenharmony_ci assert_unwind_safe::<bytes::Regex>(); 105c67d6573Sopenharmony_ci assert_ref_unwind_safe::<bytes::Regex>(); 106c67d6573Sopenharmony_ci assert_send::<bytes::RegexBuilder>(); 107c67d6573Sopenharmony_ci assert_sync::<bytes::RegexBuilder>(); 108c67d6573Sopenharmony_ci assert_unwind_safe::<bytes::RegexBuilder>(); 109c67d6573Sopenharmony_ci assert_ref_unwind_safe::<bytes::RegexBuilder>(); 110c67d6573Sopenharmony_ci 111c67d6573Sopenharmony_ci assert_send::<RegexSet>(); 112c67d6573Sopenharmony_ci assert_sync::<RegexSet>(); 113c67d6573Sopenharmony_ci assert_unwind_safe::<RegexSet>(); 114c67d6573Sopenharmony_ci assert_ref_unwind_safe::<RegexSet>(); 115c67d6573Sopenharmony_ci assert_send::<RegexSetBuilder>(); 116c67d6573Sopenharmony_ci assert_sync::<RegexSetBuilder>(); 117c67d6573Sopenharmony_ci assert_unwind_safe::<RegexSetBuilder>(); 
118c67d6573Sopenharmony_ci assert_ref_unwind_safe::<RegexSetBuilder>(); 119c67d6573Sopenharmony_ci 120c67d6573Sopenharmony_ci assert_send::<bytes::RegexSet>(); 121c67d6573Sopenharmony_ci assert_sync::<bytes::RegexSet>(); 122c67d6573Sopenharmony_ci assert_unwind_safe::<bytes::RegexSet>(); 123c67d6573Sopenharmony_ci assert_ref_unwind_safe::<bytes::RegexSet>(); 124c67d6573Sopenharmony_ci assert_send::<bytes::RegexSetBuilder>(); 125c67d6573Sopenharmony_ci assert_sync::<bytes::RegexSetBuilder>(); 126c67d6573Sopenharmony_ci assert_unwind_safe::<bytes::RegexSetBuilder>(); 127c67d6573Sopenharmony_ci assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); 128c67d6573Sopenharmony_ci} 129c67d6573Sopenharmony_ci 130c67d6573Sopenharmony_ci// See: https://github.com/rust-lang/regex/issues/568 131c67d6573Sopenharmony_ci#[test] 132c67d6573Sopenharmony_cifn oibits_regression() { 133c67d6573Sopenharmony_ci use regex::Regex; 134c67d6573Sopenharmony_ci use std::panic; 135c67d6573Sopenharmony_ci 136c67d6573Sopenharmony_ci let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); 137c67d6573Sopenharmony_ci} 138c67d6573Sopenharmony_ci 139c67d6573Sopenharmony_ci// See: https://github.com/rust-lang/regex/issues/750 140c67d6573Sopenharmony_ci#[test] 141c67d6573Sopenharmony_ci#[cfg(target_pointer_width = "64")] 142c67d6573Sopenharmony_cifn regex_is_reasonably_small() { 143c67d6573Sopenharmony_ci use std::mem::size_of; 144c67d6573Sopenharmony_ci 145c67d6573Sopenharmony_ci use regex::bytes; 146c67d6573Sopenharmony_ci use regex::{Regex, RegexSet}; 147c67d6573Sopenharmony_ci 148c67d6573Sopenharmony_ci assert_eq!(16, size_of::<Regex>()); 149c67d6573Sopenharmony_ci assert_eq!(16, size_of::<RegexSet>()); 150c67d6573Sopenharmony_ci assert_eq!(16, size_of::<bytes::Regex>()); 151c67d6573Sopenharmony_ci assert_eq!(16, size_of::<bytes::RegexSet>()); 152c67d6573Sopenharmony_ci} 153c67d6573Sopenharmony_ci 154c67d6573Sopenharmony_ci// See: 
// https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
// See: CVE-2022-24713
//
// We test that our regex compiler will correctly return a "too big" error when
// we try to use a very large repetition on an *empty* sub-expression.
//
// At the time this test was written, the regex compiler does not represent
// empty sub-expressions with any bytecode instructions. In effect, it's an
// "optimization" to leave them out, since they would otherwise correspond
// to an unconditional JUMP in the regex bytecode (i.e., an unconditional
// epsilon transition in the NFA graph). Therefore, an empty sub-expression
// represents an interesting case for the compiler's size limits. Since it
// doesn't actually contribute any additional memory to the compiled regex
// instructions, the size limit machinery never detects it. Instead, it just
// dumbly tries to compile the empty sub-expression N times, where N is the
// repetition size.
//
// When N is very large, this will cause the compiler to essentially spin and
// do nothing for a decently large amount of time. It causes the regex to take
// quite a bit of time to compile, despite the concrete syntax of the regex
// being quite small.
//
// The degree to which this is actually a problem is somewhat of a judgment
// call. Some regexes simply take a long time to compile.
// But in general, you should be able to reasonably control this by setting
// lower or higher size limits on the compiled object size. But this
// mitigation doesn't work at all for this case.
//
// This particular test is somewhat narrow. It merely checks that regex
// compilation will, at some point, return a "too big" error. Before the
// fix landed, this test would eventually fail because the regex would be
// successfully compiled (after enough time elapsed). So while this test
// doesn't check that we exit in a reasonable amount of time, it does at least
// check that we are properly returning an error at some point.
#[test]
fn big_empty_regex_fails() {
    use regex::Regex;

    // 4294967295 == u32::MAX repetitions of an empty non-capturing group.
    let result = Regex::new("(?:){4294967295}");
    assert!(result.is_err());
}

// Below is a "billion laughs" variant of the previous test case.
#[test]
fn big_empty_reps_chain_regex_fails() {
    use regex::Regex;

    // Six stacked `{64}` repetitions multiply out to 64^6 (2^36) copies of
    // the empty group, even though each individual count is small.
    let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}");
    assert!(result.is_err());
}

// Below is another situation where a zero-length sub-expression can be
// introduced.
#[test]
fn big_zero_reps_regex_fails() {
    use regex::Regex;

    // `x{0}` repeats `x` zero times, so the operand of the outer
    // u32::MAX-sized repetition is itself zero-length.
    let result = Regex::new(r"x{0}{4294967295}");
    assert!(result.is_err());
}

// Testing another case for completeness.
#[test]
fn empty_alt_regex_fails() {
    use regex::Regex;

    // An alternation whose two branches are both empty, repeated u32::MAX
    // times; compilation must report an error.
    let result = Regex::new(r"(?:|){4294967295}");
    assert!(result.is_err());
}