1c67d6573Sopenharmony_ci// Enable the benchmarking harness. 2c67d6573Sopenharmony_ci#![feature(test)] 3c67d6573Sopenharmony_ci// It's too annoying to carefully define macros based on which regex engines 4c67d6573Sopenharmony_ci// have which benchmarks, so just ignore these warnings. 5c67d6573Sopenharmony_ci#![allow(unused_macros)] 6c67d6573Sopenharmony_ci 7c67d6573Sopenharmony_ciextern crate test; 8c67d6573Sopenharmony_ci 9c67d6573Sopenharmony_ciuse cfg_if::cfg_if; 10c67d6573Sopenharmony_ci 11c67d6573Sopenharmony_cicfg_if! { 12c67d6573Sopenharmony_ci if #[cfg(feature = "re-pcre1")] { 13c67d6573Sopenharmony_ci pub use ffi::pcre1::Regex; 14c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-onig")] { 15c67d6573Sopenharmony_ci pub use ffi::onig::Regex; 16c67d6573Sopenharmony_ci } else if #[cfg(any(feature = "re-rust"))] { 17c67d6573Sopenharmony_ci pub use regex::{Regex, RegexSet}; 18c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-rust-bytes")] { 19c67d6573Sopenharmony_ci pub use regex::bytes::{Regex, RegexSet}; 20c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-re2")] { 21c67d6573Sopenharmony_ci pub use ffi::re2::Regex; 22c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-pcre2")] { 23c67d6573Sopenharmony_ci pub use ffi::pcre2::Regex; 24c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-tcl")] { 25c67d6573Sopenharmony_ci pub use ffi::tcl::Regex; 26c67d6573Sopenharmony_ci } else { 27c67d6573Sopenharmony_ci compile_error!( 28c67d6573Sopenharmony_ci "To run the benchmarks, see `./run -h` or the HACKING.md document" 29c67d6573Sopenharmony_ci ); 30c67d6573Sopenharmony_ci } 31c67d6573Sopenharmony_ci} 32c67d6573Sopenharmony_ci 33c67d6573Sopenharmony_ci// Usage: regex!(pattern) 34c67d6573Sopenharmony_ci// 35c67d6573Sopenharmony_ci// Builds a ::Regex from a borrowed string. 36c67d6573Sopenharmony_ci// 37c67d6573Sopenharmony_ci// Due to macro scoping rules, this definition only applies for the modules 38c67d6573Sopenharmony_ci// defined below. 
Effectively, it allows us to use the same tests for both 39c67d6573Sopenharmony_ci// native and dynamic regexes. 40c67d6573Sopenharmony_cimacro_rules! regex { 41c67d6573Sopenharmony_ci ($re:expr) => { 42c67d6573Sopenharmony_ci crate::Regex::new(&$re.to_owned()).unwrap() 43c67d6573Sopenharmony_ci }; 44c67d6573Sopenharmony_ci} 45c67d6573Sopenharmony_ci 46c67d6573Sopenharmony_cicfg_if! { 47c67d6573Sopenharmony_ci if #[cfg(feature = "re-tcl")] { 48c67d6573Sopenharmony_ci // Usage: text!(haystack) 49c67d6573Sopenharmony_ci // 50c67d6573Sopenharmony_ci // Builds a ::Text from an owned string. 51c67d6573Sopenharmony_ci // 52c67d6573Sopenharmony_ci // This macro is called on every input searched in every benchmark. It is 53c67d6573Sopenharmony_ci // called exactly once per benchmark and its time is not included in the 54c67d6573Sopenharmony_ci // benchmark timing. 55c67d6573Sopenharmony_ci // 56c67d6573Sopenharmony_ci // The text given to the macro is always a String, which is guaranteed to be 57c67d6573Sopenharmony_ci // valid UTF-8. 58c67d6573Sopenharmony_ci // 59c67d6573Sopenharmony_ci // The return type should be an owned value that can deref to whatever the 60c67d6573Sopenharmony_ci // regex accepts in its `is_match` and `find_iter` methods. 61c67d6573Sopenharmony_ci macro_rules! text { 62c67d6573Sopenharmony_ci ($text:expr) => {{ 63c67d6573Sopenharmony_ci use crate::ffi::tcl::Text; 64c67d6573Sopenharmony_ci Text::new($text) 65c67d6573Sopenharmony_ci }} 66c67d6573Sopenharmony_ci } 67c67d6573Sopenharmony_ci type Text = ffi::tcl::Text; 68c67d6573Sopenharmony_ci } else if #[cfg(feature = "re-rust-bytes")] { 69c67d6573Sopenharmony_ci macro_rules! text { 70c67d6573Sopenharmony_ci ($text:expr) => {{ 71c67d6573Sopenharmony_ci let text: String = $text; 72c67d6573Sopenharmony_ci text.into_bytes() 73c67d6573Sopenharmony_ci }} 74c67d6573Sopenharmony_ci } 75c67d6573Sopenharmony_ci type Text = Vec<u8>; 76c67d6573Sopenharmony_ci } else { 77c67d6573Sopenharmony_ci macro_rules! 
text { 78c67d6573Sopenharmony_ci ($text:expr) => { $text } 79c67d6573Sopenharmony_ci } 80c67d6573Sopenharmony_ci type Text = String; 81c67d6573Sopenharmony_ci } 82c67d6573Sopenharmony_ci} 83c67d6573Sopenharmony_ci 84c67d6573Sopenharmony_ci// Macros for writing benchmarks easily. We provide macros for benchmarking 85c67d6573Sopenharmony_ci// matches, non-matches and for finding all successive non-overlapping matches 86c67d6573Sopenharmony_ci// in a string (including a check that the count is correct). 87c67d6573Sopenharmony_ci 88c67d6573Sopenharmony_ci// USAGE: bench_match!(name, pattern, haystack) 89c67d6573Sopenharmony_ci// 90c67d6573Sopenharmony_ci// This benchmarks how fast a regular expression can report whether it matches 91c67d6573Sopenharmony_ci// a particular haystack. If the regex doesn't match, then the benchmark fails. 92c67d6573Sopenharmony_ci// Regexes are compiled exactly once. 93c67d6573Sopenharmony_ci// 94c67d6573Sopenharmony_ci// name is an identifier for the benchmark. 95c67d6573Sopenharmony_ci// 96c67d6573Sopenharmony_ci// pattern should be a &'static str representing the regular expression. 97c67d6573Sopenharmony_ci// 98c67d6573Sopenharmony_ci// haystack should be a String. 99c67d6573Sopenharmony_cimacro_rules! bench_match { 100c67d6573Sopenharmony_ci ($name:ident, $pattern:expr, $haystack:expr) => { 101c67d6573Sopenharmony_ci bench_is_match!($name, true, regex!($pattern), $haystack); 102c67d6573Sopenharmony_ci }; 103c67d6573Sopenharmony_ci} 104c67d6573Sopenharmony_ci 105c67d6573Sopenharmony_ci// USAGE: bench_not_match!(name, pattern, haystack) 106c67d6573Sopenharmony_ci// 107c67d6573Sopenharmony_ci// This benchmarks how fast a regular expression can report whether it matches 108c67d6573Sopenharmony_ci// a particular haystack. If the regex matches, then the benchmark fails. 109c67d6573Sopenharmony_ci// Regexes are compiled exactly once. 110c67d6573Sopenharmony_ci// 111c67d6573Sopenharmony_ci// name is an identifier for the benchmark. 
//
// pattern should be a &'static str representing the regular expression.
//
// haystack should be a String.
macro_rules! bench_not_match {
    ($name:ident, $pattern:expr, $haystack:expr) => {
        bench_is_match!($name, false, regex!($pattern), $haystack);
    };
}

// USAGE: bench_is_match!(name, is_match, regex, haystack)
//
// This benchmarks how fast a regular expression can report whether it matches
// a particular haystack. If the regex match status doesn't match is_match,
// then the benchmark fails. Regexes are compiled exactly once.
//
// name is an identifier for the benchmark.
//
// is_match reports whether the regex is expected to match the haystack or not.
//
// regex should be a ::Regex.
//
// haystack should be a String.
//
// The expansion refers to `Bencher`, `Regex` and `Text` by unqualified name,
// so the invoking module must have those names in scope.
macro_rules! bench_is_match {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Why do we use lazy_static here? It seems sensible to just
            // compile a regex outside of the b.iter() call and be done with
            // it. However, it seems like Rust's benchmark harness actually
            // calls the entire benchmark function multiple times. This doesn't
            // factor into the timings reported in the benchmarks, but it does
            // make the benchmarks take substantially longer to run because
            // they're spending a lot of time recompiling regexes.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            // Both locks are taken once, outside the timed closure.
            let re = RE.lock().unwrap();
            let text = TEXT.lock().unwrap();
            // Record the haystack length as the bytes processed per iteration.
            b.bytes = text.len() as u64;
            b.iter(|| {
                if re.is_match(&text) != $is_match {
                    if $is_match {
                        panic!("expected match, got not match");
                    } else {
                        panic!("expected no match, got match");
                    }
                }
            });
        }
    };
}

// USAGE: bench_find!(name, pattern, count, haystack)
//
// This benchmarks how fast a regular expression can count all successive
// non-overlapping matches in haystack. If the count reported does not match
// the count given, then the benchmark fails.
//
// name is an identifier for the benchmark.
//
// pattern should be a &'static str representing the regular expression.
178c67d6573Sopenharmony_ci// 179c67d6573Sopenharmony_ci// haystack should be a String. 180c67d6573Sopenharmony_cimacro_rules! bench_find { 181c67d6573Sopenharmony_ci ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 182c67d6573Sopenharmony_ci #[bench] 183c67d6573Sopenharmony_ci fn $name(b: &mut Bencher) { 184c67d6573Sopenharmony_ci use lazy_static::lazy_static; 185c67d6573Sopenharmony_ci use std::sync::Mutex; 186c67d6573Sopenharmony_ci 187c67d6573Sopenharmony_ci lazy_static! { 188c67d6573Sopenharmony_ci static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern)); 189c67d6573Sopenharmony_ci static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 190c67d6573Sopenharmony_ci }; 191c67d6573Sopenharmony_ci let re = RE.lock().unwrap(); 192c67d6573Sopenharmony_ci let text = TEXT.lock().unwrap(); 193c67d6573Sopenharmony_ci b.bytes = text.len() as u64; 194c67d6573Sopenharmony_ci b.iter(|| { 195c67d6573Sopenharmony_ci let count = re.find_iter(&text).count(); 196c67d6573Sopenharmony_ci assert_eq!($count, count) 197c67d6573Sopenharmony_ci }); 198c67d6573Sopenharmony_ci } 199c67d6573Sopenharmony_ci }; 200c67d6573Sopenharmony_ci} 201c67d6573Sopenharmony_ci 202c67d6573Sopenharmony_ci// USAGE: bench_captures!(name, pattern, groups, haystack); 203c67d6573Sopenharmony_ci// 204c67d6573Sopenharmony_ci// CONTRACT: 205c67d6573Sopenharmony_ci// Given: 206c67d6573Sopenharmony_ci// ident, the desired benchmarking function name 207c67d6573Sopenharmony_ci// pattern : ::Regex, the regular expression to be executed 208c67d6573Sopenharmony_ci// groups : usize, the number of capture groups 209c67d6573Sopenharmony_ci// haystack : String, the string to search 210c67d6573Sopenharmony_ci// bench_captures will benchmark how fast re.captures() produces 211c67d6573Sopenharmony_ci// the capture groups in question. 212c67d6573Sopenharmony_cimacro_rules! 
bench_captures { 213c67d6573Sopenharmony_ci ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 214c67d6573Sopenharmony_ci #[cfg(feature = "re-rust")] 215c67d6573Sopenharmony_ci #[bench] 216c67d6573Sopenharmony_ci fn $name(b: &mut Bencher) { 217c67d6573Sopenharmony_ci use lazy_static::lazy_static; 218c67d6573Sopenharmony_ci use std::sync::Mutex; 219c67d6573Sopenharmony_ci 220c67d6573Sopenharmony_ci lazy_static! { 221c67d6573Sopenharmony_ci static ref RE: Mutex<Regex> = Mutex::new($pattern); 222c67d6573Sopenharmony_ci static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 223c67d6573Sopenharmony_ci }; 224c67d6573Sopenharmony_ci let re = RE.lock().unwrap(); 225c67d6573Sopenharmony_ci let text = TEXT.lock().unwrap(); 226c67d6573Sopenharmony_ci b.bytes = text.len() as u64; 227c67d6573Sopenharmony_ci b.iter(|| match re.captures(&text) { 228c67d6573Sopenharmony_ci None => assert!(false, "no captures"), 229c67d6573Sopenharmony_ci Some(caps) => assert_eq!($count + 1, caps.len()), 230c67d6573Sopenharmony_ci }); 231c67d6573Sopenharmony_ci } 232c67d6573Sopenharmony_ci }; 233c67d6573Sopenharmony_ci} 234c67d6573Sopenharmony_ci 235c67d6573Sopenharmony_ci// USAGE: bench_is_match_set!(name, is_match, regex, haystack) 236c67d6573Sopenharmony_cimacro_rules! bench_is_match_set { 237c67d6573Sopenharmony_ci ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 238c67d6573Sopenharmony_ci #[bench] 239c67d6573Sopenharmony_ci fn $name(b: &mut Bencher) { 240c67d6573Sopenharmony_ci use lazy_static::lazy_static; 241c67d6573Sopenharmony_ci use std::sync::Mutex; 242c67d6573Sopenharmony_ci 243c67d6573Sopenharmony_ci lazy_static! 
{ 244c67d6573Sopenharmony_ci static ref RE: Mutex<RegexSet> = Mutex::new($re); 245c67d6573Sopenharmony_ci static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 246c67d6573Sopenharmony_ci }; 247c67d6573Sopenharmony_ci let re = RE.lock().unwrap(); 248c67d6573Sopenharmony_ci let text = TEXT.lock().unwrap(); 249c67d6573Sopenharmony_ci b.bytes = text.len() as u64; 250c67d6573Sopenharmony_ci b.iter(|| { 251c67d6573Sopenharmony_ci if re.is_match(&text) != $is_match { 252c67d6573Sopenharmony_ci if $is_match { 253c67d6573Sopenharmony_ci panic!("expected match, got not match"); 254c67d6573Sopenharmony_ci } else { 255c67d6573Sopenharmony_ci panic!("expected no match, got match"); 256c67d6573Sopenharmony_ci } 257c67d6573Sopenharmony_ci } 258c67d6573Sopenharmony_ci }); 259c67d6573Sopenharmony_ci } 260c67d6573Sopenharmony_ci }; 261c67d6573Sopenharmony_ci} 262c67d6573Sopenharmony_ci 263c67d6573Sopenharmony_ci// USAGE: bench_matches_set!(name, is_match, regex, haystack) 264c67d6573Sopenharmony_cimacro_rules! bench_matches_set { 265c67d6573Sopenharmony_ci ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 266c67d6573Sopenharmony_ci #[bench] 267c67d6573Sopenharmony_ci fn $name(b: &mut Bencher) { 268c67d6573Sopenharmony_ci use lazy_static::lazy_static; 269c67d6573Sopenharmony_ci use std::sync::Mutex; 270c67d6573Sopenharmony_ci 271c67d6573Sopenharmony_ci lazy_static! 
{ 272c67d6573Sopenharmony_ci static ref RE: Mutex<RegexSet> = Mutex::new($re); 273c67d6573Sopenharmony_ci static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 274c67d6573Sopenharmony_ci }; 275c67d6573Sopenharmony_ci let re = RE.lock().unwrap(); 276c67d6573Sopenharmony_ci let text = TEXT.lock().unwrap(); 277c67d6573Sopenharmony_ci b.bytes = text.len() as u64; 278c67d6573Sopenharmony_ci b.iter(|| { 279c67d6573Sopenharmony_ci if re.matches(&text).matched_any() != $is_match { 280c67d6573Sopenharmony_ci if $is_match { 281c67d6573Sopenharmony_ci panic!("expected match, got not match"); 282c67d6573Sopenharmony_ci } else { 283c67d6573Sopenharmony_ci panic!("expected no match, got match"); 284c67d6573Sopenharmony_ci } 285c67d6573Sopenharmony_ci } 286c67d6573Sopenharmony_ci }); 287c67d6573Sopenharmony_ci } 288c67d6573Sopenharmony_ci }; 289c67d6573Sopenharmony_ci} 290c67d6573Sopenharmony_ci 291c67d6573Sopenharmony_cicfg_if! { 292c67d6573Sopenharmony_ci if #[cfg(any( 293c67d6573Sopenharmony_ci feature = "re-pcre1", 294c67d6573Sopenharmony_ci feature = "re-onig", 295c67d6573Sopenharmony_ci feature = "re-rust", 296c67d6573Sopenharmony_ci feature = "re-rust-bytes", 297c67d6573Sopenharmony_ci feature = "re-re2", 298c67d6573Sopenharmony_ci feature = "re-pcre2", 299c67d6573Sopenharmony_ci feature = "re-tcl" 300c67d6573Sopenharmony_ci ))] { 301c67d6573Sopenharmony_ci mod ffi; 302c67d6573Sopenharmony_ci mod misc; 303c67d6573Sopenharmony_ci mod regexdna; 304c67d6573Sopenharmony_ci mod sherlock; 305c67d6573Sopenharmony_ci } 306c67d6573Sopenharmony_ci} 307c67d6573Sopenharmony_ci 308c67d6573Sopenharmony_ci#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 309c67d6573Sopenharmony_cimod rust_compile; 310