1// Enable the benchmarking harness. 2#![feature(test)] 3// It's too annoying to carefully define macros based on which regex engines 4// have which benchmarks, so just ignore these warnings. 5#![allow(unused_macros)] 6 7extern crate test; 8 9use cfg_if::cfg_if; 10 11cfg_if! { 12 if #[cfg(feature = "re-pcre1")] { 13 pub use ffi::pcre1::Regex; 14 } else if #[cfg(feature = "re-onig")] { 15 pub use ffi::onig::Regex; 16 } else if #[cfg(any(feature = "re-rust"))] { 17 pub use regex::{Regex, RegexSet}; 18 } else if #[cfg(feature = "re-rust-bytes")] { 19 pub use regex::bytes::{Regex, RegexSet}; 20 } else if #[cfg(feature = "re-re2")] { 21 pub use ffi::re2::Regex; 22 } else if #[cfg(feature = "re-pcre2")] { 23 pub use ffi::pcre2::Regex; 24 } else if #[cfg(feature = "re-tcl")] { 25 pub use ffi::tcl::Regex; 26 } else { 27 compile_error!( 28 "To run the benchmarks, see `./run -h` or the HACKING.md document" 29 ); 30 } 31} 32 33// Usage: regex!(pattern) 34// 35// Builds a ::Regex from a borrowed string. 36// 37// Due to macro scoping rules, this definition only applies for the modules 38// defined below. Effectively, it allows us to use the same tests for both 39// native and dynamic regexes. 40macro_rules! regex { 41 ($re:expr) => { 42 crate::Regex::new(&$re.to_owned()).unwrap() 43 }; 44} 45 46cfg_if! { 47 if #[cfg(feature = "re-tcl")] { 48 // Usage: text!(haystack) 49 // 50 // Builds a ::Text from an owned string. 51 // 52 // This macro is called on every input searched in every benchmark. It is 53 // called exactly once per benchmark and its time is not included in the 54 // benchmark timing. 55 // 56 // The text given to the macro is always a String, which is guaranteed to be 57 // valid UTF-8. 58 // 59 // The return type should be an owned value that can deref to whatever the 60 // regex accepts in its `is_match` and `find_iter` methods. 61 macro_rules! text { 62 ($text:expr) => {{ 63 use crate::ffi::tcl::Text; 64 Text::new($text) 65 }} 66 } 67 type Text = ffi::tcl::Text; 68 } else if #[cfg(feature = "re-rust-bytes")] { 69 macro_rules! text { 70 ($text:expr) => {{ 71 let text: String = $text; 72 text.into_bytes() 73 }} 74 } 75 type Text = Vec<u8>; 76 } else { 77 macro_rules! text { 78 ($text:expr) => { $text } 79 } 80 type Text = String; 81 } 82} 83 84// Macros for writing benchmarks easily. We provide macros for benchmarking 85// matches, non-matches and for finding all successive non-overlapping matches 86// in a string (including a check that the count is correct). 87 88// USAGE: bench_match!(name, pattern, haystack) 89// 90// This benchmarks how fast a regular expression can report whether it matches 91// a particular haystack. If the regex doesn't match, then the benchmark fails. 92// Regexes are compiled exactly once. 93// 94// name is an identifier for the benchmark. 95// 96// pattern should be a &'static str representing the regular expression. 97// 98// haystack should be a String. 99macro_rules! bench_match { 100 ($name:ident, $pattern:expr, $haystack:expr) => { 101 bench_is_match!($name, true, regex!($pattern), $haystack); 102 }; 103} 104 105// USAGE: bench_not_match!(name, pattern, haystack) 106// 107// This benchmarks how fast a regular expression can report whether it matches 108// a particular haystack. If the regex matches, then the benchmark fails. 109// Regexes are compiled exactly once. 110// 111// name is an identifier for the benchmark. 112// 113// pattern should be a &'static str representing the regular expression. 114// 115// haystack should be a String. 116macro_rules! bench_not_match { 117 ($name:ident, $pattern:expr, $haystack:expr) => { 118 bench_is_match!($name, false, regex!($pattern), $haystack); 119 }; 120} 121 122// USAGE: bench_is_match!(name, is_match, regex, haystack) 123// 124// This benchmarks how fast a regular expression can report whether it matches 125// a particular haystack. If the regex match status doesn't match is_match, 126// then the benchmark fails. Regexes are compiled exactly once. 127// 128// name is an identifier for the benchmark. 129// 130// is_match reports whether the regex is expected to match the haystack or not. 131// 132// regex should be a ::Regex. 133// 134// haystack should be a String. 135macro_rules! bench_is_match { 136 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 137 #[bench] 138 fn $name(b: &mut Bencher) { 139 use lazy_static::lazy_static; 140 use std::sync::Mutex; 141 142 // Why do we use lazy_static here? It seems sensible to just 143 // compile a regex outside of the b.iter() call and be done with 144 // it. However, it seems like Rust's benchmark harness actually 145 // calls the entire benchmark function multiple times. This doesn't 146 // factor into the timings reported in the benchmarks, but it does 147 // make the benchmarks take substantially longer to run because 148 // they're spending a lot of time recompiling regexes. 149 lazy_static! { 150 static ref RE: Mutex<Regex> = Mutex::new($re); 151 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 152 }; 153 let re = RE.lock().unwrap(); 154 let text = TEXT.lock().unwrap(); 155 b.bytes = text.len() as u64; 156 b.iter(|| { 157 if re.is_match(&text) != $is_match { 158 if $is_match { 159 panic!("expected match, got not match"); 160 } else { 161 panic!("expected no match, got match"); 162 } 163 } 164 }); 165 } 166 }; 167} 168 169// USAGE: bench_find!(name, pattern, count, haystack) 170// 171// This benchmarks how fast a regular expression can count all successive 172// non-overlapping matches in haystack. If the count reported does not match 173// the count given, then the benchmark fails. 174// 175// name is an identifier for the benchmark. 176// 177// pattern should be a &'static str representing the regular expression. 178// 179// haystack should be a String. 180macro_rules! bench_find { 181 ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 182 #[bench] 183 fn $name(b: &mut Bencher) { 184 use lazy_static::lazy_static; 185 use std::sync::Mutex; 186 187 lazy_static! { 188 static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern)); 189 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 190 }; 191 let re = RE.lock().unwrap(); 192 let text = TEXT.lock().unwrap(); 193 b.bytes = text.len() as u64; 194 b.iter(|| { 195 let count = re.find_iter(&text).count(); 196 assert_eq!($count, count) 197 }); 198 } 199 }; 200} 201 202// USAGE: bench_captures!(name, pattern, groups, haystack); 203// 204// CONTRACT: 205// Given: 206// ident, the desired benchmarking function name 207// pattern : ::Regex, the regular expression to be executed 208// groups : usize, the number of capture groups 209// haystack : String, the string to search 210// bench_captures will benchmark how fast re.captures() produces 211// the capture groups in question. 212macro_rules! bench_captures { 213 ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 214 #[cfg(feature = "re-rust")] 215 #[bench] 216 fn $name(b: &mut Bencher) { 217 use lazy_static::lazy_static; 218 use std::sync::Mutex; 219 220 lazy_static! { 221 static ref RE: Mutex<Regex> = Mutex::new($pattern); 222 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 223 }; 224 let re = RE.lock().unwrap(); 225 let text = TEXT.lock().unwrap(); 226 b.bytes = text.len() as u64; 227 b.iter(|| match re.captures(&text) { 228 None => assert!(false, "no captures"), 229 Some(caps) => assert_eq!($count + 1, caps.len()), 230 }); 231 } 232 }; 233} 234 235// USAGE: bench_is_match_set!(name, is_match, regex, haystack) 236macro_rules! bench_is_match_set { 237 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 238 #[bench] 239 fn $name(b: &mut Bencher) { 240 use lazy_static::lazy_static; 241 use std::sync::Mutex; 242 243 lazy_static! { 244 static ref RE: Mutex<RegexSet> = Mutex::new($re); 245 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 246 }; 247 let re = RE.lock().unwrap(); 248 let text = TEXT.lock().unwrap(); 249 b.bytes = text.len() as u64; 250 b.iter(|| { 251 if re.is_match(&text) != $is_match { 252 if $is_match { 253 panic!("expected match, got not match"); 254 } else { 255 panic!("expected no match, got match"); 256 } 257 } 258 }); 259 } 260 }; 261} 262 263// USAGE: bench_matches_set!(name, is_match, regex, haystack) 264macro_rules! bench_matches_set { 265 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 266 #[bench] 267 fn $name(b: &mut Bencher) { 268 use lazy_static::lazy_static; 269 use std::sync::Mutex; 270 271 lazy_static! { 272 static ref RE: Mutex<RegexSet> = Mutex::new($re); 273 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 274 }; 275 let re = RE.lock().unwrap(); 276 let text = TEXT.lock().unwrap(); 277 b.bytes = text.len() as u64; 278 b.iter(|| { 279 if re.matches(&text).matched_any() != $is_match { 280 if $is_match { 281 panic!("expected match, got not match"); 282 } else { 283 panic!("expected no match, got match"); 284 } 285 } 286 }); 287 } 288 }; 289} 290 291cfg_if! { 292 if #[cfg(any( 293 feature = "re-pcre1", 294 feature = "re-onig", 295 feature = "re-rust", 296 feature = "re-rust-bytes", 297 feature = "re-re2", 298 feature = "re-pcre2", 299 feature = "re-tcl" 300 ))] { 301 mod ffi; 302 mod misc; 303 mod regexdna; 304 mod sherlock; 305 } 306} 307 308#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 309mod rust_compile; 310