1#![allow(non_snake_case)] 2 3use std::iter::repeat; 4 5use test::Bencher; 6 7#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 8use crate::RegexSet; 9use crate::{Regex, Text}; 10 11#[cfg(not(feature = "re-onig"))] 12#[cfg(not(feature = "re-pcre1"))] 13#[cfg(not(feature = "re-pcre2"))] 14bench_match!( 15 no_exponential, 16 { 17 format!( 18 "{}{}", 19 repeat("a?").take(100).collect::<String>(), 20 repeat("a").take(100).collect::<String>() 21 ) 22 }, 23 repeat("a").take(100).collect() 24); 25 26bench_match!(literal, r"y", { 27 format!("{}y", repeat("x").take(50).collect::<String>()) 28}); 29 30bench_match!(not_literal, r".y", { 31 format!("{}y", repeat("x").take(50).collect::<String>()) 32}); 33 34bench_match!(match_class, "[abcdw]", { 35 format!("{}w", repeat("xxxx").take(20).collect::<String>()) 36}); 37 38bench_match!(match_class_in_range, "[ac]", { 39 format!("{}c", repeat("bbbb").take(20).collect::<String>()) 40}); 41 42#[cfg(not(feature = "re-rust-bytes"))] 43#[cfg(not(feature = "re-tcl"))] 44bench_match!(match_class_unicode, r"\p{L}", { 45 format!("{}a", repeat("☃5☃5").take(20).collect::<String>()) 46}); 47 48bench_not_match!(anchored_literal_short_non_match, r"^zbc(d|e)", { 49 "abcdefghijklmnopqrstuvwxyz".to_owned() 50}); 51 52bench_not_match!(anchored_literal_long_non_match, r"^zbc(d|e)", { 53 repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>() 54}); 55 56bench_match!(anchored_literal_short_match, r"^.bc(d|e)", { 57 "abcdefghijklmnopqrstuvwxyz".to_owned() 58}); 59 60bench_match!(anchored_literal_long_match, r"^.bc(d|e)", { 61 repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>() 62}); 63 64bench_match!(one_pass_short, r"^.bc(d|e)*$", { 65 "abcddddddeeeededd".to_owned() 66}); 67 68bench_match!(one_pass_short_not, r".bc(d|e)*$", { 69 "abcddddddeeeededd".to_owned() 70}); 71 72bench_match!(one_pass_long_prefix, r"^abcdefghijklmnopqrstuvwxyz.*$", { 73 "abcdefghijklmnopqrstuvwxyz".to_owned() 74}); 75 76bench_match!(one_pass_long_prefix_not, r"^.bcdefghijklmnopqrstuvwxyz.*$", { 77 "abcdefghijklmnopqrstuvwxyz".to_owned() 78}); 79 80bench_match!(long_needle1, r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", { 81 repeat("a").take(100_000).collect::<String>() + "b" 82}); 83 84bench_match!(long_needle2, r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", { 85 repeat("b").take(100_000).collect::<String>() + "a" 86}); 87 88// This benchmark specifically targets the "reverse suffix literal" 89// optimization. In particular, it is easy for a naive implementation to 90// take quadratic worst case time. This benchmark provides a case for such 91// a scenario. 92bench_not_match!(reverse_suffix_no_quadratic, r"[r-z].*bcdefghijklmnopq", { 93 repeat("bcdefghijklmnopq").take(500).collect::<String>() 94}); 95 96#[cfg(feature = "re-rust")] 97#[bench] 98fn replace_all(b: &mut Bencher) { 99 let re = regex!("[cjrw]"); 100 let text = "abcdefghijklmnopqrstuvwxyz"; 101 b.iter(|| re.replace_all(text, "")); 102} 103 104const TXT_32: &'static str = include_str!("data/32.txt"); 105const TXT_1K: &'static str = include_str!("data/1K.txt"); 106const TXT_32K: &'static str = include_str!("data/32K.txt"); 107const TXT_1MB: &'static str = include_str!("data/1MB.txt"); 108 109fn get_text(corpus: &str, suffix: String) -> String { 110 let mut corpus = corpus.to_string(); 111 corpus.push_str(&suffix); 112 corpus 113} 114 115fn easy0_suffix() -> String { 116 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string() 117} 118 119macro_rules! easy0 { 120 () => { 121 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 122 }; 123} 124 125bench_match!(easy0_32, easy0!(), get_text(TXT_32, easy0_suffix())); 126bench_match!(easy0_1K, easy0!(), get_text(TXT_1K, easy0_suffix())); 127bench_match!(easy0_32K, easy0!(), get_text(TXT_32K, easy0_suffix())); 128bench_match!(easy0_1MB, easy0!(), get_text(TXT_1MB, easy0_suffix())); 129 130fn easy1_suffix() -> String { 131 "AABCCCDEEEFGGHHHIJJ".to_string() 132} 133 134macro_rules! easy1 { 135 () => { 136 r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" 137 }; 138} 139 140bench_match!(easy1_32, easy1!(), get_text(TXT_32, easy1_suffix())); 141bench_match!(easy1_1K, easy1!(), get_text(TXT_1K, easy1_suffix())); 142bench_match!(easy1_32K, easy1!(), get_text(TXT_32K, easy1_suffix())); 143bench_match!(easy1_1MB, easy1!(), get_text(TXT_1MB, easy1_suffix())); 144 145fn medium_suffix() -> String { 146 "XABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string() 147} 148 149macro_rules! medium { 150 () => { 151 r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 152 }; 153} 154 155bench_match!(medium_32, medium!(), get_text(TXT_32, medium_suffix())); 156bench_match!(medium_1K, medium!(), get_text(TXT_1K, medium_suffix())); 157bench_match!(medium_32K, medium!(), get_text(TXT_32K, medium_suffix())); 158bench_match!(medium_1MB, medium!(), get_text(TXT_1MB, medium_suffix())); 159 160fn hard_suffix() -> String { 161 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string() 162} 163 164macro_rules! hard { 165 () => { 166 r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" 167 }; 168} 169 170bench_match!(hard_32, hard!(), get_text(TXT_32, hard_suffix())); 171bench_match!(hard_1K, hard!(), get_text(TXT_1K, hard_suffix())); 172bench_match!(hard_32K, hard!(), get_text(TXT_32K, hard_suffix())); 173bench_match!(hard_1MB, hard!(), get_text(TXT_1MB, hard_suffix())); 174 175fn reallyhard_suffix() -> String { 176 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string() 177} 178 179macro_rules! reallyhard { 180 () => { 181 // The point of this being "really" hard is that it should completely 182 // thwart any prefix or suffix literal optimizations. 183 r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*" 184 }; 185} 186 187bench_match!( 188 reallyhard_32, 189 reallyhard!(), 190 get_text(TXT_32, reallyhard_suffix()) 191); 192bench_match!( 193 reallyhard_1K, 194 reallyhard!(), 195 get_text(TXT_1K, reallyhard_suffix()) 196); 197bench_match!( 198 reallyhard_32K, 199 reallyhard!(), 200 get_text(TXT_32K, reallyhard_suffix()) 201); 202bench_match!( 203 reallyhard_1MB, 204 reallyhard!(), 205 get_text(TXT_1MB, reallyhard_suffix()) 206); 207 208fn reallyhard2_suffix() -> String { 209 "Sherlock Holmes".to_string() 210} 211 212macro_rules! reallyhard2 { 213 () => { 214 r"\w+\s+Holmes" 215 }; 216} 217 218bench_match!( 219 reallyhard2_1K, 220 reallyhard2!(), 221 get_text(TXT_1K, reallyhard2_suffix()) 222); 223 224// 225// Benchmarks to justify the short-haystack NFA fallthrough optimization 226// implemented by `read_captures_at` in regex/src/exec.rs. See github issue 227// #348. 228// 229// The procedure used to try to determine the right hardcoded cutoff 230// for the short-haystack optimization in issue #348 is as follows. 231// 232// ``` 233// > cd bench 234// > cargo bench --features re-rust short_hay | tee dfa-nfa.res 235// > # modify the `MatchType::Dfa` branch in exec.rs:read_captures_at 236// > # to just execute the nfa 237// > cargo bench --features re-rust short_hay | tee nfa-only.res 238// > cargo benchcmp dfa-nfa.res nfa-only.res 239// ``` 240// 241// The expected result is that short inputs will go faster under 242// the nfa-only mode, but at some turnover point the dfa-nfa mode 243// will start to win again. Unfortunately, that is not what happened. 244// Instead there was no noticeable change in the bench results, so 245// I've opted to just do the more conservative anchor optimization. 246// 247bench_captures!( 248 short_haystack_1x, 249 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 250 2, 251 String::from("aaaabbbbccccbbbdddd") 252); 253bench_captures!( 254 short_haystack_2x, 255 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 256 2, 257 format!( 258 "{}bbbbccccbbb{}", 259 repeat("aaaa").take(2).collect::<String>(), 260 repeat("dddd").take(2).collect::<String>(), 261 ) 262); 263bench_captures!( 264 short_haystack_3x, 265 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 266 2, 267 format!( 268 "{}bbbbccccbbb{}", 269 repeat("aaaa").take(3).collect::<String>(), 270 repeat("dddd").take(3).collect::<String>(), 271 ) 272); 273bench_captures!( 274 short_haystack_4x, 275 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 276 2, 277 format!( 278 "{}bbbbccccbbb{}", 279 repeat("aaaa").take(4).collect::<String>(), 280 repeat("dddd").take(4).collect::<String>(), 281 ) 282); 283bench_captures!( 284 short_haystack_10x, 285 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 286 2, 287 format!( 288 "{}bbbbccccbbb{}", 289 repeat("aaaa").take(10).collect::<String>(), 290 repeat("dddd").take(10).collect::<String>(), 291 ) 292); 293bench_captures!( 294 short_haystack_100x, 295 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 296 2, 297 format!( 298 "{}bbbbccccbbb{}", 299 repeat("aaaa").take(100).collect::<String>(), 300 repeat("dddd").take(100).collect::<String>(), 301 ) 302); 303bench_captures!( 304 short_haystack_1000x, 305 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 306 2, 307 format!( 308 "{}bbbbccccbbb{}", 309 repeat("aaaa").take(1000).collect::<String>(), 310 repeat("dddd").take(1000).collect::<String>(), 311 ) 312); 313bench_captures!( 314 short_haystack_10000x, 315 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 316 2, 317 format!( 318 "{}bbbbccccbbb{}", 319 repeat("aaaa").take(10000).collect::<String>(), 320 repeat("dddd").take(10000).collect::<String>(), 321 ) 322); 323bench_captures!( 324 short_haystack_100000x, 325 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 326 2, 327 format!( 328 "{}bbbbccccbbb{}", 329 repeat("aaaa").take(100000).collect::<String>(), 330 repeat("dddd").take(100000).collect::<String>(), 331 ) 332); 333bench_captures!( 334 short_haystack_1000000x, 335 Regex::new(r"(bbbb)cccc(bbb)").unwrap(), 336 2, 337 format!( 338 "{}bbbbccccbbb{}", 339 repeat("aaaa").take(1000000).collect::<String>(), 340 repeat("dddd").take(1000000).collect::<String>(), 341 ) 342); 343 344#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 345bench_is_match_set!( 346 is_match_set, 347 true, 348 RegexSet::new(vec![ 349 "aaaaaaaaaaaaaaaaaaa", 350 "abc579", 351 "def.+", 352 "e24fg", 353 "a.*2c", 354 "23.", 355 ]) 356 .unwrap(), 357 format!( 358 "{}a482c{}", 359 repeat('a').take(10).collect::<String>(), 360 repeat('b').take(10).collect::<String>() 361 ) 362); 363 364#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 365bench_matches_set!( 366 matches_set, 367 true, 368 RegexSet::new(vec![ 369 "aaaaaaaaaaaaaaaaaaa", 370 "abc579", 371 "def.+", 372 "e24fg", 373 "a.*2c", 374 "23.", 375 ]) 376 .unwrap(), 377 format!( 378 "{}a482c{}", 379 repeat('a').take(10).collect::<String>(), 380 repeat('b').take(10).collect::<String>() 381 ) 382); 383