1/* 2This module defines a common API (by convention) for all of the different 3impls that we benchmark. The intent here is to 1) make it easy to write macros 4for generating benchmark definitions generic over impls and 2) make it easier 5to read the benchmarks themselves and grok how exactly each of the impls are 6being invoked. 7 8The naming scheme of each function follows the pertinent parts of our benchmark 9naming scheme (see parent module docs). Namely, it is 10 11 {impl}/{fwd|rev}/{config} 12 13Where 'impl' is the underlying implementation and 'config' is the manner of 14search. The slash indicates a module boundary. We use modules for this because 15it makes writing macros to define benchmarks for all variants much easier. 16*/ 17 18/// memchr's implementation of memmem. This is the implementation that we hope 19/// does approximately as well as all other implementations, and a lot better 20/// in at least some cases. 21pub(crate) mod krate { 22 pub(crate) fn available(_: &str) -> &'static [&'static str] { 23 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 24 } 25 26 pub(crate) mod fwd { 27 use memchr::memmem; 28 29 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 30 memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some() 31 } 32 33 pub(crate) fn prebuilt( 34 needle: &str, 35 ) -> impl Fn(&str) -> bool + 'static { 36 let finder = memmem::Finder::new(needle).into_owned(); 37 move |h| finder.find(h.as_bytes()).is_some() 38 } 39 40 pub(crate) fn oneshotiter<'a>( 41 haystack: &'a str, 42 needle: &'a str, 43 ) -> impl Iterator<Item = usize> + 'a { 44 memmem::find_iter(haystack.as_bytes(), needle.as_bytes()) 45 } 46 47 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 48 PrebuiltIter(memmem::Finder::new(needle).into_owned()) 49 } 50 51 #[derive(Debug)] 52 pub(crate) struct PrebuiltIter(memmem::Finder<'static>); 53 54 impl PrebuiltIter { 55 pub(crate) fn iter<'a>( 56 &'a self, 57 haystack: &'a str, 58 ) -> impl Iterator<Item = usize> + 'a { 59 self.0.find_iter(haystack.as_bytes()) 60 } 61 } 62 } 63 64 pub(crate) mod rev { 65 use memchr::memmem; 66 67 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 68 memmem::rfind(haystack.as_bytes(), needle.as_bytes()).is_some() 69 } 70 71 pub(crate) fn prebuilt( 72 needle: &str, 73 ) -> impl Fn(&str) -> bool + 'static { 74 let finder = memmem::FinderRev::new(needle).into_owned(); 75 move |h| finder.rfind(h.as_bytes()).is_some() 76 } 77 78 pub(crate) fn oneshotiter<'a>( 79 haystack: &'a str, 80 needle: &'a str, 81 ) -> impl Iterator<Item = usize> + 'a { 82 memmem::rfind_iter(haystack.as_bytes(), needle.as_bytes()) 83 } 84 85 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 86 PrebuiltIter(memmem::FinderRev::new(needle).into_owned()) 87 } 88 89 #[derive(Debug)] 90 pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>); 91 92 impl PrebuiltIter { 93 pub(crate) fn iter<'a>( 94 &'a self, 95 haystack: &'a str, 96 ) -> impl Iterator<Item = usize> + 'a { 97 self.0.rfind_iter(haystack.as_bytes()) 98 } 99 } 100 } 101} 102 103/// memchr's implementation of memmem, but without prefilters enabled. This 104/// exists because sometimes prefilters aren't the right choice, and it's good 105/// to be able to compare it against prefilter-accelerated searches to see 106/// where this might be faster. 107pub(crate) mod krate_nopre { 108 pub(crate) fn available(_: &str) -> &'static [&'static str] { 109 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 110 } 111 112 pub(crate) mod fwd { 113 use memchr::memmem; 114 115 fn finder(needle: &[u8]) -> memmem::Finder<'_> { 116 memmem::FinderBuilder::new() 117 .prefilter(memmem::Prefilter::None) 118 .build_forward(needle) 119 } 120 121 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 122 finder(needle.as_bytes()).find(haystack.as_bytes()).is_some() 123 } 124 125 pub(crate) fn prebuilt( 126 needle: &str, 127 ) -> impl Fn(&str) -> bool + 'static { 128 let finder = finder(needle.as_bytes()).into_owned(); 129 move |h| finder.find(h.as_bytes()).is_some() 130 } 131 132 pub(crate) fn oneshotiter<'a>( 133 haystack: &'a str, 134 needle: &'a str, 135 ) -> impl Iterator<Item = usize> + 'a { 136 super::super::iter_from_find( 137 haystack.as_bytes(), 138 needle.as_bytes(), 139 |h, n| finder(n).find(h), 140 ) 141 } 142 143 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 144 PrebuiltIter(finder(needle.as_bytes()).into_owned()) 145 } 146 147 #[derive(Debug)] 148 pub(crate) struct PrebuiltIter(memmem::Finder<'static>); 149 150 impl PrebuiltIter { 151 pub(crate) fn iter<'a>( 152 &'a self, 153 haystack: &'a str, 154 ) -> impl Iterator<Item = usize> + 'a { 155 self.0.find_iter(haystack.as_bytes()) 156 } 157 } 158 } 159 160 // N.B. memrmem/krate_nopre and memrmem/krate should be equivalent for now 161 // since reverse searching doesn't have any prefilter support. 162 pub(crate) mod rev { 163 use memchr::memmem; 164 165 fn finder(needle: &[u8]) -> memmem::FinderRev<'_> { 166 memmem::FinderBuilder::new() 167 .prefilter(memmem::Prefilter::None) 168 .build_reverse(needle) 169 } 170 171 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 172 finder(needle.as_bytes()).rfind(haystack.as_bytes()).is_some() 173 } 174 175 pub(crate) fn prebuilt( 176 needle: &str, 177 ) -> impl Fn(&str) -> bool + 'static { 178 let finder = finder(needle.as_bytes()).into_owned(); 179 move |h| finder.rfind(h.as_bytes()).is_some() 180 } 181 182 pub(crate) fn oneshotiter<'a>( 183 haystack: &'a str, 184 needle: &'a str, 185 ) -> impl Iterator<Item = usize> + 'a { 186 super::super::iter_from_rfind( 187 haystack.as_bytes(), 188 needle.as_bytes(), 189 |h, n| finder(n).rfind(h), 190 ) 191 } 192 193 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 194 PrebuiltIter(finder(needle.as_bytes()).into_owned()) 195 } 196 197 #[derive(Debug)] 198 pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>); 199 200 impl PrebuiltIter { 201 pub(crate) fn iter<'a>( 202 &'a self, 203 haystack: &'a str, 204 ) -> impl Iterator<Item = usize> + 'a { 205 self.0.rfind_iter(haystack.as_bytes()) 206 } 207 } 208 } 209} 210 211/// bstr's implementation of memmem. 212/// 213/// The implementation in this crate was originally copied from bstr. 214/// Eventually, bstr will just use the implementation in this crate, but at time 215/// of writing, it was useful to benchmark against the "original" version. 216pub(crate) mod bstr { 217 pub(crate) fn available(_: &str) -> &'static [&'static str] { 218 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 219 } 220 221 pub(crate) mod fwd { 222 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 223 bstr::ByteSlice::find(haystack.as_bytes(), needle.as_bytes()) 224 .is_some() 225 } 226 227 pub(crate) fn prebuilt( 228 needle: &str, 229 ) -> impl Fn(&str) -> bool + 'static { 230 let finder = bstr::Finder::new(needle).into_owned(); 231 move |h| finder.find(h.as_bytes()).is_some() 232 } 233 234 pub(crate) fn oneshotiter<'a>( 235 haystack: &'a str, 236 needle: &'a str, 237 ) -> impl Iterator<Item = usize> + 'a { 238 bstr::ByteSlice::find_iter(haystack.as_bytes(), needle.as_bytes()) 239 } 240 241 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 242 PrebuiltIter(bstr::Finder::new(needle).into_owned()) 243 } 244 245 #[derive(Debug)] 246 pub(crate) struct PrebuiltIter(bstr::Finder<'static>); 247 248 impl PrebuiltIter { 249 pub(crate) fn iter<'a>( 250 &'a self, 251 haystack: &'a str, 252 ) -> impl Iterator<Item = usize> + 'a { 253 super::super::iter_from_find( 254 haystack.as_bytes(), 255 self.0.needle(), 256 move |h, _| self.0.find(h), 257 ) 258 } 259 } 260 } 261 262 pub(crate) mod rev { 263 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 264 bstr::ByteSlice::rfind(haystack.as_bytes(), needle.as_bytes()) 265 .is_some() 266 } 267 268 pub(crate) fn prebuilt( 269 needle: &str, 270 ) -> impl Fn(&str) -> bool + 'static { 271 let finder = bstr::FinderReverse::new(needle).into_owned(); 272 move |h| finder.rfind(h.as_bytes()).is_some() 273 } 274 275 pub(crate) fn oneshotiter<'a>( 276 haystack: &'a str, 277 needle: &'a str, 278 ) -> impl Iterator<Item = usize> + 'a { 279 bstr::ByteSlice::rfind_iter(haystack.as_bytes(), needle.as_bytes()) 280 } 281 282 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 283 PrebuiltIter(bstr::FinderReverse::new(needle).into_owned()) 284 } 285 286 #[derive(Debug)] 287 pub(crate) struct PrebuiltIter(bstr::FinderReverse<'static>); 288 289 impl PrebuiltIter { 290 pub(crate) fn iter<'a>( 291 &'a self, 292 haystack: &'a str, 293 ) -> impl Iterator<Item = usize> + 'a { 294 super::super::iter_from_rfind( 295 haystack.as_bytes(), 296 self.0.needle(), 297 move |h, _| self.0.rfind(h), 298 ) 299 } 300 } 301 } 302} 303 304/// regex's implementation of substring search. 305/// 306/// regex is where the concept of using heuristics based on an a priori 307/// assumption of byte frequency originated. Eventually, regex will just use the 308/// implementation in this crate, but it will still be useful to benchmark since 309/// regex tends to have higher latency. It would be good to measure that. 310/// 311/// For regex, we don't provide oneshots, since that requires compiling the 312/// regex which we know is going to be ridiculously slow. No real need to 313/// measure it I think. 314pub(crate) mod regex { 315 pub(crate) fn available(_: &str) -> &'static [&'static str] { 316 &["prebuilt", "prebuiltiter"] 317 } 318 319 pub(crate) mod fwd { 320 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 321 unimplemented!("regex does not support oneshot searches") 322 } 323 324 pub(crate) fn prebuilt( 325 needle: &str, 326 ) -> impl Fn(&str) -> bool + 'static { 327 let finder = regex::Regex::new(®ex::escape(needle)).unwrap(); 328 move |h| finder.is_match(h) 329 } 330 331 pub(crate) fn oneshotiter( 332 _haystack: &str, 333 _needle: &str, 334 ) -> impl Iterator<Item = usize> + 'static { 335 std::iter::from_fn(move || { 336 unimplemented!("regex does not support oneshot searches") 337 }) 338 } 339 340 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 341 PrebuiltIter(regex::Regex::new(®ex::escape(needle)).unwrap()) 342 } 343 344 #[derive(Debug)] 345 pub(crate) struct PrebuiltIter(regex::Regex); 346 347 impl PrebuiltIter { 348 pub(crate) fn iter<'a>( 349 &'a self, 350 haystack: &'a str, 351 ) -> impl Iterator<Item = usize> + 'a { 352 self.0.find_iter(haystack).map(|m| m.start()) 353 } 354 } 355 } 356 357 pub(crate) mod rev { 358 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 359 unimplemented!("regex does not support reverse searches") 360 } 361 362 pub(crate) fn prebuilt( 363 _needle: &str, 364 ) -> impl Fn(&str) -> bool + 'static { 365 |_| unimplemented!("regex does not support reverse searches") 366 } 367 368 pub(crate) fn oneshotiter( 369 _haystack: &str, 370 _needle: &str, 371 ) -> impl Iterator<Item = usize> + 'static { 372 std::iter::from_fn(move || { 373 unimplemented!("regex does not support reverse searches") 374 }) 375 } 376 377 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 378 unimplemented!("regex does not support reverse searches") 379 } 380 } 381} 382 383/// std's substring search implementation. 384/// 385/// std uses Two-Way like this crate, but doesn't have any prefilter 386/// heuristics. 387/// 388/// std doesn't have any way to amortize the construction of the searcher, so 389/// we can't implement any of the prebuilt routines. 390pub(crate) mod stud { 391 pub(crate) fn available(_: &str) -> &'static [&'static str] { 392 &["reverse", "oneshot", "oneshotiter"] 393 } 394 395 pub(crate) mod fwd { 396 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 397 haystack.contains(needle) 398 } 399 400 pub(crate) fn prebuilt( 401 _needle: &str, 402 ) -> impl Fn(&str) -> bool + 'static { 403 |_| unimplemented!("std does not support prebuilt searches") 404 } 405 406 pub(crate) fn oneshotiter<'a>( 407 haystack: &'a str, 408 needle: &'a str, 409 ) -> impl Iterator<Item = usize> + 'a { 410 haystack.match_indices(needle).map(|(i, _)| i) 411 } 412 413 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 414 super::super::NoIter { imp: "std" } 415 } 416 } 417 418 pub(crate) mod rev { 419 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 420 haystack.contains(needle) 421 } 422 423 pub(crate) fn prebuilt( 424 _needle: &str, 425 ) -> impl Fn(&str) -> bool + 'static { 426 |_| unimplemented!("std does not support prebuilt searches") 427 } 428 429 pub(crate) fn oneshotiter<'a>( 430 haystack: &'a str, 431 needle: &'a str, 432 ) -> impl Iterator<Item = usize> + 'a { 433 haystack.rmatch_indices(needle).map(|(i, _)| i) 434 } 435 436 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 437 super::super::NoIter { imp: "std" } 438 } 439 } 440} 441 442/// Substring search from the twoway crate. 443/// 444/// twoway uses, obviously, Two-Way as an implementation. AIUI, it was taken 445/// from std at some point but heavily modified to support a prefilter via 446/// PCMPESTRI from the SSE 4.2 ISA extension. (And also uses memchr for 447/// single-byte needles.) 448/// 449/// Like std, there is no way to amortize the construction of the searcher, so 450/// we can't implement any of the prebuilt routines. 451pub(crate) mod twoway { 452 pub(crate) fn available(_: &str) -> &'static [&'static str] { 453 &["reverse", "oneshot", "oneshotiter"] 454 } 455 456 pub(crate) mod fwd { 457 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 458 twoway::find_bytes(haystack.as_bytes(), needle.as_bytes()) 459 .is_some() 460 } 461 462 pub(crate) fn prebuilt( 463 _needle: &str, 464 ) -> impl Fn(&str) -> bool + 'static { 465 |_| unimplemented!("twoway does not support prebuilt searches") 466 } 467 468 pub(crate) fn oneshotiter<'a>( 469 haystack: &'a str, 470 needle: &'a str, 471 ) -> impl Iterator<Item = usize> + 'a { 472 super::super::iter_from_find( 473 haystack.as_bytes(), 474 needle.as_bytes(), 475 twoway::find_bytes, 476 ) 477 } 478 479 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 480 super::super::NoIter { imp: "twoway" } 481 } 482 } 483 484 pub(crate) mod rev { 485 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 486 twoway::rfind_bytes(haystack.as_bytes(), needle.as_bytes()) 487 .is_some() 488 } 489 490 pub(crate) fn prebuilt( 491 _needle: &str, 492 ) -> impl Fn(&str) -> bool + 'static { 493 |_| unimplemented!("twoway does not support prebuilt searches") 494 } 495 496 pub(crate) fn oneshotiter<'a>( 497 haystack: &'a str, 498 needle: &'a str, 499 ) -> impl Iterator<Item = usize> + 'a { 500 super::super::iter_from_rfind( 501 haystack.as_bytes(), 502 needle.as_bytes(), 503 twoway::rfind_bytes, 504 ) 505 } 506 507 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 508 super::super::NoIter { imp: "twoway" } 509 } 510 } 511} 512 513/// Substring search from the sliceslice crate. 514/// 515/// This crate is what inspired me to write a vectorized memmem implementation 516/// in the memchr crate in the first place. In particular, it exposed some 517/// serious weaknesses in my implementation in the bstr crate. 518/// 519/// sliceslice doesn't actually do anything "new" other 520/// than bringing a long known SIMD algorithm to Rust: 521/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd 522/// 523/// The main thrust of the algorithm is that it picks a couple of bytes in the 524/// needle and uses SIMD to check whether those two bytes occur in the haystack 525/// in a way that could lead to a match. If so, then you do a simple memcmp 526/// confirmation step. The main problem with this algorithm is that its worst 527/// case is multiplicative: that confirmatory step can become quite costly if 528/// the SIMD prefilter isn't effective. The elegance of this method, however, 529/// is that the prefilter is routinely effective. 530/// 531/// The essence of memchr's implementation of memmem comes from sliceslice, 532/// but also from regex's original idea to use heuristics based on an a priori 533/// assumption of relative byte frequency AND from bstr's desire to have a 534/// constant space and worst case O(m+n) substring search. My claim is that 535/// it is the best of all words, and that's why this benchmark suite is so 536/// comprehensive. There are a lot of cases and implementations to test. 537/// 538/// NOTE: The API of sliceslice is quite constrained. My guess is that it was 539/// designed for a very specific use case, and the API is heavily constrained 540/// to that use case (whatever it is). While its API doesn't provide any 541/// oneshot routines, we emulate them. (Its main problem is that every such 542/// search requires copying the needle into a fresh allocation. The memchr 543/// crate avoids that problem by being generic over the needle: it can be owned 544/// or borrowed.) Also, since the API only enables testing whether a substring 545/// exists or not, we can't benchmark iteration. 546/// 547/// NOTE: sliceslice only works on x86_64 CPUs with AVX enabled. So not only 548/// do we conditionally compile the routines below, but we only run these 549/// benchmarks when AVX2 is available. 550#[cfg(target_arch = "x86_64")] 551pub(crate) mod sliceslice { 552 pub(crate) fn available(needle: &str) -> &'static [&'static str] { 553 // Apparently sliceslice doesn't support searching with an empty 554 // needle. Sheesh. 555 if !needle.is_empty() && is_x86_feature_detected!("avx2") { 556 &["oneshot", "prebuilt"] 557 } else { 558 &[] 559 } 560 } 561 562 pub(crate) mod fwd { 563 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 564 if !is_x86_feature_detected!("avx2") { 565 unreachable!("sliceslice cannot be called without avx2"); 566 } 567 let needle = needle.as_bytes(); 568 // SAFETY: This code path is only entered when AVX2 is enabled, 569 // which is the only requirement for using DynamicAvx2Searcher. 570 unsafe { 571 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle); 572 finder.search_in(haystack.as_bytes()) 573 } 574 } 575 576 pub(crate) fn prebuilt( 577 needle: &str, 578 ) -> impl Fn(&str) -> bool + 'static { 579 if !is_x86_feature_detected!("avx2") { 580 unreachable!("sliceslice cannot be called without avx2"); 581 } 582 let needle = needle.as_bytes().to_vec(); 583 // SAFETY: This code path is only entered when AVX2 is enabled, 584 // which is the only requirement for using DynamicAvx2Searcher. 585 unsafe { 586 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle); 587 move |h| finder.search_in(h.as_bytes()) 588 } 589 } 590 591 pub(crate) fn oneshotiter( 592 _haystack: &str, 593 _needle: &str, 594 ) -> impl Iterator<Item = usize> + 'static { 595 std::iter::from_fn(move || { 596 unimplemented!("sliceslice doesn't not support iteration") 597 }) 598 } 599 600 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 601 unimplemented!("sliceslice doesn't support prebuilt iteration") 602 } 603 } 604 605 pub(crate) mod rev { 606 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 607 unimplemented!("sliceslice does not support reverse searches") 608 } 609 610 pub(crate) fn prebuilt( 611 _needle: &str, 612 ) -> impl Fn(&str) -> bool + 'static { 613 |_| unimplemented!("sliceslice does not support reverse searches") 614 } 615 616 pub(crate) fn oneshotiter( 617 _haystack: &str, 618 _needle: &str, 619 ) -> impl Iterator<Item = usize> + 'static { 620 std::iter::from_fn(move || { 621 unimplemented!("sliceslice does not support reverse searches") 622 }) 623 } 624 625 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 626 unimplemented!("sliceslice does not support reverse searches") 627 } 628 } 629} 630 631#[cfg(not(target_arch = "x86_64"))] 632pub(crate) mod sliceslice { 633 pub(crate) fn available(_: &str) -> &'static [&'static str] { 634 &[] 635 } 636 637 pub(crate) mod fwd { 638 pub(crate) fn oneshot(_: &str, _: &str) -> bool { 639 unimplemented!("sliceslice only runs on x86") 640 } 641 642 pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static { 643 if true { 644 unimplemented!("sliceslice only runs on x86") 645 } 646 |_| false 647 } 648 649 pub(crate) fn oneshotiter<'a>( 650 _haystack: &'a str, 651 _needle: &'a str, 652 ) -> impl Iterator<Item = usize> + 'static { 653 std::iter::from_fn(move || { 654 unimplemented!("sliceslice only runs on x86") 655 }) 656 } 657 658 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 659 unimplemented!("sliceslice only runs on x86") 660 } 661 } 662 663 pub(crate) mod rev { 664 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 665 unimplemented!("sliceslice does not support reverse searches") 666 } 667 668 pub(crate) fn prebuilt( 669 _needle: &str, 670 ) -> impl Fn(&str) -> bool + 'static { 671 |_| unimplemented!("sliceslice does not support reverse searches") 672 } 673 674 pub(crate) fn oneshotiter( 675 _haystack: &str, 676 _needle: &str, 677 ) -> impl Iterator<Item = usize> + 'static { 678 std::iter::from_fn(move || { 679 unimplemented!("sliceslice does not support reverse searches") 680 }) 681 } 682 683 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 684 unimplemented!("sliceslice does not support reverse searches") 685 } 686 } 687} 688 689/// libc's substring search implementation. 690/// 691/// libc doesn't have any way to amortize the construction of the searcher, so 692/// we can't implement any of the prebuilt routines. 693pub(crate) mod libc { 694 pub(crate) fn available(_: &str) -> &'static [&'static str] { 695 &["oneshot", "oneshotiter"] 696 } 697 698 pub(crate) mod fwd { 699 #[cfg(target_arch = "wasm32")] 700 extern "C" { 701 fn memmem( 702 haystack: *const libc::c_void, 703 haystack_len: usize, 704 needle: *const libc::c_void, 705 needle_len: usize, 706 ) -> *const libc::c_void; 707 } 708 #[cfg(not(target_arch = "wasm32"))] 709 use libc::memmem; 710 711 fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { 712 let p = unsafe { 713 memmem( 714 haystack.as_ptr() as *const libc::c_void, 715 haystack.len(), 716 needle.as_ptr() as *const libc::c_void, 717 needle.len(), 718 ) 719 }; 720 if p.is_null() { 721 None 722 } else { 723 Some(p as usize - (haystack.as_ptr() as usize)) 724 } 725 } 726 727 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 728 find(haystack.as_bytes(), needle.as_bytes()).is_some() 729 } 730 731 pub(crate) fn prebuilt( 732 _needle: &str, 733 ) -> impl Fn(&str) -> bool + 'static { 734 |_| unimplemented!("std does not support prebuilt searches") 735 } 736 737 pub(crate) fn oneshotiter<'a>( 738 haystack: &'a str, 739 needle: &'a str, 740 ) -> impl Iterator<Item = usize> + 'a { 741 super::super::iter_from_find( 742 haystack.as_bytes(), 743 needle.as_bytes(), 744 find, 745 ) 746 } 747 748 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 749 super::super::NoIter { imp: "libc" } 750 } 751 } 752 753 pub(crate) mod rev { 754 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 755 unimplemented!("libc does not support reverse searches") 756 } 757 758 pub(crate) fn prebuilt( 759 _needle: &str, 760 ) -> impl Fn(&str) -> bool + 'static { 761 |_| unimplemented!("libc does not support reverse searches") 762 } 763 764 pub(crate) fn oneshotiter<'a>( 765 _haystack: &'a str, 766 _needle: &'a str, 767 ) -> impl Iterator<Item = usize> + 'a { 768 std::iter::from_fn(move || { 769 unimplemented!("libc does not support reverse searches") 770 }) 771 } 772 773 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 774 unimplemented!("libc does not support reverse searches") 775 } 776 } 777} 778 779/// An iterator that looks like a PrebuilIter API-wise, but panics if it's 780/// called. This should be used for implementations that don't support 781/// prebuilt iteration. 782#[derive(Debug)] 783pub(crate) struct NoIter { 784 /// The name of the impl to use in the panic message in case it is invoked 785 /// by mistake. (But the benchmark harness should not invoke it, assuming 786 /// each impl's 'available' function is correct. 787 imp: &'static str, 788} 789 790impl NoIter { 791 pub(crate) fn iter( 792 &self, 793 _: &str, 794 ) -> impl Iterator<Item = usize> + 'static { 795 let imp = self.imp; 796 std::iter::from_fn(move || { 797 unimplemented!("{} does not support prebuilt iteration", imp) 798 }) 799 } 800} 801 802/// Accepts a corpus and a needle and a routine that implements substring 803/// search, and returns an iterator over all matches. This is useful for 804/// benchmarking "find all matches" for substring search implementations that 805/// don't expose a native way to do this. 806/// 807/// The closure given takes two parameters: the corpus and needle, in that 808/// order. 809fn iter_from_find<'a>( 810 haystack: &'a [u8], 811 needle: &'a [u8], 812 mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, 813) -> impl Iterator<Item = usize> + 'a { 814 let mut pos = 0; 815 std::iter::from_fn(move || { 816 if pos > haystack.len() { 817 return None; 818 } 819 match find(&haystack[pos..], needle) { 820 None => None, 821 Some(i) => { 822 let found = pos + i; 823 // We always need to add at least 1, in case of an empty needle. 824 pos += i + std::cmp::max(1, needle.len()); 825 Some(found) 826 } 827 } 828 }) 829} 830 831/// Like iter_from_find, but for reverse searching. 832fn iter_from_rfind<'a>( 833 haystack: &'a [u8], 834 needle: &'a [u8], 835 mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, 836) -> impl Iterator<Item = usize> + 'a { 837 let mut pos = Some(haystack.len()); 838 std::iter::from_fn(move || { 839 let end = match pos { 840 None => return None, 841 Some(end) => end, 842 }; 843 match rfind(&haystack[..end], needle) { 844 None => None, 845 Some(i) => { 846 if end == i { 847 // We always need to subtract at least 1, in case of an 848 // empty needle. 849 pos = end.checked_sub(1); 850 } else { 851 pos = Some(i); 852 } 853 Some(i) 854 } 855 } 856 }) 857} 858