1/*
2This module defines a common API (by convention) for all of the different
3impls that we benchmark. The intent here is to 1) make it easy to write macros
4for generating benchmark definitions generic over impls and 2) make it easier
5to read the benchmarks themselves and grok how exactly each of the impls are
6being invoked.
7
8The naming scheme of each function follows the pertinent parts of our benchmark
9naming scheme (see parent module docs). Namely, it is
10
11  {impl}/{fwd|rev}/{config}
12
13Where 'impl' is the underlying implementation and 'config' is the manner of
14search. The slash indicates a module boundary. We use modules for this because
15it makes writing macros to define benchmarks for all variants much easier.
16*/
17
18/// memchr's implementation of memmem. This is the implementation that we hope
19/// does approximately as well as all other implementations, and a lot better
20/// in at least some cases.
21pub(crate) mod krate {
22    pub(crate) fn available(_: &str) -> &'static [&'static str] {
23        &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
24    }
25
26    pub(crate) mod fwd {
27        use memchr::memmem;
28
29        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
30            memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
31        }
32
33        pub(crate) fn prebuilt(
34            needle: &str,
35        ) -> impl Fn(&str) -> bool + 'static {
36            let finder = memmem::Finder::new(needle).into_owned();
37            move |h| finder.find(h.as_bytes()).is_some()
38        }
39
40        pub(crate) fn oneshotiter<'a>(
41            haystack: &'a str,
42            needle: &'a str,
43        ) -> impl Iterator<Item = usize> + 'a {
44            memmem::find_iter(haystack.as_bytes(), needle.as_bytes())
45        }
46
47        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
48            PrebuiltIter(memmem::Finder::new(needle).into_owned())
49        }
50
51        #[derive(Debug)]
52        pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
53
54        impl PrebuiltIter {
55            pub(crate) fn iter<'a>(
56                &'a self,
57                haystack: &'a str,
58            ) -> impl Iterator<Item = usize> + 'a {
59                self.0.find_iter(haystack.as_bytes())
60            }
61        }
62    }
63
64    pub(crate) mod rev {
65        use memchr::memmem;
66
67        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
68            memmem::rfind(haystack.as_bytes(), needle.as_bytes()).is_some()
69        }
70
71        pub(crate) fn prebuilt(
72            needle: &str,
73        ) -> impl Fn(&str) -> bool + 'static {
74            let finder = memmem::FinderRev::new(needle).into_owned();
75            move |h| finder.rfind(h.as_bytes()).is_some()
76        }
77
78        pub(crate) fn oneshotiter<'a>(
79            haystack: &'a str,
80            needle: &'a str,
81        ) -> impl Iterator<Item = usize> + 'a {
82            memmem::rfind_iter(haystack.as_bytes(), needle.as_bytes())
83        }
84
85        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
86            PrebuiltIter(memmem::FinderRev::new(needle).into_owned())
87        }
88
89        #[derive(Debug)]
90        pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
91
92        impl PrebuiltIter {
93            pub(crate) fn iter<'a>(
94                &'a self,
95                haystack: &'a str,
96            ) -> impl Iterator<Item = usize> + 'a {
97                self.0.rfind_iter(haystack.as_bytes())
98            }
99        }
100    }
101}
102
103/// memchr's implementation of memmem, but without prefilters enabled. This
104/// exists because sometimes prefilters aren't the right choice, and it's good
105/// to be able to compare it against prefilter-accelerated searches to see
106/// where this might be faster.
107pub(crate) mod krate_nopre {
108    pub(crate) fn available(_: &str) -> &'static [&'static str] {
109        &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
110    }
111
112    pub(crate) mod fwd {
113        use memchr::memmem;
114
115        fn finder(needle: &[u8]) -> memmem::Finder<'_> {
116            memmem::FinderBuilder::new()
117                .prefilter(memmem::Prefilter::None)
118                .build_forward(needle)
119        }
120
121        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
122            finder(needle.as_bytes()).find(haystack.as_bytes()).is_some()
123        }
124
125        pub(crate) fn prebuilt(
126            needle: &str,
127        ) -> impl Fn(&str) -> bool + 'static {
128            let finder = finder(needle.as_bytes()).into_owned();
129            move |h| finder.find(h.as_bytes()).is_some()
130        }
131
132        pub(crate) fn oneshotiter<'a>(
133            haystack: &'a str,
134            needle: &'a str,
135        ) -> impl Iterator<Item = usize> + 'a {
136            super::super::iter_from_find(
137                haystack.as_bytes(),
138                needle.as_bytes(),
139                |h, n| finder(n).find(h),
140            )
141        }
142
143        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
144            PrebuiltIter(finder(needle.as_bytes()).into_owned())
145        }
146
147        #[derive(Debug)]
148        pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
149
150        impl PrebuiltIter {
151            pub(crate) fn iter<'a>(
152                &'a self,
153                haystack: &'a str,
154            ) -> impl Iterator<Item = usize> + 'a {
155                self.0.find_iter(haystack.as_bytes())
156            }
157        }
158    }
159
160    // N.B. memrmem/krate_nopre and memrmem/krate should be equivalent for now
161    // since reverse searching doesn't have any prefilter support.
162    pub(crate) mod rev {
163        use memchr::memmem;
164
165        fn finder(needle: &[u8]) -> memmem::FinderRev<'_> {
166            memmem::FinderBuilder::new()
167                .prefilter(memmem::Prefilter::None)
168                .build_reverse(needle)
169        }
170
171        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
172            finder(needle.as_bytes()).rfind(haystack.as_bytes()).is_some()
173        }
174
175        pub(crate) fn prebuilt(
176            needle: &str,
177        ) -> impl Fn(&str) -> bool + 'static {
178            let finder = finder(needle.as_bytes()).into_owned();
179            move |h| finder.rfind(h.as_bytes()).is_some()
180        }
181
182        pub(crate) fn oneshotiter<'a>(
183            haystack: &'a str,
184            needle: &'a str,
185        ) -> impl Iterator<Item = usize> + 'a {
186            super::super::iter_from_rfind(
187                haystack.as_bytes(),
188                needle.as_bytes(),
189                |h, n| finder(n).rfind(h),
190            )
191        }
192
193        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
194            PrebuiltIter(finder(needle.as_bytes()).into_owned())
195        }
196
197        #[derive(Debug)]
198        pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
199
200        impl PrebuiltIter {
201            pub(crate) fn iter<'a>(
202                &'a self,
203                haystack: &'a str,
204            ) -> impl Iterator<Item = usize> + 'a {
205                self.0.rfind_iter(haystack.as_bytes())
206            }
207        }
208    }
209}
210
211/// bstr's implementation of memmem.
212///
213/// The implementation in this crate was originally copied from bstr.
214/// Eventually, bstr will just use the implementation in this crate, but at time
215/// of writing, it was useful to benchmark against the "original" version.
216pub(crate) mod bstr {
217    pub(crate) fn available(_: &str) -> &'static [&'static str] {
218        &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
219    }
220
221    pub(crate) mod fwd {
222        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
223            bstr::ByteSlice::find(haystack.as_bytes(), needle.as_bytes())
224                .is_some()
225        }
226
227        pub(crate) fn prebuilt(
228            needle: &str,
229        ) -> impl Fn(&str) -> bool + 'static {
230            let finder = bstr::Finder::new(needle).into_owned();
231            move |h| finder.find(h.as_bytes()).is_some()
232        }
233
234        pub(crate) fn oneshotiter<'a>(
235            haystack: &'a str,
236            needle: &'a str,
237        ) -> impl Iterator<Item = usize> + 'a {
238            bstr::ByteSlice::find_iter(haystack.as_bytes(), needle.as_bytes())
239        }
240
241        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
242            PrebuiltIter(bstr::Finder::new(needle).into_owned())
243        }
244
245        #[derive(Debug)]
246        pub(crate) struct PrebuiltIter(bstr::Finder<'static>);
247
248        impl PrebuiltIter {
249            pub(crate) fn iter<'a>(
250                &'a self,
251                haystack: &'a str,
252            ) -> impl Iterator<Item = usize> + 'a {
253                super::super::iter_from_find(
254                    haystack.as_bytes(),
255                    self.0.needle(),
256                    move |h, _| self.0.find(h),
257                )
258            }
259        }
260    }
261
262    pub(crate) mod rev {
263        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
264            bstr::ByteSlice::rfind(haystack.as_bytes(), needle.as_bytes())
265                .is_some()
266        }
267
268        pub(crate) fn prebuilt(
269            needle: &str,
270        ) -> impl Fn(&str) -> bool + 'static {
271            let finder = bstr::FinderReverse::new(needle).into_owned();
272            move |h| finder.rfind(h.as_bytes()).is_some()
273        }
274
275        pub(crate) fn oneshotiter<'a>(
276            haystack: &'a str,
277            needle: &'a str,
278        ) -> impl Iterator<Item = usize> + 'a {
279            bstr::ByteSlice::rfind_iter(haystack.as_bytes(), needle.as_bytes())
280        }
281
282        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
283            PrebuiltIter(bstr::FinderReverse::new(needle).into_owned())
284        }
285
286        #[derive(Debug)]
287        pub(crate) struct PrebuiltIter(bstr::FinderReverse<'static>);
288
289        impl PrebuiltIter {
290            pub(crate) fn iter<'a>(
291                &'a self,
292                haystack: &'a str,
293            ) -> impl Iterator<Item = usize> + 'a {
294                super::super::iter_from_rfind(
295                    haystack.as_bytes(),
296                    self.0.needle(),
297                    move |h, _| self.0.rfind(h),
298                )
299            }
300        }
301    }
302}
303
304/// regex's implementation of substring search.
305///
306/// regex is where the concept of using heuristics based on an a priori
307/// assumption of byte frequency originated. Eventually, regex will just use the
308/// implementation in this crate, but it will still be useful to benchmark since
309/// regex tends to have higher latency. It would be good to measure that.
310///
311/// For regex, we don't provide oneshots, since that requires compiling the
312/// regex which we know is going to be ridiculously slow. No real need to
313/// measure it I think.
314pub(crate) mod regex {
315    pub(crate) fn available(_: &str) -> &'static [&'static str] {
316        &["prebuilt", "prebuiltiter"]
317    }
318
319    pub(crate) mod fwd {
320        pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
321            unimplemented!("regex does not support oneshot searches")
322        }
323
324        pub(crate) fn prebuilt(
325            needle: &str,
326        ) -> impl Fn(&str) -> bool + 'static {
327            let finder = regex::Regex::new(&regex::escape(needle)).unwrap();
328            move |h| finder.is_match(h)
329        }
330
331        pub(crate) fn oneshotiter(
332            _haystack: &str,
333            _needle: &str,
334        ) -> impl Iterator<Item = usize> + 'static {
335            std::iter::from_fn(move || {
336                unimplemented!("regex does not support oneshot searches")
337            })
338        }
339
340        pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
341            PrebuiltIter(regex::Regex::new(&regex::escape(needle)).unwrap())
342        }
343
344        #[derive(Debug)]
345        pub(crate) struct PrebuiltIter(regex::Regex);
346
347        impl PrebuiltIter {
348            pub(crate) fn iter<'a>(
349                &'a self,
350                haystack: &'a str,
351            ) -> impl Iterator<Item = usize> + 'a {
352                self.0.find_iter(haystack).map(|m| m.start())
353            }
354        }
355    }
356
357    pub(crate) mod rev {
358        pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
359            unimplemented!("regex does not support reverse searches")
360        }
361
362        pub(crate) fn prebuilt(
363            _needle: &str,
364        ) -> impl Fn(&str) -> bool + 'static {
365            |_| unimplemented!("regex does not support reverse searches")
366        }
367
368        pub(crate) fn oneshotiter(
369            _haystack: &str,
370            _needle: &str,
371        ) -> impl Iterator<Item = usize> + 'static {
372            std::iter::from_fn(move || {
373                unimplemented!("regex does not support reverse searches")
374            })
375        }
376
377        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
378            unimplemented!("regex does not support reverse searches")
379        }
380    }
381}
382
383/// std's substring search implementation.
384///
385/// std uses Two-Way like this crate, but doesn't have any prefilter
386/// heuristics.
387///
388/// std doesn't have any way to amortize the construction of the searcher, so
389/// we can't implement any of the prebuilt routines.
390pub(crate) mod stud {
391    pub(crate) fn available(_: &str) -> &'static [&'static str] {
392        &["reverse", "oneshot", "oneshotiter"]
393    }
394
395    pub(crate) mod fwd {
396        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
397            haystack.contains(needle)
398        }
399
400        pub(crate) fn prebuilt(
401            _needle: &str,
402        ) -> impl Fn(&str) -> bool + 'static {
403            |_| unimplemented!("std does not support prebuilt searches")
404        }
405
406        pub(crate) fn oneshotiter<'a>(
407            haystack: &'a str,
408            needle: &'a str,
409        ) -> impl Iterator<Item = usize> + 'a {
410            haystack.match_indices(needle).map(|(i, _)| i)
411        }
412
413        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
414            super::super::NoIter { imp: "std" }
415        }
416    }
417
418    pub(crate) mod rev {
419        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
420            haystack.contains(needle)
421        }
422
423        pub(crate) fn prebuilt(
424            _needle: &str,
425        ) -> impl Fn(&str) -> bool + 'static {
426            |_| unimplemented!("std does not support prebuilt searches")
427        }
428
429        pub(crate) fn oneshotiter<'a>(
430            haystack: &'a str,
431            needle: &'a str,
432        ) -> impl Iterator<Item = usize> + 'a {
433            haystack.rmatch_indices(needle).map(|(i, _)| i)
434        }
435
436        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
437            super::super::NoIter { imp: "std" }
438        }
439    }
440}
441
442/// Substring search from the twoway crate.
443///
444/// twoway uses, obviously, Two-Way as an implementation. AIUI, it was taken
445/// from std at some point but heavily modified to support a prefilter via
446/// PCMPESTRI from the SSE 4.2 ISA extension. (And also uses memchr for
447/// single-byte needles.)
448///
449/// Like std, there is no way to amortize the construction of the searcher, so
450/// we can't implement any of the prebuilt routines.
451pub(crate) mod twoway {
452    pub(crate) fn available(_: &str) -> &'static [&'static str] {
453        &["reverse", "oneshot", "oneshotiter"]
454    }
455
456    pub(crate) mod fwd {
457        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
458            twoway::find_bytes(haystack.as_bytes(), needle.as_bytes())
459                .is_some()
460        }
461
462        pub(crate) fn prebuilt(
463            _needle: &str,
464        ) -> impl Fn(&str) -> bool + 'static {
465            |_| unimplemented!("twoway does not support prebuilt searches")
466        }
467
468        pub(crate) fn oneshotiter<'a>(
469            haystack: &'a str,
470            needle: &'a str,
471        ) -> impl Iterator<Item = usize> + 'a {
472            super::super::iter_from_find(
473                haystack.as_bytes(),
474                needle.as_bytes(),
475                twoway::find_bytes,
476            )
477        }
478
479        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
480            super::super::NoIter { imp: "twoway" }
481        }
482    }
483
484    pub(crate) mod rev {
485        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
486            twoway::rfind_bytes(haystack.as_bytes(), needle.as_bytes())
487                .is_some()
488        }
489
490        pub(crate) fn prebuilt(
491            _needle: &str,
492        ) -> impl Fn(&str) -> bool + 'static {
493            |_| unimplemented!("twoway does not support prebuilt searches")
494        }
495
496        pub(crate) fn oneshotiter<'a>(
497            haystack: &'a str,
498            needle: &'a str,
499        ) -> impl Iterator<Item = usize> + 'a {
500            super::super::iter_from_rfind(
501                haystack.as_bytes(),
502                needle.as_bytes(),
503                twoway::rfind_bytes,
504            )
505        }
506
507        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
508            super::super::NoIter { imp: "twoway" }
509        }
510    }
511}
512
513/// Substring search from the sliceslice crate.
514///
515/// This crate is what inspired me to write a vectorized memmem implementation
516/// in the memchr crate in the first place. In particular, it exposed some
517/// serious weaknesses in my implementation in the bstr crate.
518///
519/// sliceslice doesn't actually do anything "new" other
520/// than bringing a long known SIMD algorithm to Rust:
521/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd
522///
523/// The main thrust of the algorithm is that it picks a couple of bytes in the
524/// needle and uses SIMD to check whether those two bytes occur in the haystack
525/// in a way that could lead to a match. If so, then you do a simple memcmp
526/// confirmation step. The main problem with this algorithm is that its worst
527/// case is multiplicative: that confirmatory step can become quite costly if
528/// the SIMD prefilter isn't effective. The elegance of this method, however,
529/// is that the prefilter is routinely effective.
530///
531/// The essence of memchr's implementation of memmem comes from sliceslice,
532/// but also from regex's original idea to use heuristics based on an a priori
533/// assumption of relative byte frequency AND from bstr's desire to have a
534/// constant space and worst case O(m+n) substring search. My claim is that
535/// it is the best of all words, and that's why this benchmark suite is so
536/// comprehensive. There are a lot of cases and implementations to test.
537///
538/// NOTE: The API of sliceslice is quite constrained. My guess is that it was
539/// designed for a very specific use case, and the API is heavily constrained
540/// to that use case (whatever it is). While its API doesn't provide any
541/// oneshot routines, we emulate them. (Its main problem is that every such
542/// search requires copying the needle into a fresh allocation. The memchr
543/// crate avoids that problem by being generic over the needle: it can be owned
544/// or borrowed.) Also, since the API only enables testing whether a substring
545/// exists or not, we can't benchmark iteration.
546///
547/// NOTE: sliceslice only works on x86_64 CPUs with AVX enabled. So not only
548/// do we conditionally compile the routines below, but we only run these
549/// benchmarks when AVX2 is available.
550#[cfg(target_arch = "x86_64")]
551pub(crate) mod sliceslice {
552    pub(crate) fn available(needle: &str) -> &'static [&'static str] {
553        // Apparently sliceslice doesn't support searching with an empty
554        // needle. Sheesh.
555        if !needle.is_empty() && is_x86_feature_detected!("avx2") {
556            &["oneshot", "prebuilt"]
557        } else {
558            &[]
559        }
560    }
561
562    pub(crate) mod fwd {
563        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
564            if !is_x86_feature_detected!("avx2") {
565                unreachable!("sliceslice cannot be called without avx2");
566            }
567            let needle = needle.as_bytes();
568            // SAFETY: This code path is only entered when AVX2 is enabled,
569            // which is the only requirement for using DynamicAvx2Searcher.
570            unsafe {
571                let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
572                finder.search_in(haystack.as_bytes())
573            }
574        }
575
576        pub(crate) fn prebuilt(
577            needle: &str,
578        ) -> impl Fn(&str) -> bool + 'static {
579            if !is_x86_feature_detected!("avx2") {
580                unreachable!("sliceslice cannot be called without avx2");
581            }
582            let needle = needle.as_bytes().to_vec();
583            // SAFETY: This code path is only entered when AVX2 is enabled,
584            // which is the only requirement for using DynamicAvx2Searcher.
585            unsafe {
586                let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
587                move |h| finder.search_in(h.as_bytes())
588            }
589        }
590
591        pub(crate) fn oneshotiter(
592            _haystack: &str,
593            _needle: &str,
594        ) -> impl Iterator<Item = usize> + 'static {
595            std::iter::from_fn(move || {
596                unimplemented!("sliceslice doesn't not support iteration")
597            })
598        }
599
600        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
601            unimplemented!("sliceslice doesn't support prebuilt iteration")
602        }
603    }
604
605    pub(crate) mod rev {
606        pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
607            unimplemented!("sliceslice does not support reverse searches")
608        }
609
610        pub(crate) fn prebuilt(
611            _needle: &str,
612        ) -> impl Fn(&str) -> bool + 'static {
613            |_| unimplemented!("sliceslice does not support reverse searches")
614        }
615
616        pub(crate) fn oneshotiter(
617            _haystack: &str,
618            _needle: &str,
619        ) -> impl Iterator<Item = usize> + 'static {
620            std::iter::from_fn(move || {
621                unimplemented!("sliceslice does not support reverse searches")
622            })
623        }
624
625        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
626            unimplemented!("sliceslice does not support reverse searches")
627        }
628    }
629}
630
631#[cfg(not(target_arch = "x86_64"))]
632pub(crate) mod sliceslice {
633    pub(crate) fn available(_: &str) -> &'static [&'static str] {
634        &[]
635    }
636
637    pub(crate) mod fwd {
638        pub(crate) fn oneshot(_: &str, _: &str) -> bool {
639            unimplemented!("sliceslice only runs on x86")
640        }
641
642        pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static {
643            if true {
644                unimplemented!("sliceslice only runs on x86")
645            }
646            |_| false
647        }
648
649        pub(crate) fn oneshotiter<'a>(
650            _haystack: &'a str,
651            _needle: &'a str,
652        ) -> impl Iterator<Item = usize> + 'static {
653            std::iter::from_fn(move || {
654                unimplemented!("sliceslice only runs on x86")
655            })
656        }
657
658        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
659            unimplemented!("sliceslice only runs on x86")
660        }
661    }
662
663    pub(crate) mod rev {
664        pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
665            unimplemented!("sliceslice does not support reverse searches")
666        }
667
668        pub(crate) fn prebuilt(
669            _needle: &str,
670        ) -> impl Fn(&str) -> bool + 'static {
671            |_| unimplemented!("sliceslice does not support reverse searches")
672        }
673
674        pub(crate) fn oneshotiter(
675            _haystack: &str,
676            _needle: &str,
677        ) -> impl Iterator<Item = usize> + 'static {
678            std::iter::from_fn(move || {
679                unimplemented!("sliceslice does not support reverse searches")
680            })
681        }
682
683        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
684            unimplemented!("sliceslice does not support reverse searches")
685        }
686    }
687}
688
689/// libc's substring search implementation.
690///
691/// libc doesn't have any way to amortize the construction of the searcher, so
692/// we can't implement any of the prebuilt routines.
693pub(crate) mod libc {
694    pub(crate) fn available(_: &str) -> &'static [&'static str] {
695        &["oneshot", "oneshotiter"]
696    }
697
698    pub(crate) mod fwd {
699        #[cfg(target_arch = "wasm32")]
700        extern "C" {
701            fn memmem(
702                haystack: *const libc::c_void,
703                haystack_len: usize,
704                needle: *const libc::c_void,
705                needle_len: usize,
706            ) -> *const libc::c_void;
707        }
708        #[cfg(not(target_arch = "wasm32"))]
709        use libc::memmem;
710
711        fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
712            let p = unsafe {
713                memmem(
714                    haystack.as_ptr() as *const libc::c_void,
715                    haystack.len(),
716                    needle.as_ptr() as *const libc::c_void,
717                    needle.len(),
718                )
719            };
720            if p.is_null() {
721                None
722            } else {
723                Some(p as usize - (haystack.as_ptr() as usize))
724            }
725        }
726
727        pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
728            find(haystack.as_bytes(), needle.as_bytes()).is_some()
729        }
730
731        pub(crate) fn prebuilt(
732            _needle: &str,
733        ) -> impl Fn(&str) -> bool + 'static {
734            |_| unimplemented!("std does not support prebuilt searches")
735        }
736
737        pub(crate) fn oneshotiter<'a>(
738            haystack: &'a str,
739            needle: &'a str,
740        ) -> impl Iterator<Item = usize> + 'a {
741            super::super::iter_from_find(
742                haystack.as_bytes(),
743                needle.as_bytes(),
744                find,
745            )
746        }
747
748        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
749            super::super::NoIter { imp: "libc" }
750        }
751    }
752
753    pub(crate) mod rev {
754        pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
755            unimplemented!("libc does not support reverse searches")
756        }
757
758        pub(crate) fn prebuilt(
759            _needle: &str,
760        ) -> impl Fn(&str) -> bool + 'static {
761            |_| unimplemented!("libc does not support reverse searches")
762        }
763
764        pub(crate) fn oneshotiter<'a>(
765            _haystack: &'a str,
766            _needle: &'a str,
767        ) -> impl Iterator<Item = usize> + 'a {
768            std::iter::from_fn(move || {
769                unimplemented!("libc does not support reverse searches")
770            })
771        }
772
773        pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
774            unimplemented!("libc does not support reverse searches")
775        }
776    }
777}
778
779/// An iterator that looks like a PrebuilIter API-wise, but panics if it's
780/// called. This should be used for implementations that don't support
781/// prebuilt iteration.
782#[derive(Debug)]
783pub(crate) struct NoIter {
784    /// The name of the impl to use in the panic message in case it is invoked
785    /// by mistake. (But the benchmark harness should not invoke it, assuming
786    /// each impl's 'available' function is correct.
787    imp: &'static str,
788}
789
790impl NoIter {
791    pub(crate) fn iter(
792        &self,
793        _: &str,
794    ) -> impl Iterator<Item = usize> + 'static {
795        let imp = self.imp;
796        std::iter::from_fn(move || {
797            unimplemented!("{} does not support prebuilt iteration", imp)
798        })
799    }
800}
801
802/// Accepts a corpus and a needle and a routine that implements substring
803/// search, and returns an iterator over all matches. This is useful for
804/// benchmarking "find all matches" for substring search implementations that
805/// don't expose a native way to do this.
806///
807/// The closure given takes two parameters: the corpus and needle, in that
808/// order.
809fn iter_from_find<'a>(
810    haystack: &'a [u8],
811    needle: &'a [u8],
812    mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
813) -> impl Iterator<Item = usize> + 'a {
814    let mut pos = 0;
815    std::iter::from_fn(move || {
816        if pos > haystack.len() {
817            return None;
818        }
819        match find(&haystack[pos..], needle) {
820            None => None,
821            Some(i) => {
822                let found = pos + i;
823                // We always need to add at least 1, in case of an empty needle.
824                pos += i + std::cmp::max(1, needle.len());
825                Some(found)
826            }
827        }
828    })
829}
830
831/// Like iter_from_find, but for reverse searching.
832fn iter_from_rfind<'a>(
833    haystack: &'a [u8],
834    needle: &'a [u8],
835    mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
836) -> impl Iterator<Item = usize> + 'a {
837    let mut pos = Some(haystack.len());
838    std::iter::from_fn(move || {
839        let end = match pos {
840            None => return None,
841            Some(end) => end,
842        };
843        match rfind(&haystack[..end], needle) {
844            None => None,
845            Some(i) => {
846                if end == i {
847                    // We always need to subtract at least 1, in case of an
848                    // empty needle.
849                    pos = end.checked_sub(1);
850                } else {
851                    pos = Some(i);
852                }
853                Some(i)
854            }
855        }
856    })
857}
858