1/*
2This module defines benchmarks for the memmem family of functions.
3Benchmarking a substring algorithm is particularly difficult, especially
4when implementations (like this one, and others) use heuristics to speed up
5common cases, typically at the expense of less common cases. The job of this
6benchmark suite is to not only highlight the fast common cases, but to also put
7a spotlight on the less common or pathological cases. While some things are
8generally expected to be slower because of these heuristics, the benchmarks
9help us make sure they we don't let things get too slow.
10
11The naming scheme is as follows:
12
13  memr?mem/{impl}/{config}/{corpus}/{needle}
14
15Where {...} is a variable. Variables should never contain slashes. They are as
16follows:
17
18  impl
19    A brief name describing the implementation under test. Possible values:
20
21    krate
22      The implementation provided by this crate.
23    krate-nopre
24      The implementation provided by this crate without prefilters enabled.
25    bstr
26      The implementation provided by the bstr crate.
27      N.B. This is only applicable at time of writing, since bstr will
28      eventually just use this crate.
29    regex
30      The implementation of substring search provided by the regex crate.
31      N.B. This is only applicable at time of writing, since regex will
32      eventually just use this crate.
33    stud
34      The implementation of substring search provided by the standard
35      library. This implementation only works on valid UTF-8 by virtue of
36      how its API is exposed.
37    twoway
38      The implementation of substring search provided by the twoway crate.
39    sliceslice
40      The implementation of substring search provided by the sliceslice crate.
41    libc
42      The implementation of memmem in your friendly neighborhood libc.
43
44    Note that there is also a 'memmem' crate, but it is unmaintained and
45    appears to just be a snapshot of std's implementation at a particular
46    point in time (but exposed in a way to permit it to search arbitrary
47    bytes).
48
49  config
50    This should be a brief description of the configuration of the search. Not
51    all implementations can be benchmarked in all configurations. It depends on
52    the API they expose. Possible values:
53
54    oneshot
55      Executes a single search without pre-building a searcher. That
56      this measurement includes the time it takes to initialize a
57      searcher.
58    prebuilt
59      Executes a single search without measuring the time it takes to
60      build a searcher.
61    iter-oneshot
62      Counts the total number of matches. This measures the time it takes to
63      build the searcher.
64    iter-prebuilt
65      Counts the total number of matches. This does not measure the time it
66      takes to build the searcher.
67
68  corpus
69    A brief name describing the corpus or haystack used in the benchmark. In
70    general, we vary this with regard to size and language. Possible values:
71
72    subtitles-{en,ru,zh}
73      Text from the OpenSubtitles project, in one of English, Russian or
74      Chinese. This is the primary input meant to represent most kinds of
75      haystacks.
76    pathological-{...}
77      A haystack that has been specifically constructed to exploit a
78      pathological case in or more substring search implementations.
79    sliceslice-words
80      The haystack is varied across words in an English dictionary. Using
81      this corpus means the benchmark is measuring performance on very small
82      haystacks. This was taken from the sliceslice crate benchmarks.
83    sliceslice-i386
84      The haystack is an Intel 80386 reference manual.
85      This was also taken from the sliceslice crate benchmarks.
86
87  needle
88    A brief name describing the needle used. Unlike other variables, there
89    isn't a strong controlled vocabularly for this parameter. The needle
90    variable is meant to be largely self explanatory. For example, a needle
91    named "rare" probably means that the number of occurrences of the needle
92    is expected to be particularly low.
93*/
94
95use criterion::Criterion;
96
97use crate::{define, memmem::inputs::INPUTS};
98
99mod imp;
100mod inputs;
101mod sliceslice;
102
103pub fn all(c: &mut Criterion) {
104    oneshot(c);
105    prebuilt(c);
106    oneshot_iter(c);
107    prebuilt_iter(c);
108    sliceslice::all(c);
109}
110
111fn oneshot(c: &mut Criterion) {
112    macro_rules! def_impl {
113        ($inp:expr, $q:expr, $freq:expr, $impl:ident) => {
114            let config = "oneshot";
115            let available = imp::$impl::available($q.needle);
116            // We only define non-iter benchmarks when the count is <=1. Such
117            // queries are usually constructed to only appear at the end.
118            // Otherwise, for more common queries, the benchmark would be
119            // approximately duplicative with benchmarks on shorter haystacks
120            // for the implementations we benchmark.
121            if $q.count <= 1 && available.contains(&config) {
122                let expected = $q.count > 0;
123                macro_rules! define {
124                    ($dir:expr, $find:expr) => {
125                        let name = format!(
126                            "{dir}/{imp}/{config}/{inp}/{freq}-{q}",
127                            dir = $dir,
128                            imp = stringify!($impl),
129                            config = config,
130                            inp = $inp.name,
131                            freq = $freq,
132                            q = $q.name,
133                        );
134                        define(
135                            c,
136                            &name,
137                            $inp.corpus.as_bytes(),
138                            Box::new(move |b| {
139                                b.iter(|| {
140                                    assert_eq!(
141                                        expected,
142                                        $find($inp.corpus, $q.needle)
143                                    );
144                                });
145                            }),
146                        );
147                    };
148                }
149                define!("memmem", imp::$impl::fwd::oneshot);
150                if available.contains(&"reverse") {
151                    define!("memrmem", imp::$impl::rev::oneshot);
152                }
153            }
154        };
155    }
156    macro_rules! def_all_impls {
157        ($inp:expr, $q:expr, $freq:expr) => {
158            def_impl!($inp, $q, $freq, krate);
159            def_impl!($inp, $q, $freq, krate_nopre);
160            def_impl!($inp, $q, $freq, bstr);
161            def_impl!($inp, $q, $freq, regex);
162            def_impl!($inp, $q, $freq, stud);
163            def_impl!($inp, $q, $freq, twoway);
164            def_impl!($inp, $q, $freq, sliceslice);
165            def_impl!($inp, $q, $freq, libc);
166        };
167    }
168    for inp in INPUTS {
169        for q in inp.never {
170            def_all_impls!(inp, q, "never");
171        }
172        for q in inp.rare {
173            def_all_impls!(inp, q, "rare");
174        }
175        for q in inp.common {
176            def_all_impls!(inp, q, "common");
177        }
178    }
179}
180
181fn prebuilt(c: &mut Criterion) {
182    macro_rules! def_impl {
183        ($inp:expr, $q:expr, $freq:expr, $impl:ident) => {
184            let config = "prebuilt";
185            let available = imp::$impl::available($q.needle);
186            // We only define non-iter benchmarks when the count is <=1. Such
187            // queries are usually constructed to only appear at the end.
188            // Otherwise, for more common queries, the benchmark would be
189            // approximately duplicative with benchmarks on shorter haystacks
190            // for the implementations we benchmark.
191            if $q.count <= 1 && available.contains(&config) {
192                let expected = $q.count > 0;
193                macro_rules! define {
194                    ($dir:expr, $new_finder:expr) => {
195                        let name = format!(
196                            "{dir}/{imp}/{config}/{inp}/{freq}-{q}",
197                            dir = $dir,
198                            imp = stringify!($impl),
199                            config = config,
200                            inp = $inp.name,
201                            freq = $freq,
202                            q = $q.name,
203                        );
204                        define(
205                            c,
206                            &name,
207                            $inp.corpus.as_bytes(),
208                            Box::new(move |b| {
209                                let find = $new_finder($q.needle);
210                                b.iter(|| {
211                                    assert_eq!(expected, find($inp.corpus));
212                                });
213                            }),
214                        );
215                    };
216                }
217                define!("memmem", imp::$impl::fwd::prebuilt);
218                if available.contains(&"reverse") {
219                    define!("memrmem", imp::$impl::rev::prebuilt);
220                }
221            }
222        };
223    }
224    macro_rules! def_all_impls {
225        ($inp:expr, $q:expr, $freq:expr) => {
226            def_impl!($inp, $q, $freq, krate);
227            def_impl!($inp, $q, $freq, krate_nopre);
228            def_impl!($inp, $q, $freq, bstr);
229            def_impl!($inp, $q, $freq, regex);
230            def_impl!($inp, $q, $freq, stud);
231            def_impl!($inp, $q, $freq, twoway);
232            def_impl!($inp, $q, $freq, sliceslice);
233            def_impl!($inp, $q, $freq, libc);
234        };
235    }
236    for inp in INPUTS {
237        for q in inp.never {
238            def_all_impls!(inp, q, "never");
239        }
240        for q in inp.rare {
241            def_all_impls!(inp, q, "rare");
242        }
243        for q in inp.common {
244            def_all_impls!(inp, q, "common");
245        }
246    }
247}
248
249fn oneshot_iter(c: &mut Criterion) {
250    macro_rules! def_impl {
251        ($inp:expr, $q:expr, $freq:expr, $impl:ident) => {
252            let config = "oneshotiter";
253            let available = imp::$impl::available($q.needle);
254            // We only define iter benchmarks when the count is >1. Since
255            // queries with count<=1 are usually constructed such that the
256            // match appears at the end of the haystack, it doesn't make much
257            // sense to also benchmark iteration for that case. Instead, we only
258            // benchmark iteration for queries that match more frequently.
259            if $q.count > 1 && available.contains(&config) {
260                macro_rules! define {
261                    ($dir:expr, $find_iter:expr) => {
262                        let name = format!(
263                            "{dir}/{imp}/{config}/{inp}/{freq}-{q}",
264                            dir = $dir,
265                            imp = stringify!($impl),
266                            config = config,
267                            inp = $inp.name,
268                            freq = $freq,
269                            q = $q.name,
270                        );
271                        define(
272                            c,
273                            &name,
274                            $inp.corpus.as_bytes(),
275                            Box::new(move |b| {
276                                b.iter(|| {
277                                    let it =
278                                        $find_iter($inp.corpus, $q.needle);
279                                    assert_eq!($q.count, it.count());
280                                });
281                            }),
282                        );
283                    };
284                }
285                define!("memmem", imp::$impl::fwd::oneshotiter);
286                if available.contains(&"reverse") {
287                    define!("memrmem", imp::$impl::rev::oneshotiter);
288                }
289            }
290        };
291    }
292    macro_rules! def_all_impls {
293        ($inp:expr, $q:expr, $freq:expr) => {
294            def_impl!($inp, $q, $freq, krate);
295            def_impl!($inp, $q, $freq, krate_nopre);
296            def_impl!($inp, $q, $freq, bstr);
297            def_impl!($inp, $q, $freq, regex);
298            def_impl!($inp, $q, $freq, stud);
299            def_impl!($inp, $q, $freq, twoway);
300            def_impl!($inp, $q, $freq, sliceslice);
301            def_impl!($inp, $q, $freq, libc);
302        };
303    }
304    for inp in INPUTS {
305        for q in inp.never {
306            def_all_impls!(inp, q, "never");
307        }
308        for q in inp.rare {
309            def_all_impls!(inp, q, "rare");
310        }
311        for q in inp.common {
312            def_all_impls!(inp, q, "common");
313        }
314    }
315}
316
317fn prebuilt_iter(c: &mut Criterion) {
318    macro_rules! def_impl {
319        ($inp:expr, $q:expr, $freq:expr, $impl:ident) => {
320            let config = "prebuiltiter";
321            let available = imp::$impl::available($q.needle);
322            // We only define iter benchmarks when the count is >1. Since
323            // queries with count<=1 are usually constructed such that the
324            // match appears at the end of the haystack, it doesn't make much
325            // sense to also benchmark iteration for that case. Instead, we only
326            // benchmark iteration for queries that match more frequently.
327            if $q.count > 1 && available.contains(&config) {
328                macro_rules! define {
329                    ($dir:expr, $new_finder:expr) => {
330                        let name = format!(
331                            "{dir}/{imp}/{config}/{inp}/{freq}-{q}",
332                            dir = $dir,
333                            imp = stringify!($impl),
334                            config = config,
335                            inp = $inp.name,
336                            freq = $freq,
337                            q = $q.name,
338                        );
339                        define(
340                            c,
341                            &name,
342                            $inp.corpus.as_bytes(),
343                            Box::new(move |b| {
344                                let finder = $new_finder($q.needle);
345                                b.iter(|| {
346                                    let it = finder.iter($inp.corpus);
347                                    assert_eq!($q.count, it.count());
348                                });
349                            }),
350                        );
351                    };
352                }
353                define!("memmem", imp::$impl::fwd::prebuiltiter);
354                if available.contains(&"reverse") {
355                    define!("memrmem", imp::$impl::rev::prebuiltiter);
356                }
357            }
358        };
359    }
360    macro_rules! def_all_impls {
361        ($inp:expr, $q:expr, $freq:expr) => {
362            def_impl!($inp, $q, $freq, krate);
363            def_impl!($inp, $q, $freq, krate_nopre);
364            def_impl!($inp, $q, $freq, bstr);
365            def_impl!($inp, $q, $freq, regex);
366            def_impl!($inp, $q, $freq, stud);
367            def_impl!($inp, $q, $freq, twoway);
368            def_impl!($inp, $q, $freq, sliceslice);
369            def_impl!($inp, $q, $freq, libc);
370        };
371    }
372    for inp in INPUTS {
373        for q in inp.never {
374            def_all_impls!(inp, q, "never");
375        }
376        for q in inp.rare {
377            def_all_impls!(inp, q, "rare");
378        }
379        for q in inp.common {
380            def_all_impls!(inp, q, "common");
381        }
382    }
383}
384