use crate::data;

3fb6c1f39Sopenharmony_ci#[derive(Clone, Copy, Debug)]
4fb6c1f39Sopenharmony_cipub struct Input {
5fb6c1f39Sopenharmony_ci    /// A name describing the corpus, used to identify it in benchmarks.
6fb6c1f39Sopenharmony_ci    pub name: &'static str,
7fb6c1f39Sopenharmony_ci    /// The haystack to search.
8fb6c1f39Sopenharmony_ci    pub corpus: &'static str,
9fb6c1f39Sopenharmony_ci    /// Queries that are expected to never occur.
10fb6c1f39Sopenharmony_ci    pub never: &'static [Query],
11fb6c1f39Sopenharmony_ci    /// Queries that are expected to occur rarely.
12fb6c1f39Sopenharmony_ci    pub rare: &'static [Query],
13fb6c1f39Sopenharmony_ci    /// Queries that are expected to fairly common.
14fb6c1f39Sopenharmony_ci    pub common: &'static [Query],
15fb6c1f39Sopenharmony_ci}
16fb6c1f39Sopenharmony_ci
/// A substring search query for a particular haystack.
#[derive(Clone, Copy, Debug)]
pub struct Query {
    /// A name for this query, used to identify it in benchmarks.
    pub name: &'static str,
    /// The needle to search for.
    pub needle: &'static str,
    /// The expected number of occurrences of `needle`.
    pub count: usize,
}

28fb6c1f39Sopenharmony_cipub const INPUTS: &'static [Input] = &[
29fb6c1f39Sopenharmony_ci    Input {
30fb6c1f39Sopenharmony_ci        name: "code-rust-library",
31fb6c1f39Sopenharmony_ci        corpus: data::CODE_RUST_LIBRARY,
32fb6c1f39Sopenharmony_ci        never: &[
33fb6c1f39Sopenharmony_ci            Query { name: "fn-strength", needle: "fn strength", count: 0 },
34fb6c1f39Sopenharmony_ci            Query {
35fb6c1f39Sopenharmony_ci                name: "fn-strength-paren",
36fb6c1f39Sopenharmony_ci                needle: "fn strength(",
37fb6c1f39Sopenharmony_ci                count: 0,
38fb6c1f39Sopenharmony_ci            },
39fb6c1f39Sopenharmony_ci            Query { name: "fn-quux", needle: "fn quux(", count: 0 },
40fb6c1f39Sopenharmony_ci        ],
41fb6c1f39Sopenharmony_ci        rare: &[
42fb6c1f39Sopenharmony_ci            Query {
43fb6c1f39Sopenharmony_ci                name: "fn-from-str",
44fb6c1f39Sopenharmony_ci                needle: "pub fn from_str(",
45fb6c1f39Sopenharmony_ci                count: 1,
46fb6c1f39Sopenharmony_ci            },
47fb6c1f39Sopenharmony_ci        ],
48fb6c1f39Sopenharmony_ci        common: &[
49fb6c1f39Sopenharmony_ci            Query { name: "fn-is-empty", needle: "fn is_empty(", count: 17 },
50fb6c1f39Sopenharmony_ci            Query { name: "fn", needle: "fn", count: 2985 },
51fb6c1f39Sopenharmony_ci            Query { name: "paren", needle: "(", count: 30193 },
52fb6c1f39Sopenharmony_ci            Query { name: "let", needle: "let", count: 4737 },
53fb6c1f39Sopenharmony_ci        ],
54fb6c1f39Sopenharmony_ci    },
55fb6c1f39Sopenharmony_ci    Input {
56fb6c1f39Sopenharmony_ci        name: "huge-en",
57fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_EN_HUGE,
58fb6c1f39Sopenharmony_ci        never: &[
59fb6c1f39Sopenharmony_ci            Query { name: "john-watson", needle: "John Watson", count: 0 },
60fb6c1f39Sopenharmony_ci            Query { name: "all-common-bytes", needle: "sternness", count: 0 },
61fb6c1f39Sopenharmony_ci            Query { name: "some-rare-bytes", needle: "quartz", count: 0 },
62fb6c1f39Sopenharmony_ci            Query { name: "two-space", needle: "  ", count: 0 },
63fb6c1f39Sopenharmony_ci        ],
64fb6c1f39Sopenharmony_ci        rare: &[
65fb6c1f39Sopenharmony_ci            Query {
66fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
67fb6c1f39Sopenharmony_ci                needle: "Sherlock Holmes",
68fb6c1f39Sopenharmony_ci                count: 1,
69fb6c1f39Sopenharmony_ci            },
70fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "Sherlock", count: 1 },
71fb6c1f39Sopenharmony_ci            Query {
72fb6c1f39Sopenharmony_ci                name: "medium-needle",
73fb6c1f39Sopenharmony_ci                needle: "homer, marge, bart, lisa, maggie",
74fb6c1f39Sopenharmony_ci                count: 1,
75fb6c1f39Sopenharmony_ci            },
76fb6c1f39Sopenharmony_ci            Query {
77fb6c1f39Sopenharmony_ci                name: "long-needle",
78fb6c1f39Sopenharmony_ci                needle: "I feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.",
79fb6c1f39Sopenharmony_ci                count: 1,
80fb6c1f39Sopenharmony_ci            },
81fb6c1f39Sopenharmony_ci            Query {
82fb6c1f39Sopenharmony_ci                name: "huge-needle",
83fb6c1f39Sopenharmony_ci                needle: "Since we will meet anyway, then the sooner, the better\nTomorrow at 4:30 in front of the Horse-Riding Club\nNo, 4:30\nI am confused, almost lost\nAs if an invisible hand pushed me towards an unknown fate\nI needed someone by my side\nI needed someone to guide me to the path of security\nBut I had no one\nI couldn't ask my father's opinion, nor his wife's\nI felt just as lonely as I had before\nI feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.",
84fb6c1f39Sopenharmony_ci                count: 1,
85fb6c1f39Sopenharmony_ci            },
86fb6c1f39Sopenharmony_ci        ],
87fb6c1f39Sopenharmony_ci        common: &[
88fb6c1f39Sopenharmony_ci            Query { name: "that", needle: "that", count: 865 },
89fb6c1f39Sopenharmony_ci            Query { name: "one-space", needle: " ", count: 96606 },
90fb6c1f39Sopenharmony_ci            Query { name: "you", needle: "you", count: 5009 },
91fb6c1f39Sopenharmony_ci            // It would be nice to benchmark this case, although it's not
92fb6c1f39Sopenharmony_ci            // terribly important. The problem is that std's substring
93fb6c1f39Sopenharmony_ci            // implementation (correctly) never returns match offsets that
94fb6c1f39Sopenharmony_ci            // split an encoded codepoint, where as memmem on bytes will. So
95fb6c1f39Sopenharmony_ci            // the counts differ. We could modify our harness to skip this on
96fb6c1f39Sopenharmony_ci            // std, but it seems like much ado about nothing.
97fb6c1f39Sopenharmony_ci            // Query { name: "empty", needle: "", count: 613655 },
98fb6c1f39Sopenharmony_ci        ],
99fb6c1f39Sopenharmony_ci    },
100fb6c1f39Sopenharmony_ci    Input {
101fb6c1f39Sopenharmony_ci        name: "huge-ru",
102fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_RU_HUGE,
103fb6c1f39Sopenharmony_ci        never: &[Query {
104fb6c1f39Sopenharmony_ci            name: "john-watson",
105fb6c1f39Sopenharmony_ci            needle: "Джон Уотсон",
106fb6c1f39Sopenharmony_ci            count: 0,
107fb6c1f39Sopenharmony_ci        }],
108fb6c1f39Sopenharmony_ci        rare: &[
109fb6c1f39Sopenharmony_ci            Query {
110fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
111fb6c1f39Sopenharmony_ci                needle: "Шерлок Холмс",
112fb6c1f39Sopenharmony_ci                count: 1,
113fb6c1f39Sopenharmony_ci            },
114fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "Шерлок", count: 1 },
115fb6c1f39Sopenharmony_ci        ],
116fb6c1f39Sopenharmony_ci        common: &[
117fb6c1f39Sopenharmony_ci            Query { name: "that", needle: "что", count: 998 },
118fb6c1f39Sopenharmony_ci            Query { name: "not", needle: "не", count: 3092 },
119fb6c1f39Sopenharmony_ci            Query { name: "one-space", needle: " ", count: 46941 },
120fb6c1f39Sopenharmony_ci        ],
121fb6c1f39Sopenharmony_ci    },
122fb6c1f39Sopenharmony_ci    Input {
123fb6c1f39Sopenharmony_ci        name: "huge-zh",
124fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_ZH_HUGE,
125fb6c1f39Sopenharmony_ci        never: &[Query {
126fb6c1f39Sopenharmony_ci            name: "john-watson", needle: "约翰·沃森", count: 0
127fb6c1f39Sopenharmony_ci        }],
128fb6c1f39Sopenharmony_ci        rare: &[
129fb6c1f39Sopenharmony_ci            Query {
130fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
131fb6c1f39Sopenharmony_ci                needle: "夏洛克·福尔摩斯",
132fb6c1f39Sopenharmony_ci                count: 1,
133fb6c1f39Sopenharmony_ci            },
134fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "夏洛克", count: 1 },
135fb6c1f39Sopenharmony_ci        ],
136fb6c1f39Sopenharmony_ci        common: &[
137fb6c1f39Sopenharmony_ci            Query { name: "that", needle: "那", count: 1056 },
138fb6c1f39Sopenharmony_ci            Query { name: "do-not", needle: "不", count: 2751 },
139fb6c1f39Sopenharmony_ci            Query { name: "one-space", needle: " ", count: 17232 },
140fb6c1f39Sopenharmony_ci        ],
141fb6c1f39Sopenharmony_ci    },
142fb6c1f39Sopenharmony_ci    Input {
143fb6c1f39Sopenharmony_ci        name: "teeny-en",
144fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_EN_TEENY,
145fb6c1f39Sopenharmony_ci        never: &[
146fb6c1f39Sopenharmony_ci            Query { name: "john-watson", needle: "John Watson", count: 0 },
147fb6c1f39Sopenharmony_ci            Query { name: "all-common-bytes", needle: "sternness", count: 0 },
148fb6c1f39Sopenharmony_ci            Query { name: "some-rare-bytes", needle: "quartz", count: 0 },
149fb6c1f39Sopenharmony_ci            Query { name: "two-space", needle: "  ", count: 0 },
150fb6c1f39Sopenharmony_ci        ],
151fb6c1f39Sopenharmony_ci        rare: &[
152fb6c1f39Sopenharmony_ci            Query {
153fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
154fb6c1f39Sopenharmony_ci                needle: "Sherlock Holmes",
155fb6c1f39Sopenharmony_ci                count: 1,
156fb6c1f39Sopenharmony_ci            },
157fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "Sherlock", count: 1 },
158fb6c1f39Sopenharmony_ci        ],
159fb6c1f39Sopenharmony_ci        common: &[],
160fb6c1f39Sopenharmony_ci    },
161fb6c1f39Sopenharmony_ci    Input {
162fb6c1f39Sopenharmony_ci        name: "teeny-ru",
163fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_RU_TEENY,
164fb6c1f39Sopenharmony_ci        never: &[Query {
165fb6c1f39Sopenharmony_ci            name: "john-watson",
166fb6c1f39Sopenharmony_ci            needle: "Джон Уотсон",
167fb6c1f39Sopenharmony_ci            count: 0,
168fb6c1f39Sopenharmony_ci        }],
169fb6c1f39Sopenharmony_ci        rare: &[
170fb6c1f39Sopenharmony_ci            Query {
171fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
172fb6c1f39Sopenharmony_ci                needle: "Шерлок Холмс",
173fb6c1f39Sopenharmony_ci                count: 1,
174fb6c1f39Sopenharmony_ci            },
175fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "Шерлок", count: 1 },
176fb6c1f39Sopenharmony_ci        ],
177fb6c1f39Sopenharmony_ci        common: &[],
178fb6c1f39Sopenharmony_ci    },
179fb6c1f39Sopenharmony_ci    Input {
180fb6c1f39Sopenharmony_ci        name: "teeny-zh",
181fb6c1f39Sopenharmony_ci        corpus: data::SUBTITLE_ZH_TEENY,
182fb6c1f39Sopenharmony_ci        never: &[Query {
183fb6c1f39Sopenharmony_ci            name: "john-watson", needle: "约翰·沃森", count: 0
184fb6c1f39Sopenharmony_ci        }],
185fb6c1f39Sopenharmony_ci        rare: &[
186fb6c1f39Sopenharmony_ci            Query {
187fb6c1f39Sopenharmony_ci                name: "sherlock-holmes",
188fb6c1f39Sopenharmony_ci                needle: "夏洛克·福尔摩斯",
189fb6c1f39Sopenharmony_ci                count: 1,
190fb6c1f39Sopenharmony_ci            },
191fb6c1f39Sopenharmony_ci            Query { name: "sherlock", needle: "夏洛克", count: 1 },
192fb6c1f39Sopenharmony_ci        ],
193fb6c1f39Sopenharmony_ci        common: &[],
194fb6c1f39Sopenharmony_ci    },
195fb6c1f39Sopenharmony_ci    Input {
196fb6c1f39Sopenharmony_ci        name: "pathological-md5-huge",
197fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_MD5_HUGE,
198fb6c1f39Sopenharmony_ci        never: &[Query {
199fb6c1f39Sopenharmony_ci            name: "no-hash",
200fb6c1f39Sopenharmony_ci            needle: "61a1a40effcf97de24505f154a306597",
201fb6c1f39Sopenharmony_ci            count: 0,
202fb6c1f39Sopenharmony_ci        }],
203fb6c1f39Sopenharmony_ci        rare: &[Query {
204fb6c1f39Sopenharmony_ci            name: "last-hash",
205fb6c1f39Sopenharmony_ci            needle: "831df319d8597f5bc793d690f08b159b",
206fb6c1f39Sopenharmony_ci            count: 1,
207fb6c1f39Sopenharmony_ci        }],
208fb6c1f39Sopenharmony_ci        common: &[Query { name: "two-bytes", needle: "fe", count: 520 }],
209fb6c1f39Sopenharmony_ci    },
210fb6c1f39Sopenharmony_ci    Input {
211fb6c1f39Sopenharmony_ci        name: "pathological-repeated-rare-huge",
212fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_REPEATED_RARE_HUGE,
213fb6c1f39Sopenharmony_ci        never: &[Query { name: "tricky", needle: "abczdef", count: 0 }],
214fb6c1f39Sopenharmony_ci        rare: &[],
215fb6c1f39Sopenharmony_ci        common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 50010 }],
216fb6c1f39Sopenharmony_ci    },
217fb6c1f39Sopenharmony_ci    Input {
218fb6c1f39Sopenharmony_ci        name: "pathological-repeated-rare-small",
219fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_REPEATED_RARE_SMALL,
220fb6c1f39Sopenharmony_ci        never: &[Query { name: "tricky", needle: "abczdef", count: 0 }],
221fb6c1f39Sopenharmony_ci        rare: &[],
222fb6c1f39Sopenharmony_ci        common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 100 }],
223fb6c1f39Sopenharmony_ci    },
224fb6c1f39Sopenharmony_ci    Input {
225fb6c1f39Sopenharmony_ci        name: "pathological-defeat-simple-vector",
226fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR,
227fb6c1f39Sopenharmony_ci        never: &[],
228fb6c1f39Sopenharmony_ci        rare: &[Query {
229fb6c1f39Sopenharmony_ci            name: "alphabet",
230fb6c1f39Sopenharmony_ci            needle: "qbz",
231fb6c1f39Sopenharmony_ci            count: 1,
232fb6c1f39Sopenharmony_ci        }],
233fb6c1f39Sopenharmony_ci        common: &[],
234fb6c1f39Sopenharmony_ci    },
235fb6c1f39Sopenharmony_ci    Input {
236fb6c1f39Sopenharmony_ci        name: "pathological-defeat-simple-vector-freq",
237fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_FREQ,
238fb6c1f39Sopenharmony_ci        never: &[],
239fb6c1f39Sopenharmony_ci        rare: &[Query {
240fb6c1f39Sopenharmony_ci            name: "alphabet",
241fb6c1f39Sopenharmony_ci            needle: "qjaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz",
242fb6c1f39Sopenharmony_ci            count: 1,
243fb6c1f39Sopenharmony_ci        }],
244fb6c1f39Sopenharmony_ci        common: &[],
245fb6c1f39Sopenharmony_ci    },
246fb6c1f39Sopenharmony_ci    Input {
247fb6c1f39Sopenharmony_ci        name: "pathological-defeat-simple-vector-repeated",
248fb6c1f39Sopenharmony_ci        corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_REPEATED,
249fb6c1f39Sopenharmony_ci        never: &[],
250fb6c1f39Sopenharmony_ci        rare: &[Query {
251fb6c1f39Sopenharmony_ci            name: "alphabet",
252fb6c1f39Sopenharmony_ci            needle: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzaz",
253fb6c1f39Sopenharmony_ci            count: 1,
254fb6c1f39Sopenharmony_ci        }],
255fb6c1f39Sopenharmony_ci        common: &[],
256fb6c1f39Sopenharmony_ci    },
257fb6c1f39Sopenharmony_ci];
258