xref: /third_party/rust/crates/regex/bench/src/bench.rs (revision c67d6573)
1// Enable the benchmarking harness.
2#![feature(test)]
3// It's too annoying to carefully define macros based on which regex engines
4// have which benchmarks, so just ignore these warnings.
5#![allow(unused_macros)]
6
7extern crate test;
8
9use cfg_if::cfg_if;
10
11cfg_if! {
12    if #[cfg(feature = "re-pcre1")] {
13        pub use ffi::pcre1::Regex;
14    } else if #[cfg(feature = "re-onig")] {
15        pub use ffi::onig::Regex;
16    } else if #[cfg(any(feature = "re-rust"))] {
17        pub use regex::{Regex, RegexSet};
18    } else if #[cfg(feature = "re-rust-bytes")] {
19        pub use regex::bytes::{Regex, RegexSet};
20    } else if #[cfg(feature = "re-re2")] {
21        pub use ffi::re2::Regex;
22    } else if #[cfg(feature = "re-pcre2")] {
23        pub use ffi::pcre2::Regex;
24    } else if #[cfg(feature = "re-tcl")] {
25        pub use ffi::tcl::Regex;
26    } else {
27        compile_error!(
28            "To run the benchmarks, see `./run -h` or the HACKING.md document"
29        );
30    }
31}
32
// Usage: regex!(pattern)
//
// Builds a ::Regex from a borrowed string, panicking (via `unwrap`) if the
// engine rejects the pattern.
//
// The pattern expression is only borrowed, so both string literals and owned
// `String` values work without an intermediate copy. This assumes that every
// engine's `Regex::new` accepts a `&str`.
//
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
macro_rules! regex {
    ($re:expr) => {
        crate::Regex::new(&$re).unwrap()
    };
}
45
46cfg_if! {
47    if #[cfg(feature = "re-tcl")] {
48        // Usage: text!(haystack)
49        //
50        // Builds a ::Text from an owned string.
51        //
52        // This macro is called on every input searched in every benchmark. It is
53        // called exactly once per benchmark and its time is not included in the
54        // benchmark timing.
55        //
56        // The text given to the macro is always a String, which is guaranteed to be
57        // valid UTF-8.
58        //
59        // The return type should be an owned value that can deref to whatever the
60        // regex accepts in its `is_match` and `find_iter` methods.
61        macro_rules! text {
62            ($text:expr) => {{
63                use crate::ffi::tcl::Text;
64                Text::new($text)
65            }}
66        }
67        type Text = ffi::tcl::Text;
68    } else if #[cfg(feature = "re-rust-bytes")] {
69        macro_rules! text {
70            ($text:expr) => {{
71                let text: String = $text;
72                text.into_bytes()
73            }}
74        }
75        type Text = Vec<u8>;
76    } else {
77        macro_rules! text {
78            ($text:expr) => { $text }
79        }
80        type Text = String;
81    }
82}
83
// Macros for writing benchmarks easily. We provide macros for benchmarking
// matches, non-matches and for finding all successive non-overlapping matches
// in a string (including a check that the count is correct).
87
// USAGE: bench_match!(name, pattern, haystack)
//
// Defines a benchmark that measures how quickly a regex reports a match on
// `haystack`. The benchmark fails if the regex does NOT match.
//
// This is shorthand for `bench_is_match!` with `is_match = true` and the
// pattern compiled through `regex!`.
//
//   name     - identifier used as the benchmark function's name.
//   pattern  - the regular expression source, e.g. a &'static str.
//   haystack - the String to search.
macro_rules! bench_match {
    ($name:ident, $pattern:expr, $haystack:expr) => {
        bench_is_match! { $name, true, regex!($pattern), $haystack }
    };
}
104
// USAGE: bench_not_match!(name, pattern, haystack)
//
// Defines a benchmark that measures how quickly a regex reports the absence
// of a match on `haystack`. The benchmark fails if the regex DOES match.
//
// This is shorthand for `bench_is_match!` with `is_match = false` and the
// pattern compiled through `regex!`.
//
//   name     - identifier used as the benchmark function's name.
//   pattern  - the regular expression source, e.g. a &'static str.
//   haystack - the String to search.
macro_rules! bench_not_match {
    ($name:ident, $pattern:expr, $haystack:expr) => {
        bench_is_match! { $name, false, regex!($pattern), $haystack }
    };
}
121
// USAGE: bench_is_match!(name, is_match, regex, haystack)
//
// Defines a `#[bench]` function called `name` that times `Regex::is_match`
// on `haystack` and panics when the outcome differs from `is_match`.
//
//   name     - identifier used as the benchmark function's name.
//   is_match - bool; whether the regex is expected to match the haystack.
//   regex    - an expression producing a ::Regex.
//   haystack - the String to search; converted with `text!`.
//
// Throughput is reported against the haystack length (`b.bytes`).
macro_rules! bench_is_match {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // The regex and haystack live in lazily initialized statics
            // rather than in locals built before `b.iter()`. Rust's benchmark
            // harness seems to invoke this whole function several times;
            // that does not skew the reported timings, but rebuilding the
            // regex on every invocation makes the suite take substantially
            // longer to run.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }

            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            b.iter(|| {
                // Expected outcome: nothing more to do for this iteration.
                if compiled.is_match(&input) == $is_match {
                    return;
                }
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
168
// USAGE: bench_find!(name, pattern, count, haystack)
//
// Defines a `#[bench]` function called `name` that times how quickly a regex
// can iterate over all successive non-overlapping matches in `haystack`.
// Every iteration asserts that the number of matches equals `count`, so a
// wrong count fails the benchmark.
//
//   name     - identifier used as the benchmark function's name.
//   pattern  - the regular expression source; compiled through `regex!`.
//   count    - the expected number of matches.
//   haystack - the String to search; converted with `text!`.
macro_rules! bench_find {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and cached so that repeated invocations of this
            // function do not recompile the regex.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern));
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }

            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            b.iter(|| {
                let found = compiled.find_iter(&input).count();
                assert_eq!($count, found)
            });
        }
    };
}
201
// USAGE: bench_captures!(name, pattern, groups, haystack);
//
// CONTRACT:
//   Given:
//     ident, the desired benchmarking function name
//     pattern : ::Regex, the regular expression to be executed
//     groups : usize, the number of capture groups
//     haystack : String, the string to search
//   bench_captures will benchmark how fast re.captures() produces
//   the capture groups in question.
//
// The generated benchmark only exists when the `re-rust` feature is enabled.
// It panics with "no captures" if the regex does not match at all, and fails
// an assertion if the reported number of groups is not `groups + 1`.
macro_rules! bench_captures {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[cfg(feature = "re-rust")]
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and cached so that repeated invocations of this
            // function do not rebuild the regex.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($pattern);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let re = RE.lock().unwrap();
            let text = TEXT.lock().unwrap();
            b.bytes = text.len() as u64;
            b.iter(|| match re.captures(&text) {
                // NOTE(review): the `+ 1` presumably accounts for the
                // implicit whole-match group that `caps.len()` includes.
                Some(caps) => assert_eq!($count + 1, caps.len()),
                None => panic!("no captures"),
            });
        }
    };
}
234
// USAGE: bench_is_match_set!(name, is_match, regex, haystack)
//
// Defines a `#[bench]` function called `name` that times
// `RegexSet::is_match` on `haystack` and panics when the outcome differs
// from `is_match`.
//
//   name     - identifier used as the benchmark function's name.
//   is_match - bool; whether the set is expected to match the haystack.
//   regex    - an expression producing a RegexSet.
//   haystack - the String to search; converted with `text!`.
macro_rules! bench_is_match_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and cached so that repeated invocations of this
            // function do not rebuild the set.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }

            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            b.iter(|| {
                // Expected outcome: nothing more to do for this iteration.
                if set.is_match(&input) == $is_match {
                    return;
                }
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
262
// USAGE: bench_matches_set!(name, is_match, regex, haystack)
//
// Like `bench_is_match_set!`, except that the timed operation is
// `RegexSet::matches(..).matched_any()` rather than `RegexSet::is_match`.
// The benchmark panics when the outcome differs from `is_match`.
//
//   name     - identifier used as the benchmark function's name.
//   is_match - bool; whether any regex in the set is expected to match.
//   regex    - an expression producing a RegexSet.
//   haystack - the String to search; converted with `text!`.
macro_rules! bench_matches_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and cached so that repeated invocations of this
            // function do not rebuild the set.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }

            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            b.iter(|| {
                // Expected outcome: nothing more to do for this iteration.
                if set.matches(&input).matched_any() == $is_match {
                    return;
                }
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
290
291cfg_if! {
292    if #[cfg(any(
293        feature = "re-pcre1",
294        feature = "re-onig",
295        feature = "re-rust",
296        feature = "re-rust-bytes",
297        feature = "re-re2",
298        feature = "re-pcre2",
299        feature = "re-tcl"
300    ))] {
301        mod ffi;
302        mod misc;
303        mod regexdna;
304        mod sherlock;
305    }
306}
307
308#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
309mod rust_compile;
310