1c67d6573Sopenharmony_ci// Enable the benchmarking harness.
2c67d6573Sopenharmony_ci#![feature(test)]
3c67d6573Sopenharmony_ci// It's too annoying to carefully define macros based on which regex engines
4c67d6573Sopenharmony_ci// have which benchmarks, so just ignore these warnings.
5c67d6573Sopenharmony_ci#![allow(unused_macros)]
6c67d6573Sopenharmony_ci
7c67d6573Sopenharmony_ciextern crate test;
8c67d6573Sopenharmony_ci
9c67d6573Sopenharmony_ciuse cfg_if::cfg_if;
10c67d6573Sopenharmony_ci
11c67d6573Sopenharmony_cicfg_if! {
12c67d6573Sopenharmony_ci    if #[cfg(feature = "re-pcre1")] {
13c67d6573Sopenharmony_ci        pub use ffi::pcre1::Regex;
14c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-onig")] {
15c67d6573Sopenharmony_ci        pub use ffi::onig::Regex;
16c67d6573Sopenharmony_ci    } else if #[cfg(any(feature = "re-rust"))] {
17c67d6573Sopenharmony_ci        pub use regex::{Regex, RegexSet};
18c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-rust-bytes")] {
19c67d6573Sopenharmony_ci        pub use regex::bytes::{Regex, RegexSet};
20c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-re2")] {
21c67d6573Sopenharmony_ci        pub use ffi::re2::Regex;
22c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-pcre2")] {
23c67d6573Sopenharmony_ci        pub use ffi::pcre2::Regex;
24c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-tcl")] {
25c67d6573Sopenharmony_ci        pub use ffi::tcl::Regex;
26c67d6573Sopenharmony_ci    } else {
27c67d6573Sopenharmony_ci        compile_error!(
28c67d6573Sopenharmony_ci            "To run the benchmarks, see `./run -h` or the HACKING.md document"
29c67d6573Sopenharmony_ci        );
30c67d6573Sopenharmony_ci    }
31c67d6573Sopenharmony_ci}
32c67d6573Sopenharmony_ci
// Usage: regex!(pattern)
//
// Compiles `pattern` with the engine re-exported at the crate root
// (`crate::Regex`) and evaluates to the resulting regex. Panics (via
// `unwrap`) if the engine rejects the pattern.
//
// `pattern` is only borrowed: any expression whose reference coerces to
// `&str` works (a string literal, a `String`, a `format!(..)` temporary, ...).
// No intermediate copy of the pattern is made.
//
// Due to macro scoping rules, this definition only applies to code that
// textually follows it, which includes the benchmark modules declared at the
// bottom of this file.
macro_rules! regex {
    ($re:expr) => {
        crate::Regex::new(&$re).unwrap()
    };
}
45c67d6573Sopenharmony_ci
46c67d6573Sopenharmony_cicfg_if! {
47c67d6573Sopenharmony_ci    if #[cfg(feature = "re-tcl")] {
48c67d6573Sopenharmony_ci        // Usage: text!(haystack)
49c67d6573Sopenharmony_ci        //
50c67d6573Sopenharmony_ci        // Builds a ::Text from an owned string.
51c67d6573Sopenharmony_ci        //
52c67d6573Sopenharmony_ci        // This macro is called on every input searched in every benchmark. It is
53c67d6573Sopenharmony_ci        // called exactly once per benchmark and its time is not included in the
54c67d6573Sopenharmony_ci        // benchmark timing.
55c67d6573Sopenharmony_ci        //
56c67d6573Sopenharmony_ci        // The text given to the macro is always a String, which is guaranteed to be
57c67d6573Sopenharmony_ci        // valid UTF-8.
58c67d6573Sopenharmony_ci        //
59c67d6573Sopenharmony_ci        // The return type should be an owned value that can deref to whatever the
60c67d6573Sopenharmony_ci        // regex accepts in its `is_match` and `find_iter` methods.
61c67d6573Sopenharmony_ci        macro_rules! text {
62c67d6573Sopenharmony_ci            ($text:expr) => {{
63c67d6573Sopenharmony_ci                use crate::ffi::tcl::Text;
64c67d6573Sopenharmony_ci                Text::new($text)
65c67d6573Sopenharmony_ci            }}
66c67d6573Sopenharmony_ci        }
67c67d6573Sopenharmony_ci        type Text = ffi::tcl::Text;
68c67d6573Sopenharmony_ci    } else if #[cfg(feature = "re-rust-bytes")] {
69c67d6573Sopenharmony_ci        macro_rules! text {
70c67d6573Sopenharmony_ci            ($text:expr) => {{
71c67d6573Sopenharmony_ci                let text: String = $text;
72c67d6573Sopenharmony_ci                text.into_bytes()
73c67d6573Sopenharmony_ci            }}
74c67d6573Sopenharmony_ci        }
75c67d6573Sopenharmony_ci        type Text = Vec<u8>;
76c67d6573Sopenharmony_ci    } else {
77c67d6573Sopenharmony_ci        macro_rules! text {
78c67d6573Sopenharmony_ci            ($text:expr) => { $text }
79c67d6573Sopenharmony_ci        }
80c67d6573Sopenharmony_ci        type Text = String;
81c67d6573Sopenharmony_ci    }
82c67d6573Sopenharmony_ci}
83c67d6573Sopenharmony_ci
84c67d6573Sopenharmony_ci// Macros for writing benchmarks easily. We provide macros for benchmarking
85c67d6573Sopenharmony_ci// matches, non-matches and for finding all successive non-overlapping matches
86c67d6573Sopenharmony_ci// in a string (including a check that the count is correct).
87c67d6573Sopenharmony_ci
// USAGE: bench_match!(name, pattern, haystack)
//
// Declares a benchmark that measures how quickly a regex reports a match on
// `haystack`. The benchmark fails if the regex does NOT match.
//
//   name      identifier used as the benchmark function name.
//   pattern   the regular expression source (e.g. a &'static str); it is
//             compiled with `regex!`.
//   haystack  the `String` to search.
//
// This is a thin wrapper: it expands to `bench_is_match!` with the expected
// outcome fixed to `true`.
macro_rules! bench_match {
    ($bench:ident, $source:expr, $input:expr) => {
        bench_is_match! { $bench, true, regex!($source), $input }
    };
}
104c67d6573Sopenharmony_ci
// USAGE: bench_not_match!(name, pattern, haystack)
//
// Declares a benchmark that measures how quickly a regex reports the absence
// of a match on `haystack`. The benchmark fails if the regex DOES match.
//
//   name      identifier used as the benchmark function name.
//   pattern   the regular expression source (e.g. a &'static str); it is
//             compiled with `regex!`.
//   haystack  the `String` to search.
//
// This is a thin wrapper: it expands to `bench_is_match!` with the expected
// outcome fixed to `false`.
macro_rules! bench_not_match {
    ($bench:ident, $source:expr, $input:expr) => {
        bench_is_match! { $bench, false, regex!($source), $input }
    };
}
121c67d6573Sopenharmony_ci
// USAGE: bench_is_match!(name, is_match, regex, haystack)
//
// Declares a `#[bench]` function that times `regex.is_match(haystack)` and
// panics as soon as the outcome differs from `is_match`.
//
//   name      identifier used as the benchmark function name.
//   is_match  `bool`: whether the regex is expected to match the haystack.
//   regex     an already-built ::Regex.
//   haystack  the `String` to search; converted with `text!`.
//
// The regex and the haystack are each built exactly once.
macro_rules! bench_is_match {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // The regex and haystack live in lazily initialised statics
            // rather than in locals built before `b.iter()`. The benchmark
            // harness seems to call this whole function several times; that
            // does not skew the reported timings, but rebuilding the regex on
            // every call makes a full benchmark run take substantially
            // longer.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let regex = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            // Only a disagreement between the observed and the expected
            // outcome is an error; the two panics name the direction.
            b.iter(|| match (regex.is_match(&input), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
168c67d6573Sopenharmony_ci
// USAGE: bench_find!(name, pattern, count, haystack)
//
// Declares a `#[bench]` function that times counting every successive
// non-overlapping match of `pattern` in `haystack` (via `find_iter`). The
// benchmark fails if the number of matches found differs from `count`.
//
//   name      identifier used as the benchmark function name.
//   pattern   the regular expression source (e.g. a &'static str); it is
//             compiled with `regex!`.
//   count     the expected number of matches.
//   haystack  the `String` to search; converted with `text!`.
macro_rules! bench_find {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built lazily, once, and shared by every call of this function.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern));
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let regex = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            b.iter(|| {
                let found = regex.find_iter(&input).count();
                assert_eq!($count, found)
            });
        }
    };
}
201c67d6573Sopenharmony_ci
// USAGE: bench_captures!(name, regex, count, haystack);
//
// Declares a `#[bench]` function that times `regex.captures(haystack)`.
// The function is only compiled when the `re-rust` feature is enabled.
//
//   name      identifier used as the benchmark function name.
//   regex     an already-built ::Regex (NOT a pattern string: the expression
//             is stored as-is, so callers typically pass `regex!(..)`).
//   count     usize, the number of capture groups in the regex. The benchmark
//             asserts that the returned captures have `count + 1` entries
//             (presumably the extra entry is the implicit whole-match group).
//   haystack  the `String` to search; converted with `text!`.
//
// The benchmark panics with "no captures" if the regex does not match at all.
macro_rules! bench_captures {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[cfg(feature = "re-rust")]
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built lazily, once, and shared by every call of this function.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($pattern);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let re = RE.lock().unwrap();
            let text = TEXT.lock().unwrap();
            b.bytes = text.len() as u64;
            b.iter(|| match re.captures(&text) {
                // A missing match is an unconditional failure, so panic
                // directly instead of asserting on a constant `false`.
                None => panic!("no captures"),
                Some(caps) => assert_eq!($count + 1, caps.len()),
            });
        }
    };
}
234c67d6573Sopenharmony_ci
// USAGE: bench_is_match_set!(name, is_match, regex, haystack)
//
// Declares a `#[bench]` function that times `RegexSet::is_match` on
// `haystack` and panics as soon as the outcome differs from `is_match`.
//
//   name      identifier used as the benchmark function name.
//   is_match  `bool`: whether the set is expected to match the haystack.
//   regex     an already-built `RegexSet`.
//   haystack  the `String` to search; converted with `text!`.
macro_rules! bench_is_match_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built lazily, once, and shared by every call of this function.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            // Only a disagreement between the observed and the expected
            // outcome is an error; the two panics name the direction.
            b.iter(|| match (set.is_match(&input), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
262c67d6573Sopenharmony_ci
// USAGE: bench_matches_set!(name, is_match, regex, haystack)
//
// Declares a `#[bench]` function that times `RegexSet::matches` on `haystack`
// and checks `matched_any()` on the result against `is_match`, panicking on
// a mismatch.
//
//   name      identifier used as the benchmark function name.
//   is_match  `bool`: whether any regex in the set is expected to match.
//   regex     an already-built `RegexSet`.
//   haystack  the `String` to search; converted with `text!`.
macro_rules! bench_matches_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built lazily, once, and shared by every call of this function.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            }
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            b.bytes = input.len() as u64;
            // Only a disagreement between the observed and the expected
            // outcome is an error; the two panics name the direction.
            b.iter(|| match (set.matches(&input).matched_any(), $is_match) {
                (false, true) => panic!("expected match, got not match"),
                (true, false) => panic!("expected no match, got match"),
                _ => {}
            });
        }
    };
}
290c67d6573Sopenharmony_ci
291c67d6573Sopenharmony_cicfg_if! {
292c67d6573Sopenharmony_ci    if #[cfg(any(
293c67d6573Sopenharmony_ci        feature = "re-pcre1",
294c67d6573Sopenharmony_ci        feature = "re-onig",
295c67d6573Sopenharmony_ci        feature = "re-rust",
296c67d6573Sopenharmony_ci        feature = "re-rust-bytes",
297c67d6573Sopenharmony_ci        feature = "re-re2",
298c67d6573Sopenharmony_ci        feature = "re-pcre2",
299c67d6573Sopenharmony_ci        feature = "re-tcl"
300c67d6573Sopenharmony_ci    ))] {
301c67d6573Sopenharmony_ci        mod ffi;
302c67d6573Sopenharmony_ci        mod misc;
303c67d6573Sopenharmony_ci        mod regexdna;
304c67d6573Sopenharmony_ci        mod sherlock;
305c67d6573Sopenharmony_ci    }
306c67d6573Sopenharmony_ci}
307c67d6573Sopenharmony_ci
308c67d6573Sopenharmony_ci#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
309c67d6573Sopenharmony_cimod rust_compile;
310