1c67d6573Sopenharmony_ciuse std::fs::File; 2c67d6573Sopenharmony_ciuse std::str; 3c67d6573Sopenharmony_ci 4c67d6573Sopenharmony_ciuse docopt::Docopt; 5c67d6573Sopenharmony_ciuse memmap::Mmap; 6c67d6573Sopenharmony_ci 7c67d6573Sopenharmony_cimod ffi; 8c67d6573Sopenharmony_ci 9c67d6573Sopenharmony_ciconst USAGE: &'static str = " 10c67d6573Sopenharmony_ciCount the number of matches of <pattern> in <file>. 11c67d6573Sopenharmony_ci 12c67d6573Sopenharmony_ciThis compiles the pattern once and counts all successive non-overlapping 13c67d6573Sopenharmony_cimatches in <file>. <file> is memory mapped. Matching is done as if <file> were 14c67d6573Sopenharmony_cia single string (it is not line oriented). 15c67d6573Sopenharmony_ci 16c67d6573Sopenharmony_ciSince this tool includes compilation of the <pattern>, sufficiently large 17c67d6573Sopenharmony_cihaystacks should be used to amortize the cost of compilation. (e.g., >1MB.) 18c67d6573Sopenharmony_ci 19c67d6573Sopenharmony_ciUsage: 20c67d6573Sopenharmony_ci regex-run-one [options] [onig | pcre1 | pcre2 | re2 | rust | rust-bytes | tcl] <file> <pattern> 21c67d6573Sopenharmony_ci regex-run-one [options] (-h | --help) 22c67d6573Sopenharmony_ci 23c67d6573Sopenharmony_ciOptions: 24c67d6573Sopenharmony_ci -h, --help Show this usage message. 25c67d6573Sopenharmony_ci"; 26c67d6573Sopenharmony_ci 27c67d6573Sopenharmony_ci#[derive(Debug, serde::Deserialize)] 28c67d6573Sopenharmony_cistruct Args { 29c67d6573Sopenharmony_ci arg_pattern: String, 30c67d6573Sopenharmony_ci arg_file: String, 31c67d6573Sopenharmony_ci cmd_onig: bool, 32c67d6573Sopenharmony_ci cmd_pcre1: bool, 33c67d6573Sopenharmony_ci cmd_pcre2: bool, 34c67d6573Sopenharmony_ci cmd_re2: bool, 35c67d6573Sopenharmony_ci cmd_rust: bool, 36c67d6573Sopenharmony_ci cmd_rust_bytes: bool, 37c67d6573Sopenharmony_ci cmd_tcl: bool, 38c67d6573Sopenharmony_ci} 39c67d6573Sopenharmony_ci 40c67d6573Sopenharmony_cifn main() { 41c67d6573Sopenharmony_ci let args: Args = Docopt::new(USAGE) 42c67d6573Sopenharmony_ci .and_then(|d| d.deserialize()) 43c67d6573Sopenharmony_ci .unwrap_or_else(|e| e.exit()); 44c67d6573Sopenharmony_ci 45c67d6573Sopenharmony_ci let mmap = 46c67d6573Sopenharmony_ci unsafe { Mmap::map(&File::open(&args.arg_file).unwrap()).unwrap() }; 47c67d6573Sopenharmony_ci let haystack = unsafe { str::from_utf8_unchecked(&mmap) }; 48c67d6573Sopenharmony_ci 49c67d6573Sopenharmony_ci println!("{}", args.count(&haystack)); 50c67d6573Sopenharmony_ci} 51c67d6573Sopenharmony_ci 52c67d6573Sopenharmony_ciimpl Args { 53c67d6573Sopenharmony_ci fn count(&self, haystack: &str) -> usize { 54c67d6573Sopenharmony_ci let pat = &self.arg_pattern; 55c67d6573Sopenharmony_ci if self.cmd_onig { 56c67d6573Sopenharmony_ci count_onig(pat, haystack) 57c67d6573Sopenharmony_ci } else if self.cmd_pcre1 { 58c67d6573Sopenharmony_ci count_pcre1(pat, haystack) 59c67d6573Sopenharmony_ci } else if self.cmd_pcre2 { 60c67d6573Sopenharmony_ci count_pcre2(pat, haystack) 61c67d6573Sopenharmony_ci } else if self.cmd_re2 { 62c67d6573Sopenharmony_ci count_re2(pat, haystack) 63c67d6573Sopenharmony_ci } else if self.cmd_rust { 64c67d6573Sopenharmony_ci count_rust(pat, haystack) 65c67d6573Sopenharmony_ci } else if self.cmd_rust_bytes { 66c67d6573Sopenharmony_ci count_rust_bytes(pat, haystack) 67c67d6573Sopenharmony_ci } else if self.cmd_tcl { 68c67d6573Sopenharmony_ci count_tcl(pat, haystack) 69c67d6573Sopenharmony_ci } else { 70c67d6573Sopenharmony_ci panic!("unreachable") 71c67d6573Sopenharmony_ci } 72c67d6573Sopenharmony_ci } 73c67d6573Sopenharmony_ci} 74c67d6573Sopenharmony_ci 75c67d6573Sopenharmony_cimacro_rules! nada { 76c67d6573Sopenharmony_ci ($feature:expr, $name:ident) => { 77c67d6573Sopenharmony_ci #[cfg(not(feature = $feature))] 78c67d6573Sopenharmony_ci fn $name(_pat: &str, _haystack: &str) -> usize { 79c67d6573Sopenharmony_ci panic!( 80c67d6573Sopenharmony_ci "Support not enabled. Re-compile with '--features {}' \ 81c67d6573Sopenharmony_ci to enable.", 82c67d6573Sopenharmony_ci $feature 83c67d6573Sopenharmony_ci ) 84c67d6573Sopenharmony_ci } 85c67d6573Sopenharmony_ci }; 86c67d6573Sopenharmony_ci} 87c67d6573Sopenharmony_ci 88c67d6573Sopenharmony_cinada!("re-onig", count_onig); 89c67d6573Sopenharmony_ci#[cfg(feature = "re-onig")] 90c67d6573Sopenharmony_cifn count_onig(pat: &str, haystack: &str) -> usize { 91c67d6573Sopenharmony_ci use ffi::onig::Regex; 92c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack).count() 93c67d6573Sopenharmony_ci} 94c67d6573Sopenharmony_ci 95c67d6573Sopenharmony_cinada!("re-pcre1", count_pcre1); 96c67d6573Sopenharmony_ci#[cfg(feature = "re-pcre1")] 97c67d6573Sopenharmony_cifn count_pcre1(pat: &str, haystack: &str) -> usize { 98c67d6573Sopenharmony_ci use ffi::pcre1::Regex; 99c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack).count() 100c67d6573Sopenharmony_ci} 101c67d6573Sopenharmony_ci 102c67d6573Sopenharmony_cinada!("re-pcre2", count_pcre2); 103c67d6573Sopenharmony_ci#[cfg(feature = "re-pcre2")] 104c67d6573Sopenharmony_cifn count_pcre2(pat: &str, haystack: &str) -> usize { 105c67d6573Sopenharmony_ci use ffi::pcre2::Regex; 106c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack).count() 107c67d6573Sopenharmony_ci} 108c67d6573Sopenharmony_ci 109c67d6573Sopenharmony_cinada!("re-re2", count_re2); 110c67d6573Sopenharmony_ci#[cfg(feature = "re-re2")] 111c67d6573Sopenharmony_cifn count_re2(pat: &str, haystack: &str) -> usize { 112c67d6573Sopenharmony_ci use ffi::re2::Regex; 113c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack).count() 114c67d6573Sopenharmony_ci} 115c67d6573Sopenharmony_ci 116c67d6573Sopenharmony_cinada!("re-rust", count_rust); 117c67d6573Sopenharmony_ci#[cfg(feature = "re-rust")] 118c67d6573Sopenharmony_cifn count_rust(pat: &str, haystack: &str) -> usize { 119c67d6573Sopenharmony_ci use regex::Regex; 120c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack).count() 121c67d6573Sopenharmony_ci} 122c67d6573Sopenharmony_ci 123c67d6573Sopenharmony_cinada!("re-rust-bytes", count_rust_bytes); 124c67d6573Sopenharmony_ci#[cfg(feature = "re-rust-bytes")] 125c67d6573Sopenharmony_cifn count_rust_bytes(pat: &str, haystack: &str) -> usize { 126c67d6573Sopenharmony_ci use regex::bytes::Regex; 127c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(haystack.as_bytes()).count() 128c67d6573Sopenharmony_ci} 129c67d6573Sopenharmony_ci 130c67d6573Sopenharmony_cinada!("re-tcl", count_tcl); 131c67d6573Sopenharmony_ci#[cfg(feature = "re-tcl")] 132c67d6573Sopenharmony_cifn count_tcl(pat: &str, haystack: &str) -> usize { 133c67d6573Sopenharmony_ci use ffi::tcl::{Regex, Text}; 134c67d6573Sopenharmony_ci Regex::new(pat).unwrap().find_iter(&Text::new(haystack.to_owned())).count() 135c67d6573Sopenharmony_ci} 136