1fb6c1f39Sopenharmony_ci/* 2fb6c1f39Sopenharmony_ciThis module defines a common API (by convention) for all of the different 3fb6c1f39Sopenharmony_ciimpls that we benchmark. The intent here is to 1) make it easy to write macros 4fb6c1f39Sopenharmony_cifor generating benchmark definitions generic over impls and 2) make it easier 5fb6c1f39Sopenharmony_cito read the benchmarks themselves and grok how exactly each of the impls are 6fb6c1f39Sopenharmony_cibeing invoked. 7fb6c1f39Sopenharmony_ci 8fb6c1f39Sopenharmony_ciThe naming scheme of each function follows the pertinent parts of our benchmark 9fb6c1f39Sopenharmony_cinaming scheme (see parent module docs). Namely, it is 10fb6c1f39Sopenharmony_ci 11fb6c1f39Sopenharmony_ci {impl}/{fwd|rev}/{config} 12fb6c1f39Sopenharmony_ci 13fb6c1f39Sopenharmony_ciWhere 'impl' is the underlying implementation and 'config' is the manner of 14fb6c1f39Sopenharmony_cisearch. The slash indicates a module boundary. We use modules for this because 15fb6c1f39Sopenharmony_ciit makes writing macros to define benchmarks for all variants much easier. 16fb6c1f39Sopenharmony_ci*/ 17fb6c1f39Sopenharmony_ci 18fb6c1f39Sopenharmony_ci/// memchr's implementation of memmem. This is the implementation that we hope 19fb6c1f39Sopenharmony_ci/// does approximately as well as all other implementations, and a lot better 20fb6c1f39Sopenharmony_ci/// in at least some cases. 21fb6c1f39Sopenharmony_cipub(crate) mod krate { 22fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 23fb6c1f39Sopenharmony_ci &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 24fb6c1f39Sopenharmony_ci } 25fb6c1f39Sopenharmony_ci 26fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 27fb6c1f39Sopenharmony_ci use memchr::memmem; 28fb6c1f39Sopenharmony_ci 29fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 30fb6c1f39Sopenharmony_ci memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some() 31fb6c1f39Sopenharmony_ci } 32fb6c1f39Sopenharmony_ci 33fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 34fb6c1f39Sopenharmony_ci needle: &str, 35fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 36fb6c1f39Sopenharmony_ci let finder = memmem::Finder::new(needle).into_owned(); 37fb6c1f39Sopenharmony_ci move |h| finder.find(h.as_bytes()).is_some() 38fb6c1f39Sopenharmony_ci } 39fb6c1f39Sopenharmony_ci 40fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 41fb6c1f39Sopenharmony_ci haystack: &'a str, 42fb6c1f39Sopenharmony_ci needle: &'a str, 43fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 44fb6c1f39Sopenharmony_ci memmem::find_iter(haystack.as_bytes(), needle.as_bytes()) 45fb6c1f39Sopenharmony_ci } 46fb6c1f39Sopenharmony_ci 47fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 48fb6c1f39Sopenharmony_ci PrebuiltIter(memmem::Finder::new(needle).into_owned()) 49fb6c1f39Sopenharmony_ci } 50fb6c1f39Sopenharmony_ci 51fb6c1f39Sopenharmony_ci #[derive(Debug)] 52fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(memmem::Finder<'static>); 53fb6c1f39Sopenharmony_ci 54fb6c1f39Sopenharmony_ci impl PrebuiltIter { 55fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 56fb6c1f39Sopenharmony_ci &'a self, 57fb6c1f39Sopenharmony_ci haystack: &'a str, 58fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 59fb6c1f39Sopenharmony_ci self.0.find_iter(haystack.as_bytes()) 60fb6c1f39Sopenharmony_ci } 61fb6c1f39Sopenharmony_ci } 62fb6c1f39Sopenharmony_ci } 63fb6c1f39Sopenharmony_ci 64fb6c1f39Sopenharmony_ci pub(crate) mod rev { 65fb6c1f39Sopenharmony_ci use memchr::memmem; 66fb6c1f39Sopenharmony_ci 67fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 68fb6c1f39Sopenharmony_ci memmem::rfind(haystack.as_bytes(), needle.as_bytes()).is_some() 69fb6c1f39Sopenharmony_ci } 70fb6c1f39Sopenharmony_ci 71fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 72fb6c1f39Sopenharmony_ci needle: &str, 73fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 74fb6c1f39Sopenharmony_ci let finder = memmem::FinderRev::new(needle).into_owned(); 75fb6c1f39Sopenharmony_ci move |h| finder.rfind(h.as_bytes()).is_some() 76fb6c1f39Sopenharmony_ci } 77fb6c1f39Sopenharmony_ci 78fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 79fb6c1f39Sopenharmony_ci haystack: &'a str, 80fb6c1f39Sopenharmony_ci needle: &'a str, 81fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 82fb6c1f39Sopenharmony_ci memmem::rfind_iter(haystack.as_bytes(), needle.as_bytes()) 83fb6c1f39Sopenharmony_ci } 84fb6c1f39Sopenharmony_ci 85fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 86fb6c1f39Sopenharmony_ci PrebuiltIter(memmem::FinderRev::new(needle).into_owned()) 87fb6c1f39Sopenharmony_ci } 88fb6c1f39Sopenharmony_ci 89fb6c1f39Sopenharmony_ci #[derive(Debug)] 90fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>); 91fb6c1f39Sopenharmony_ci 92fb6c1f39Sopenharmony_ci impl PrebuiltIter { 93fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 94fb6c1f39Sopenharmony_ci &'a self, 95fb6c1f39Sopenharmony_ci haystack: &'a str, 96fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 97fb6c1f39Sopenharmony_ci self.0.rfind_iter(haystack.as_bytes()) 98fb6c1f39Sopenharmony_ci } 99fb6c1f39Sopenharmony_ci } 100fb6c1f39Sopenharmony_ci } 101fb6c1f39Sopenharmony_ci} 102fb6c1f39Sopenharmony_ci 103fb6c1f39Sopenharmony_ci/// memchr's implementation of memmem, but without prefilters enabled. This 104fb6c1f39Sopenharmony_ci/// exists because sometimes prefilters aren't the right choice, and it's good 105fb6c1f39Sopenharmony_ci/// to be able to compare it against prefilter-accelerated searches to see 106fb6c1f39Sopenharmony_ci/// where this might be faster. 107fb6c1f39Sopenharmony_cipub(crate) mod krate_nopre { 108fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 109fb6c1f39Sopenharmony_ci &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 110fb6c1f39Sopenharmony_ci } 111fb6c1f39Sopenharmony_ci 112fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 113fb6c1f39Sopenharmony_ci use memchr::memmem; 114fb6c1f39Sopenharmony_ci 115fb6c1f39Sopenharmony_ci fn finder(needle: &[u8]) -> memmem::Finder<'_> { 116fb6c1f39Sopenharmony_ci memmem::FinderBuilder::new() 117fb6c1f39Sopenharmony_ci .prefilter(memmem::Prefilter::None) 118fb6c1f39Sopenharmony_ci .build_forward(needle) 119fb6c1f39Sopenharmony_ci } 120fb6c1f39Sopenharmony_ci 121fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 122fb6c1f39Sopenharmony_ci finder(needle.as_bytes()).find(haystack.as_bytes()).is_some() 123fb6c1f39Sopenharmony_ci } 124fb6c1f39Sopenharmony_ci 125fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 126fb6c1f39Sopenharmony_ci needle: &str, 127fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 128fb6c1f39Sopenharmony_ci let finder = finder(needle.as_bytes()).into_owned(); 129fb6c1f39Sopenharmony_ci move |h| finder.find(h.as_bytes()).is_some() 130fb6c1f39Sopenharmony_ci } 131fb6c1f39Sopenharmony_ci 132fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 133fb6c1f39Sopenharmony_ci haystack: &'a str, 134fb6c1f39Sopenharmony_ci needle: &'a str, 135fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 136fb6c1f39Sopenharmony_ci super::super::iter_from_find( 137fb6c1f39Sopenharmony_ci haystack.as_bytes(), 138fb6c1f39Sopenharmony_ci needle.as_bytes(), 139fb6c1f39Sopenharmony_ci |h, n| finder(n).find(h), 140fb6c1f39Sopenharmony_ci ) 141fb6c1f39Sopenharmony_ci } 142fb6c1f39Sopenharmony_ci 143fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 144fb6c1f39Sopenharmony_ci PrebuiltIter(finder(needle.as_bytes()).into_owned()) 145fb6c1f39Sopenharmony_ci } 146fb6c1f39Sopenharmony_ci 147fb6c1f39Sopenharmony_ci #[derive(Debug)] 148fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(memmem::Finder<'static>); 149fb6c1f39Sopenharmony_ci 150fb6c1f39Sopenharmony_ci impl PrebuiltIter { 151fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 152fb6c1f39Sopenharmony_ci &'a self, 153fb6c1f39Sopenharmony_ci haystack: &'a str, 154fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 155fb6c1f39Sopenharmony_ci self.0.find_iter(haystack.as_bytes()) 156fb6c1f39Sopenharmony_ci } 157fb6c1f39Sopenharmony_ci } 158fb6c1f39Sopenharmony_ci } 159fb6c1f39Sopenharmony_ci 160fb6c1f39Sopenharmony_ci // N.B. memrmem/krate_nopre and memrmem/krate should be equivalent for now 161fb6c1f39Sopenharmony_ci // since reverse searching doesn't have any prefilter support. 162fb6c1f39Sopenharmony_ci pub(crate) mod rev { 163fb6c1f39Sopenharmony_ci use memchr::memmem; 164fb6c1f39Sopenharmony_ci 165fb6c1f39Sopenharmony_ci fn finder(needle: &[u8]) -> memmem::FinderRev<'_> { 166fb6c1f39Sopenharmony_ci memmem::FinderBuilder::new() 167fb6c1f39Sopenharmony_ci .prefilter(memmem::Prefilter::None) 168fb6c1f39Sopenharmony_ci .build_reverse(needle) 169fb6c1f39Sopenharmony_ci } 170fb6c1f39Sopenharmony_ci 171fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 172fb6c1f39Sopenharmony_ci finder(needle.as_bytes()).rfind(haystack.as_bytes()).is_some() 173fb6c1f39Sopenharmony_ci } 174fb6c1f39Sopenharmony_ci 175fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 176fb6c1f39Sopenharmony_ci needle: &str, 177fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 178fb6c1f39Sopenharmony_ci let finder = finder(needle.as_bytes()).into_owned(); 179fb6c1f39Sopenharmony_ci move |h| finder.rfind(h.as_bytes()).is_some() 180fb6c1f39Sopenharmony_ci } 181fb6c1f39Sopenharmony_ci 182fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 183fb6c1f39Sopenharmony_ci haystack: &'a str, 184fb6c1f39Sopenharmony_ci needle: &'a str, 185fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 186fb6c1f39Sopenharmony_ci super::super::iter_from_rfind( 187fb6c1f39Sopenharmony_ci haystack.as_bytes(), 188fb6c1f39Sopenharmony_ci needle.as_bytes(), 189fb6c1f39Sopenharmony_ci |h, n| finder(n).rfind(h), 190fb6c1f39Sopenharmony_ci ) 191fb6c1f39Sopenharmony_ci } 192fb6c1f39Sopenharmony_ci 193fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 194fb6c1f39Sopenharmony_ci PrebuiltIter(finder(needle.as_bytes()).into_owned()) 195fb6c1f39Sopenharmony_ci } 196fb6c1f39Sopenharmony_ci 197fb6c1f39Sopenharmony_ci #[derive(Debug)] 198fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>); 199fb6c1f39Sopenharmony_ci 200fb6c1f39Sopenharmony_ci impl PrebuiltIter { 201fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 202fb6c1f39Sopenharmony_ci &'a self, 203fb6c1f39Sopenharmony_ci haystack: &'a str, 204fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 205fb6c1f39Sopenharmony_ci self.0.rfind_iter(haystack.as_bytes()) 206fb6c1f39Sopenharmony_ci } 207fb6c1f39Sopenharmony_ci } 208fb6c1f39Sopenharmony_ci } 209fb6c1f39Sopenharmony_ci} 210fb6c1f39Sopenharmony_ci 211fb6c1f39Sopenharmony_ci/// bstr's implementation of memmem. 212fb6c1f39Sopenharmony_ci/// 213fb6c1f39Sopenharmony_ci/// The implementation in this crate was originally copied from bstr. 214fb6c1f39Sopenharmony_ci/// Eventually, bstr will just use the implementation in this crate, but at time 215fb6c1f39Sopenharmony_ci/// of writing, it was useful to benchmark against the "original" version. 216fb6c1f39Sopenharmony_cipub(crate) mod bstr { 217fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 218fb6c1f39Sopenharmony_ci &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"] 219fb6c1f39Sopenharmony_ci } 220fb6c1f39Sopenharmony_ci 221fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 222fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 223fb6c1f39Sopenharmony_ci bstr::ByteSlice::find(haystack.as_bytes(), needle.as_bytes()) 224fb6c1f39Sopenharmony_ci .is_some() 225fb6c1f39Sopenharmony_ci } 226fb6c1f39Sopenharmony_ci 227fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 228fb6c1f39Sopenharmony_ci needle: &str, 229fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 230fb6c1f39Sopenharmony_ci let finder = bstr::Finder::new(needle).into_owned(); 231fb6c1f39Sopenharmony_ci move |h| finder.find(h.as_bytes()).is_some() 232fb6c1f39Sopenharmony_ci } 233fb6c1f39Sopenharmony_ci 234fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 235fb6c1f39Sopenharmony_ci haystack: &'a str, 236fb6c1f39Sopenharmony_ci needle: &'a str, 237fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 238fb6c1f39Sopenharmony_ci bstr::ByteSlice::find_iter(haystack.as_bytes(), needle.as_bytes()) 239fb6c1f39Sopenharmony_ci } 240fb6c1f39Sopenharmony_ci 241fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 242fb6c1f39Sopenharmony_ci PrebuiltIter(bstr::Finder::new(needle).into_owned()) 243fb6c1f39Sopenharmony_ci } 244fb6c1f39Sopenharmony_ci 245fb6c1f39Sopenharmony_ci #[derive(Debug)] 246fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(bstr::Finder<'static>); 247fb6c1f39Sopenharmony_ci 248fb6c1f39Sopenharmony_ci impl PrebuiltIter { 249fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 250fb6c1f39Sopenharmony_ci &'a self, 251fb6c1f39Sopenharmony_ci haystack: &'a str, 252fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 253fb6c1f39Sopenharmony_ci super::super::iter_from_find( 254fb6c1f39Sopenharmony_ci haystack.as_bytes(), 255fb6c1f39Sopenharmony_ci self.0.needle(), 256fb6c1f39Sopenharmony_ci move |h, _| self.0.find(h), 257fb6c1f39Sopenharmony_ci ) 258fb6c1f39Sopenharmony_ci } 259fb6c1f39Sopenharmony_ci } 260fb6c1f39Sopenharmony_ci } 261fb6c1f39Sopenharmony_ci 262fb6c1f39Sopenharmony_ci pub(crate) mod rev { 263fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 264fb6c1f39Sopenharmony_ci bstr::ByteSlice::rfind(haystack.as_bytes(), needle.as_bytes()) 265fb6c1f39Sopenharmony_ci .is_some() 266fb6c1f39Sopenharmony_ci } 267fb6c1f39Sopenharmony_ci 268fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 269fb6c1f39Sopenharmony_ci needle: &str, 270fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 271fb6c1f39Sopenharmony_ci let finder = bstr::FinderReverse::new(needle).into_owned(); 272fb6c1f39Sopenharmony_ci move |h| finder.rfind(h.as_bytes()).is_some() 273fb6c1f39Sopenharmony_ci } 274fb6c1f39Sopenharmony_ci 275fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 276fb6c1f39Sopenharmony_ci haystack: &'a str, 277fb6c1f39Sopenharmony_ci needle: &'a str, 278fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 279fb6c1f39Sopenharmony_ci bstr::ByteSlice::rfind_iter(haystack.as_bytes(), needle.as_bytes()) 280fb6c1f39Sopenharmony_ci } 281fb6c1f39Sopenharmony_ci 282fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 283fb6c1f39Sopenharmony_ci PrebuiltIter(bstr::FinderReverse::new(needle).into_owned()) 284fb6c1f39Sopenharmony_ci } 285fb6c1f39Sopenharmony_ci 286fb6c1f39Sopenharmony_ci #[derive(Debug)] 287fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(bstr::FinderReverse<'static>); 288fb6c1f39Sopenharmony_ci 289fb6c1f39Sopenharmony_ci impl PrebuiltIter { 290fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 291fb6c1f39Sopenharmony_ci &'a self, 292fb6c1f39Sopenharmony_ci haystack: &'a str, 293fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 294fb6c1f39Sopenharmony_ci super::super::iter_from_rfind( 295fb6c1f39Sopenharmony_ci haystack.as_bytes(), 296fb6c1f39Sopenharmony_ci self.0.needle(), 297fb6c1f39Sopenharmony_ci move |h, _| self.0.rfind(h), 298fb6c1f39Sopenharmony_ci ) 299fb6c1f39Sopenharmony_ci } 300fb6c1f39Sopenharmony_ci } 301fb6c1f39Sopenharmony_ci } 302fb6c1f39Sopenharmony_ci} 303fb6c1f39Sopenharmony_ci 304fb6c1f39Sopenharmony_ci/// regex's implementation of substring search. 305fb6c1f39Sopenharmony_ci/// 306fb6c1f39Sopenharmony_ci/// regex is where the concept of using heuristics based on an a priori 307fb6c1f39Sopenharmony_ci/// assumption of byte frequency originated. Eventually, regex will just use the 308fb6c1f39Sopenharmony_ci/// implementation in this crate, but it will still be useful to benchmark since 309fb6c1f39Sopenharmony_ci/// regex tends to have higher latency. It would be good to measure that. 310fb6c1f39Sopenharmony_ci/// 311fb6c1f39Sopenharmony_ci/// For regex, we don't provide oneshots, since that requires compiling the 312fb6c1f39Sopenharmony_ci/// regex which we know is going to be ridiculously slow. No real need to 313fb6c1f39Sopenharmony_ci/// measure it I think. 314fb6c1f39Sopenharmony_cipub(crate) mod regex { 315fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 316fb6c1f39Sopenharmony_ci &["prebuilt", "prebuiltiter"] 317fb6c1f39Sopenharmony_ci } 318fb6c1f39Sopenharmony_ci 319fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 320fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 321fb6c1f39Sopenharmony_ci unimplemented!("regex does not support oneshot searches") 322fb6c1f39Sopenharmony_ci } 323fb6c1f39Sopenharmony_ci 324fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 325fb6c1f39Sopenharmony_ci needle: &str, 326fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 327fb6c1f39Sopenharmony_ci let finder = regex::Regex::new(®ex::escape(needle)).unwrap(); 328fb6c1f39Sopenharmony_ci move |h| finder.is_match(h) 329fb6c1f39Sopenharmony_ci } 330fb6c1f39Sopenharmony_ci 331fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter( 332fb6c1f39Sopenharmony_ci _haystack: &str, 333fb6c1f39Sopenharmony_ci _needle: &str, 334fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 335fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 336fb6c1f39Sopenharmony_ci unimplemented!("regex does not support oneshot searches") 337fb6c1f39Sopenharmony_ci }) 338fb6c1f39Sopenharmony_ci } 339fb6c1f39Sopenharmony_ci 340fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter { 341fb6c1f39Sopenharmony_ci PrebuiltIter(regex::Regex::new(®ex::escape(needle)).unwrap()) 342fb6c1f39Sopenharmony_ci } 343fb6c1f39Sopenharmony_ci 344fb6c1f39Sopenharmony_ci #[derive(Debug)] 345fb6c1f39Sopenharmony_ci pub(crate) struct PrebuiltIter(regex::Regex); 346fb6c1f39Sopenharmony_ci 347fb6c1f39Sopenharmony_ci impl PrebuiltIter { 348fb6c1f39Sopenharmony_ci pub(crate) fn iter<'a>( 349fb6c1f39Sopenharmony_ci &'a self, 350fb6c1f39Sopenharmony_ci haystack: &'a str, 351fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 352fb6c1f39Sopenharmony_ci self.0.find_iter(haystack).map(|m| m.start()) 353fb6c1f39Sopenharmony_ci } 354fb6c1f39Sopenharmony_ci } 355fb6c1f39Sopenharmony_ci } 356fb6c1f39Sopenharmony_ci 357fb6c1f39Sopenharmony_ci pub(crate) mod rev { 358fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 359fb6c1f39Sopenharmony_ci unimplemented!("regex does not support reverse searches") 360fb6c1f39Sopenharmony_ci } 361fb6c1f39Sopenharmony_ci 362fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 363fb6c1f39Sopenharmony_ci _needle: &str, 364fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 365fb6c1f39Sopenharmony_ci |_| unimplemented!("regex does not support reverse searches") 366fb6c1f39Sopenharmony_ci } 367fb6c1f39Sopenharmony_ci 368fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter( 369fb6c1f39Sopenharmony_ci _haystack: &str, 370fb6c1f39Sopenharmony_ci _needle: &str, 371fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 372fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 373fb6c1f39Sopenharmony_ci unimplemented!("regex does not support reverse searches") 374fb6c1f39Sopenharmony_ci }) 375fb6c1f39Sopenharmony_ci } 376fb6c1f39Sopenharmony_ci 377fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 378fb6c1f39Sopenharmony_ci unimplemented!("regex does not support reverse searches") 379fb6c1f39Sopenharmony_ci } 380fb6c1f39Sopenharmony_ci } 381fb6c1f39Sopenharmony_ci} 382fb6c1f39Sopenharmony_ci 383fb6c1f39Sopenharmony_ci/// std's substring search implementation. 384fb6c1f39Sopenharmony_ci/// 385fb6c1f39Sopenharmony_ci/// std uses Two-Way like this crate, but doesn't have any prefilter 386fb6c1f39Sopenharmony_ci/// heuristics. 387fb6c1f39Sopenharmony_ci/// 388fb6c1f39Sopenharmony_ci/// std doesn't have any way to amortize the construction of the searcher, so 389fb6c1f39Sopenharmony_ci/// we can't implement any of the prebuilt routines. 390fb6c1f39Sopenharmony_cipub(crate) mod stud { 391fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 392fb6c1f39Sopenharmony_ci &["reverse", "oneshot", "oneshotiter"] 393fb6c1f39Sopenharmony_ci } 394fb6c1f39Sopenharmony_ci 395fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 396fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 397fb6c1f39Sopenharmony_ci haystack.contains(needle) 398fb6c1f39Sopenharmony_ci } 399fb6c1f39Sopenharmony_ci 400fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 401fb6c1f39Sopenharmony_ci _needle: &str, 402fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 403fb6c1f39Sopenharmony_ci |_| unimplemented!("std does not support prebuilt searches") 404fb6c1f39Sopenharmony_ci } 405fb6c1f39Sopenharmony_ci 406fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 407fb6c1f39Sopenharmony_ci haystack: &'a str, 408fb6c1f39Sopenharmony_ci needle: &'a str, 409fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 410fb6c1f39Sopenharmony_ci haystack.match_indices(needle).map(|(i, _)| i) 411fb6c1f39Sopenharmony_ci } 412fb6c1f39Sopenharmony_ci 413fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 414fb6c1f39Sopenharmony_ci super::super::NoIter { imp: "std" } 415fb6c1f39Sopenharmony_ci } 416fb6c1f39Sopenharmony_ci } 417fb6c1f39Sopenharmony_ci 418fb6c1f39Sopenharmony_ci pub(crate) mod rev { 419fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 420fb6c1f39Sopenharmony_ci haystack.contains(needle) 421fb6c1f39Sopenharmony_ci } 422fb6c1f39Sopenharmony_ci 423fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 424fb6c1f39Sopenharmony_ci _needle: &str, 425fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 426fb6c1f39Sopenharmony_ci |_| unimplemented!("std does not support prebuilt searches") 427fb6c1f39Sopenharmony_ci } 428fb6c1f39Sopenharmony_ci 429fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 430fb6c1f39Sopenharmony_ci haystack: &'a str, 431fb6c1f39Sopenharmony_ci needle: &'a str, 432fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 433fb6c1f39Sopenharmony_ci haystack.rmatch_indices(needle).map(|(i, _)| i) 434fb6c1f39Sopenharmony_ci } 435fb6c1f39Sopenharmony_ci 436fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 437fb6c1f39Sopenharmony_ci super::super::NoIter { imp: "std" } 438fb6c1f39Sopenharmony_ci } 439fb6c1f39Sopenharmony_ci } 440fb6c1f39Sopenharmony_ci} 441fb6c1f39Sopenharmony_ci 442fb6c1f39Sopenharmony_ci/// Substring search from the twoway crate. 443fb6c1f39Sopenharmony_ci/// 444fb6c1f39Sopenharmony_ci/// twoway uses, obviously, Two-Way as an implementation. AIUI, it was taken 445fb6c1f39Sopenharmony_ci/// from std at some point but heavily modified to support a prefilter via 446fb6c1f39Sopenharmony_ci/// PCMPESTRI from the SSE 4.2 ISA extension. (And also uses memchr for 447fb6c1f39Sopenharmony_ci/// single-byte needles.) 448fb6c1f39Sopenharmony_ci/// 449fb6c1f39Sopenharmony_ci/// Like std, there is no way to amortize the construction of the searcher, so 450fb6c1f39Sopenharmony_ci/// we can't implement any of the prebuilt routines. 451fb6c1f39Sopenharmony_cipub(crate) mod twoway { 452fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 453fb6c1f39Sopenharmony_ci &["reverse", "oneshot", "oneshotiter"] 454fb6c1f39Sopenharmony_ci } 455fb6c1f39Sopenharmony_ci 456fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 457fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 458fb6c1f39Sopenharmony_ci twoway::find_bytes(haystack.as_bytes(), needle.as_bytes()) 459fb6c1f39Sopenharmony_ci .is_some() 460fb6c1f39Sopenharmony_ci } 461fb6c1f39Sopenharmony_ci 462fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 463fb6c1f39Sopenharmony_ci _needle: &str, 464fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 465fb6c1f39Sopenharmony_ci |_| unimplemented!("twoway does not support prebuilt searches") 466fb6c1f39Sopenharmony_ci } 467fb6c1f39Sopenharmony_ci 468fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 469fb6c1f39Sopenharmony_ci haystack: &'a str, 470fb6c1f39Sopenharmony_ci needle: &'a str, 471fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 472fb6c1f39Sopenharmony_ci super::super::iter_from_find( 473fb6c1f39Sopenharmony_ci haystack.as_bytes(), 474fb6c1f39Sopenharmony_ci needle.as_bytes(), 475fb6c1f39Sopenharmony_ci twoway::find_bytes, 476fb6c1f39Sopenharmony_ci ) 477fb6c1f39Sopenharmony_ci } 478fb6c1f39Sopenharmony_ci 479fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 480fb6c1f39Sopenharmony_ci super::super::NoIter { imp: "twoway" } 481fb6c1f39Sopenharmony_ci } 482fb6c1f39Sopenharmony_ci } 483fb6c1f39Sopenharmony_ci 484fb6c1f39Sopenharmony_ci pub(crate) mod rev { 485fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 486fb6c1f39Sopenharmony_ci twoway::rfind_bytes(haystack.as_bytes(), needle.as_bytes()) 487fb6c1f39Sopenharmony_ci .is_some() 488fb6c1f39Sopenharmony_ci } 489fb6c1f39Sopenharmony_ci 490fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 491fb6c1f39Sopenharmony_ci _needle: &str, 492fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 493fb6c1f39Sopenharmony_ci |_| unimplemented!("twoway does not support prebuilt searches") 494fb6c1f39Sopenharmony_ci } 495fb6c1f39Sopenharmony_ci 496fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 497fb6c1f39Sopenharmony_ci haystack: &'a str, 498fb6c1f39Sopenharmony_ci needle: &'a str, 499fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 500fb6c1f39Sopenharmony_ci super::super::iter_from_rfind( 501fb6c1f39Sopenharmony_ci haystack.as_bytes(), 502fb6c1f39Sopenharmony_ci needle.as_bytes(), 503fb6c1f39Sopenharmony_ci twoway::rfind_bytes, 504fb6c1f39Sopenharmony_ci ) 505fb6c1f39Sopenharmony_ci } 506fb6c1f39Sopenharmony_ci 507fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 508fb6c1f39Sopenharmony_ci super::super::NoIter { imp: "twoway" } 509fb6c1f39Sopenharmony_ci } 510fb6c1f39Sopenharmony_ci } 511fb6c1f39Sopenharmony_ci} 512fb6c1f39Sopenharmony_ci 513fb6c1f39Sopenharmony_ci/// Substring search from the sliceslice crate. 514fb6c1f39Sopenharmony_ci/// 515fb6c1f39Sopenharmony_ci/// This crate is what inspired me to write a vectorized memmem implementation 516fb6c1f39Sopenharmony_ci/// in the memchr crate in the first place. In particular, it exposed some 517fb6c1f39Sopenharmony_ci/// serious weaknesses in my implementation in the bstr crate. 518fb6c1f39Sopenharmony_ci/// 519fb6c1f39Sopenharmony_ci/// sliceslice doesn't actually do anything "new" other 520fb6c1f39Sopenharmony_ci/// than bringing a long known SIMD algorithm to Rust: 521fb6c1f39Sopenharmony_ci/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd 522fb6c1f39Sopenharmony_ci/// 523fb6c1f39Sopenharmony_ci/// The main thrust of the algorithm is that it picks a couple of bytes in the 524fb6c1f39Sopenharmony_ci/// needle and uses SIMD to check whether those two bytes occur in the haystack 525fb6c1f39Sopenharmony_ci/// in a way that could lead to a match. If so, then you do a simple memcmp 526fb6c1f39Sopenharmony_ci/// confirmation step. The main problem with this algorithm is that its worst 527fb6c1f39Sopenharmony_ci/// case is multiplicative: that confirmatory step can become quite costly if 528fb6c1f39Sopenharmony_ci/// the SIMD prefilter isn't effective. The elegance of this method, however, 529fb6c1f39Sopenharmony_ci/// is that the prefilter is routinely effective. 530fb6c1f39Sopenharmony_ci/// 531fb6c1f39Sopenharmony_ci/// The essence of memchr's implementation of memmem comes from sliceslice, 532fb6c1f39Sopenharmony_ci/// but also from regex's original idea to use heuristics based on an a priori 533fb6c1f39Sopenharmony_ci/// assumption of relative byte frequency AND from bstr's desire to have a 534fb6c1f39Sopenharmony_ci/// constant space and worst case O(m+n) substring search. My claim is that 535fb6c1f39Sopenharmony_ci/// it is the best of all words, and that's why this benchmark suite is so 536fb6c1f39Sopenharmony_ci/// comprehensive. There are a lot of cases and implementations to test. 537fb6c1f39Sopenharmony_ci/// 538fb6c1f39Sopenharmony_ci/// NOTE: The API of sliceslice is quite constrained. My guess is that it was 539fb6c1f39Sopenharmony_ci/// designed for a very specific use case, and the API is heavily constrained 540fb6c1f39Sopenharmony_ci/// to that use case (whatever it is). While its API doesn't provide any 541fb6c1f39Sopenharmony_ci/// oneshot routines, we emulate them. (Its main problem is that every such 542fb6c1f39Sopenharmony_ci/// search requires copying the needle into a fresh allocation. The memchr 543fb6c1f39Sopenharmony_ci/// crate avoids that problem by being generic over the needle: it can be owned 544fb6c1f39Sopenharmony_ci/// or borrowed.) Also, since the API only enables testing whether a substring 545fb6c1f39Sopenharmony_ci/// exists or not, we can't benchmark iteration. 546fb6c1f39Sopenharmony_ci/// 547fb6c1f39Sopenharmony_ci/// NOTE: sliceslice only works on x86_64 CPUs with AVX enabled. So not only 548fb6c1f39Sopenharmony_ci/// do we conditionally compile the routines below, but we only run these 549fb6c1f39Sopenharmony_ci/// benchmarks when AVX2 is available. 550fb6c1f39Sopenharmony_ci#[cfg(target_arch = "x86_64")] 551fb6c1f39Sopenharmony_cipub(crate) mod sliceslice { 552fb6c1f39Sopenharmony_ci pub(crate) fn available(needle: &str) -> &'static [&'static str] { 553fb6c1f39Sopenharmony_ci // Apparently sliceslice doesn't support searching with an empty 554fb6c1f39Sopenharmony_ci // needle. Sheesh. 555fb6c1f39Sopenharmony_ci if !needle.is_empty() && is_x86_feature_detected!("avx2") { 556fb6c1f39Sopenharmony_ci &["oneshot", "prebuilt"] 557fb6c1f39Sopenharmony_ci } else { 558fb6c1f39Sopenharmony_ci &[] 559fb6c1f39Sopenharmony_ci } 560fb6c1f39Sopenharmony_ci } 561fb6c1f39Sopenharmony_ci 562fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 563fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 564fb6c1f39Sopenharmony_ci if !is_x86_feature_detected!("avx2") { 565fb6c1f39Sopenharmony_ci unreachable!("sliceslice cannot be called without avx2"); 566fb6c1f39Sopenharmony_ci } 567fb6c1f39Sopenharmony_ci let needle = needle.as_bytes(); 568fb6c1f39Sopenharmony_ci // SAFETY: This code path is only entered when AVX2 is enabled, 569fb6c1f39Sopenharmony_ci // which is the only requirement for using DynamicAvx2Searcher. 570fb6c1f39Sopenharmony_ci unsafe { 571fb6c1f39Sopenharmony_ci let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle); 572fb6c1f39Sopenharmony_ci finder.search_in(haystack.as_bytes()) 573fb6c1f39Sopenharmony_ci } 574fb6c1f39Sopenharmony_ci } 575fb6c1f39Sopenharmony_ci 576fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 577fb6c1f39Sopenharmony_ci needle: &str, 578fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 579fb6c1f39Sopenharmony_ci if !is_x86_feature_detected!("avx2") { 580fb6c1f39Sopenharmony_ci unreachable!("sliceslice cannot be called without avx2"); 581fb6c1f39Sopenharmony_ci } 582fb6c1f39Sopenharmony_ci let needle = needle.as_bytes().to_vec(); 583fb6c1f39Sopenharmony_ci // SAFETY: This code path is only entered when AVX2 is enabled, 584fb6c1f39Sopenharmony_ci // which is the only requirement for using DynamicAvx2Searcher. 585fb6c1f39Sopenharmony_ci unsafe { 586fb6c1f39Sopenharmony_ci let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle); 587fb6c1f39Sopenharmony_ci move |h| finder.search_in(h.as_bytes()) 588fb6c1f39Sopenharmony_ci } 589fb6c1f39Sopenharmony_ci } 590fb6c1f39Sopenharmony_ci 591fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter( 592fb6c1f39Sopenharmony_ci _haystack: &str, 593fb6c1f39Sopenharmony_ci _needle: &str, 594fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 595fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 596fb6c1f39Sopenharmony_ci unimplemented!("sliceslice doesn't not support iteration") 597fb6c1f39Sopenharmony_ci }) 598fb6c1f39Sopenharmony_ci } 599fb6c1f39Sopenharmony_ci 600fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 601fb6c1f39Sopenharmony_ci unimplemented!("sliceslice doesn't support prebuilt iteration") 602fb6c1f39Sopenharmony_ci } 603fb6c1f39Sopenharmony_ci } 604fb6c1f39Sopenharmony_ci 605fb6c1f39Sopenharmony_ci pub(crate) mod rev { 606fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 607fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 608fb6c1f39Sopenharmony_ci } 609fb6c1f39Sopenharmony_ci 610fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 611fb6c1f39Sopenharmony_ci _needle: &str, 612fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 613fb6c1f39Sopenharmony_ci |_| unimplemented!("sliceslice does not support reverse searches") 614fb6c1f39Sopenharmony_ci } 615fb6c1f39Sopenharmony_ci 616fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter( 617fb6c1f39Sopenharmony_ci _haystack: &str, 618fb6c1f39Sopenharmony_ci _needle: &str, 619fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 620fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 621fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 622fb6c1f39Sopenharmony_ci }) 623fb6c1f39Sopenharmony_ci } 624fb6c1f39Sopenharmony_ci 625fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 626fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 627fb6c1f39Sopenharmony_ci } 628fb6c1f39Sopenharmony_ci } 629fb6c1f39Sopenharmony_ci} 630fb6c1f39Sopenharmony_ci 631fb6c1f39Sopenharmony_ci#[cfg(not(target_arch = "x86_64"))] 632fb6c1f39Sopenharmony_cipub(crate) mod sliceslice { 633fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 634fb6c1f39Sopenharmony_ci &[] 635fb6c1f39Sopenharmony_ci } 636fb6c1f39Sopenharmony_ci 637fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 638fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_: &str, _: &str) -> bool { 639fb6c1f39Sopenharmony_ci unimplemented!("sliceslice only runs on x86") 640fb6c1f39Sopenharmony_ci } 641fb6c1f39Sopenharmony_ci 642fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static { 643fb6c1f39Sopenharmony_ci if true { 644fb6c1f39Sopenharmony_ci unimplemented!("sliceslice only runs on x86") 645fb6c1f39Sopenharmony_ci } 646fb6c1f39Sopenharmony_ci |_| false 647fb6c1f39Sopenharmony_ci } 648fb6c1f39Sopenharmony_ci 649fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 650fb6c1f39Sopenharmony_ci _haystack: &'a str, 651fb6c1f39Sopenharmony_ci _needle: &'a str, 652fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 653fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 654fb6c1f39Sopenharmony_ci unimplemented!("sliceslice only runs on x86") 655fb6c1f39Sopenharmony_ci }) 656fb6c1f39Sopenharmony_ci } 657fb6c1f39Sopenharmony_ci 658fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 659fb6c1f39Sopenharmony_ci unimplemented!("sliceslice only runs on x86") 660fb6c1f39Sopenharmony_ci } 661fb6c1f39Sopenharmony_ci } 662fb6c1f39Sopenharmony_ci 663fb6c1f39Sopenharmony_ci pub(crate) mod rev { 664fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 665fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 666fb6c1f39Sopenharmony_ci } 667fb6c1f39Sopenharmony_ci 668fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 669fb6c1f39Sopenharmony_ci _needle: &str, 670fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 671fb6c1f39Sopenharmony_ci |_| unimplemented!("sliceslice does not support reverse searches") 672fb6c1f39Sopenharmony_ci } 673fb6c1f39Sopenharmony_ci 674fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter( 675fb6c1f39Sopenharmony_ci _haystack: &str, 676fb6c1f39Sopenharmony_ci _needle: &str, 677fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 678fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 679fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 680fb6c1f39Sopenharmony_ci }) 681fb6c1f39Sopenharmony_ci } 682fb6c1f39Sopenharmony_ci 683fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 684fb6c1f39Sopenharmony_ci unimplemented!("sliceslice does not support reverse searches") 685fb6c1f39Sopenharmony_ci } 686fb6c1f39Sopenharmony_ci } 687fb6c1f39Sopenharmony_ci} 688fb6c1f39Sopenharmony_ci 689fb6c1f39Sopenharmony_ci/// libc's substring search implementation. 690fb6c1f39Sopenharmony_ci/// 691fb6c1f39Sopenharmony_ci/// libc doesn't have any way to amortize the construction of the searcher, so 692fb6c1f39Sopenharmony_ci/// we can't implement any of the prebuilt routines. 693fb6c1f39Sopenharmony_cipub(crate) mod libc { 694fb6c1f39Sopenharmony_ci pub(crate) fn available(_: &str) -> &'static [&'static str] { 695fb6c1f39Sopenharmony_ci &["oneshot", "oneshotiter"] 696fb6c1f39Sopenharmony_ci } 697fb6c1f39Sopenharmony_ci 698fb6c1f39Sopenharmony_ci pub(crate) mod fwd { 699fb6c1f39Sopenharmony_ci #[cfg(target_arch = "wasm32")] 700fb6c1f39Sopenharmony_ci extern "C" { 701fb6c1f39Sopenharmony_ci fn memmem( 702fb6c1f39Sopenharmony_ci haystack: *const libc::c_void, 703fb6c1f39Sopenharmony_ci haystack_len: usize, 704fb6c1f39Sopenharmony_ci needle: *const libc::c_void, 705fb6c1f39Sopenharmony_ci needle_len: usize, 706fb6c1f39Sopenharmony_ci ) -> *const libc::c_void; 707fb6c1f39Sopenharmony_ci } 708fb6c1f39Sopenharmony_ci #[cfg(not(target_arch = "wasm32"))] 709fb6c1f39Sopenharmony_ci use libc::memmem; 710fb6c1f39Sopenharmony_ci 711fb6c1f39Sopenharmony_ci fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { 712fb6c1f39Sopenharmony_ci let p = unsafe { 713fb6c1f39Sopenharmony_ci memmem( 714fb6c1f39Sopenharmony_ci haystack.as_ptr() as *const libc::c_void, 715fb6c1f39Sopenharmony_ci haystack.len(), 716fb6c1f39Sopenharmony_ci needle.as_ptr() as *const libc::c_void, 717fb6c1f39Sopenharmony_ci needle.len(), 718fb6c1f39Sopenharmony_ci ) 719fb6c1f39Sopenharmony_ci }; 720fb6c1f39Sopenharmony_ci if p.is_null() { 721fb6c1f39Sopenharmony_ci None 722fb6c1f39Sopenharmony_ci } else { 723fb6c1f39Sopenharmony_ci Some(p as usize - (haystack.as_ptr() as usize)) 724fb6c1f39Sopenharmony_ci } 725fb6c1f39Sopenharmony_ci } 726fb6c1f39Sopenharmony_ci 727fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool { 728fb6c1f39Sopenharmony_ci find(haystack.as_bytes(), needle.as_bytes()).is_some() 729fb6c1f39Sopenharmony_ci } 730fb6c1f39Sopenharmony_ci 731fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 732fb6c1f39Sopenharmony_ci _needle: &str, 733fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 734fb6c1f39Sopenharmony_ci |_| unimplemented!("std does not support prebuilt searches") 735fb6c1f39Sopenharmony_ci } 736fb6c1f39Sopenharmony_ci 737fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 738fb6c1f39Sopenharmony_ci haystack: &'a str, 739fb6c1f39Sopenharmony_ci needle: &'a str, 740fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 741fb6c1f39Sopenharmony_ci super::super::iter_from_find( 742fb6c1f39Sopenharmony_ci haystack.as_bytes(), 743fb6c1f39Sopenharmony_ci needle.as_bytes(), 744fb6c1f39Sopenharmony_ci find, 745fb6c1f39Sopenharmony_ci ) 746fb6c1f39Sopenharmony_ci } 747fb6c1f39Sopenharmony_ci 748fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 749fb6c1f39Sopenharmony_ci super::super::NoIter { imp: "libc" } 750fb6c1f39Sopenharmony_ci } 751fb6c1f39Sopenharmony_ci } 752fb6c1f39Sopenharmony_ci 753fb6c1f39Sopenharmony_ci pub(crate) mod rev { 754fb6c1f39Sopenharmony_ci pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool { 755fb6c1f39Sopenharmony_ci unimplemented!("libc does not support reverse searches") 756fb6c1f39Sopenharmony_ci } 757fb6c1f39Sopenharmony_ci 758fb6c1f39Sopenharmony_ci pub(crate) fn prebuilt( 759fb6c1f39Sopenharmony_ci _needle: &str, 760fb6c1f39Sopenharmony_ci ) -> impl Fn(&str) -> bool + 'static { 761fb6c1f39Sopenharmony_ci |_| unimplemented!("libc does not support reverse searches") 762fb6c1f39Sopenharmony_ci } 763fb6c1f39Sopenharmony_ci 764fb6c1f39Sopenharmony_ci pub(crate) fn oneshotiter<'a>( 765fb6c1f39Sopenharmony_ci _haystack: &'a str, 766fb6c1f39Sopenharmony_ci _needle: &'a str, 767fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'a { 768fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 769fb6c1f39Sopenharmony_ci unimplemented!("libc does not support reverse searches") 770fb6c1f39Sopenharmony_ci }) 771fb6c1f39Sopenharmony_ci } 772fb6c1f39Sopenharmony_ci 773fb6c1f39Sopenharmony_ci pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter { 774fb6c1f39Sopenharmony_ci unimplemented!("libc does not support reverse searches") 775fb6c1f39Sopenharmony_ci } 776fb6c1f39Sopenharmony_ci } 777fb6c1f39Sopenharmony_ci} 778fb6c1f39Sopenharmony_ci 779fb6c1f39Sopenharmony_ci/// An iterator that looks like a PrebuilIter API-wise, but panics if it's 780fb6c1f39Sopenharmony_ci/// called. This should be used for implementations that don't support 781fb6c1f39Sopenharmony_ci/// prebuilt iteration. 782fb6c1f39Sopenharmony_ci#[derive(Debug)] 783fb6c1f39Sopenharmony_cipub(crate) struct NoIter { 784fb6c1f39Sopenharmony_ci /// The name of the impl to use in the panic message in case it is invoked 785fb6c1f39Sopenharmony_ci /// by mistake. (But the benchmark harness should not invoke it, assuming 786fb6c1f39Sopenharmony_ci /// each impl's 'available' function is correct. 787fb6c1f39Sopenharmony_ci imp: &'static str, 788fb6c1f39Sopenharmony_ci} 789fb6c1f39Sopenharmony_ci 790fb6c1f39Sopenharmony_ciimpl NoIter { 791fb6c1f39Sopenharmony_ci pub(crate) fn iter( 792fb6c1f39Sopenharmony_ci &self, 793fb6c1f39Sopenharmony_ci _: &str, 794fb6c1f39Sopenharmony_ci ) -> impl Iterator<Item = usize> + 'static { 795fb6c1f39Sopenharmony_ci let imp = self.imp; 796fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 797fb6c1f39Sopenharmony_ci unimplemented!("{} does not support prebuilt iteration", imp) 798fb6c1f39Sopenharmony_ci }) 799fb6c1f39Sopenharmony_ci } 800fb6c1f39Sopenharmony_ci} 801fb6c1f39Sopenharmony_ci 802fb6c1f39Sopenharmony_ci/// Accepts a corpus and a needle and a routine that implements substring 803fb6c1f39Sopenharmony_ci/// search, and returns an iterator over all matches. This is useful for 804fb6c1f39Sopenharmony_ci/// benchmarking "find all matches" for substring search implementations that 805fb6c1f39Sopenharmony_ci/// don't expose a native way to do this. 806fb6c1f39Sopenharmony_ci/// 807fb6c1f39Sopenharmony_ci/// The closure given takes two parameters: the corpus and needle, in that 808fb6c1f39Sopenharmony_ci/// order. 809fb6c1f39Sopenharmony_cifn iter_from_find<'a>( 810fb6c1f39Sopenharmony_ci haystack: &'a [u8], 811fb6c1f39Sopenharmony_ci needle: &'a [u8], 812fb6c1f39Sopenharmony_ci mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, 813fb6c1f39Sopenharmony_ci) -> impl Iterator<Item = usize> + 'a { 814fb6c1f39Sopenharmony_ci let mut pos = 0; 815fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 816fb6c1f39Sopenharmony_ci if pos > haystack.len() { 817fb6c1f39Sopenharmony_ci return None; 818fb6c1f39Sopenharmony_ci } 819fb6c1f39Sopenharmony_ci match find(&haystack[pos..], needle) { 820fb6c1f39Sopenharmony_ci None => None, 821fb6c1f39Sopenharmony_ci Some(i) => { 822fb6c1f39Sopenharmony_ci let found = pos + i; 823fb6c1f39Sopenharmony_ci // We always need to add at least 1, in case of an empty needle. 824fb6c1f39Sopenharmony_ci pos += i + std::cmp::max(1, needle.len()); 825fb6c1f39Sopenharmony_ci Some(found) 826fb6c1f39Sopenharmony_ci } 827fb6c1f39Sopenharmony_ci } 828fb6c1f39Sopenharmony_ci }) 829fb6c1f39Sopenharmony_ci} 830fb6c1f39Sopenharmony_ci 831fb6c1f39Sopenharmony_ci/// Like iter_from_find, but for reverse searching. 832fb6c1f39Sopenharmony_cifn iter_from_rfind<'a>( 833fb6c1f39Sopenharmony_ci haystack: &'a [u8], 834fb6c1f39Sopenharmony_ci needle: &'a [u8], 835fb6c1f39Sopenharmony_ci mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, 836fb6c1f39Sopenharmony_ci) -> impl Iterator<Item = usize> + 'a { 837fb6c1f39Sopenharmony_ci let mut pos = Some(haystack.len()); 838fb6c1f39Sopenharmony_ci std::iter::from_fn(move || { 839fb6c1f39Sopenharmony_ci let end = match pos { 840fb6c1f39Sopenharmony_ci None => return None, 841fb6c1f39Sopenharmony_ci Some(end) => end, 842fb6c1f39Sopenharmony_ci }; 843fb6c1f39Sopenharmony_ci match rfind(&haystack[..end], needle) { 844fb6c1f39Sopenharmony_ci None => None, 845fb6c1f39Sopenharmony_ci Some(i) => { 846fb6c1f39Sopenharmony_ci if end == i { 847fb6c1f39Sopenharmony_ci // We always need to subtract at least 1, in case of an 848fb6c1f39Sopenharmony_ci // empty needle. 849fb6c1f39Sopenharmony_ci pos = end.checked_sub(1); 850fb6c1f39Sopenharmony_ci } else { 851fb6c1f39Sopenharmony_ci pos = Some(i); 852fb6c1f39Sopenharmony_ci } 853fb6c1f39Sopenharmony_ci Some(i) 854fb6c1f39Sopenharmony_ci } 855fb6c1f39Sopenharmony_ci } 856fb6c1f39Sopenharmony_ci }) 857fb6c1f39Sopenharmony_ci} 858