1c67d6573Sopenharmony_ciuse std::mem; 2c67d6573Sopenharmony_ci 3c67d6573Sopenharmony_ciuse aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder}; 4c67d6573Sopenharmony_ciuse memchr::{memchr, memchr2, memchr3, memmem}; 5c67d6573Sopenharmony_ciuse regex_syntax::hir::literal::{Literal, Literals}; 6c67d6573Sopenharmony_ci 7c67d6573Sopenharmony_ci/// A prefix extracted from a compiled regular expression. 8c67d6573Sopenharmony_ci/// 9c67d6573Sopenharmony_ci/// A regex prefix is a set of literal strings that *must* be matched at the 10c67d6573Sopenharmony_ci/// beginning of a regex in order for the entire regex to match. Similarly 11c67d6573Sopenharmony_ci/// for a regex suffix. 12c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 13c67d6573Sopenharmony_cipub struct LiteralSearcher { 14c67d6573Sopenharmony_ci complete: bool, 15c67d6573Sopenharmony_ci lcp: Memmem, 16c67d6573Sopenharmony_ci lcs: Memmem, 17c67d6573Sopenharmony_ci matcher: Matcher, 18c67d6573Sopenharmony_ci} 19c67d6573Sopenharmony_ci 20c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 21c67d6573Sopenharmony_cienum Matcher { 22c67d6573Sopenharmony_ci /// No literals. (Never advances through the input.) 23c67d6573Sopenharmony_ci Empty, 24c67d6573Sopenharmony_ci /// A set of four or more single byte literals. 25c67d6573Sopenharmony_ci Bytes(SingleByteSet), 26c67d6573Sopenharmony_ci /// A single substring, using vector accelerated routines when available. 27c67d6573Sopenharmony_ci Memmem(Memmem), 28c67d6573Sopenharmony_ci /// An Aho-Corasick automaton. 29c67d6573Sopenharmony_ci AC { ac: AhoCorasick<u32>, lits: Vec<Literal> }, 30c67d6573Sopenharmony_ci /// A packed multiple substring searcher, using SIMD. 31c67d6573Sopenharmony_ci /// 32c67d6573Sopenharmony_ci /// Note that Aho-Corasick will actually use this packed searcher 33c67d6573Sopenharmony_ci /// internally automatically, however, there is some overhead associated 34c67d6573Sopenharmony_ci /// with going through the Aho-Corasick machinery. So using the packed 35c67d6573Sopenharmony_ci /// searcher directly results in some gains. 36c67d6573Sopenharmony_ci Packed { s: packed::Searcher, lits: Vec<Literal> }, 37c67d6573Sopenharmony_ci} 38c67d6573Sopenharmony_ci 39c67d6573Sopenharmony_ciimpl LiteralSearcher { 40c67d6573Sopenharmony_ci /// Returns a matcher that never matches and never advances the input. 41c67d6573Sopenharmony_ci pub fn empty() -> Self { 42c67d6573Sopenharmony_ci Self::new(Literals::empty(), Matcher::Empty) 43c67d6573Sopenharmony_ci } 44c67d6573Sopenharmony_ci 45c67d6573Sopenharmony_ci /// Returns a matcher for literal prefixes from the given set. 46c67d6573Sopenharmony_ci pub fn prefixes(lits: Literals) -> Self { 47c67d6573Sopenharmony_ci let matcher = Matcher::prefixes(&lits); 48c67d6573Sopenharmony_ci Self::new(lits, matcher) 49c67d6573Sopenharmony_ci } 50c67d6573Sopenharmony_ci 51c67d6573Sopenharmony_ci /// Returns a matcher for literal suffixes from the given set. 52c67d6573Sopenharmony_ci pub fn suffixes(lits: Literals) -> Self { 53c67d6573Sopenharmony_ci let matcher = Matcher::suffixes(&lits); 54c67d6573Sopenharmony_ci Self::new(lits, matcher) 55c67d6573Sopenharmony_ci } 56c67d6573Sopenharmony_ci 57c67d6573Sopenharmony_ci fn new(lits: Literals, matcher: Matcher) -> Self { 58c67d6573Sopenharmony_ci let complete = lits.all_complete(); 59c67d6573Sopenharmony_ci LiteralSearcher { 60c67d6573Sopenharmony_ci complete, 61c67d6573Sopenharmony_ci lcp: Memmem::new(lits.longest_common_prefix()), 62c67d6573Sopenharmony_ci lcs: Memmem::new(lits.longest_common_suffix()), 63c67d6573Sopenharmony_ci matcher, 64c67d6573Sopenharmony_ci } 65c67d6573Sopenharmony_ci } 66c67d6573Sopenharmony_ci 67c67d6573Sopenharmony_ci /// Returns true if all matches comprise the entire regular expression. 68c67d6573Sopenharmony_ci /// 69c67d6573Sopenharmony_ci /// This does not necessarily mean that a literal match implies a match 70c67d6573Sopenharmony_ci /// of the regular expression. For example, the regular expression `^a` 71c67d6573Sopenharmony_ci /// is comprised of a single complete literal `a`, but the regular 72c67d6573Sopenharmony_ci /// expression demands that it only match at the beginning of a string. 73c67d6573Sopenharmony_ci pub fn complete(&self) -> bool { 74c67d6573Sopenharmony_ci self.complete && !self.is_empty() 75c67d6573Sopenharmony_ci } 76c67d6573Sopenharmony_ci 77c67d6573Sopenharmony_ci /// Find the position of a literal in `haystack` if it exists. 78c67d6573Sopenharmony_ci #[cfg_attr(feature = "perf-inline", inline(always))] 79c67d6573Sopenharmony_ci pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> { 80c67d6573Sopenharmony_ci use self::Matcher::*; 81c67d6573Sopenharmony_ci match self.matcher { 82c67d6573Sopenharmony_ci Empty => Some((0, 0)), 83c67d6573Sopenharmony_ci Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)), 84c67d6573Sopenharmony_ci Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())), 85c67d6573Sopenharmony_ci AC { ref ac, .. } => { 86c67d6573Sopenharmony_ci ac.find(haystack).map(|m| (m.start(), m.end())) 87c67d6573Sopenharmony_ci } 88c67d6573Sopenharmony_ci Packed { ref s, .. } => { 89c67d6573Sopenharmony_ci s.find(haystack).map(|m| (m.start(), m.end())) 90c67d6573Sopenharmony_ci } 91c67d6573Sopenharmony_ci } 92c67d6573Sopenharmony_ci } 93c67d6573Sopenharmony_ci 94c67d6573Sopenharmony_ci /// Like find, except matches must start at index `0`. 95c67d6573Sopenharmony_ci pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> { 96c67d6573Sopenharmony_ci for lit in self.iter() { 97c67d6573Sopenharmony_ci if lit.len() > haystack.len() { 98c67d6573Sopenharmony_ci continue; 99c67d6573Sopenharmony_ci } 100c67d6573Sopenharmony_ci if lit == &haystack[0..lit.len()] { 101c67d6573Sopenharmony_ci return Some((0, lit.len())); 102c67d6573Sopenharmony_ci } 103c67d6573Sopenharmony_ci } 104c67d6573Sopenharmony_ci None 105c67d6573Sopenharmony_ci } 106c67d6573Sopenharmony_ci 107c67d6573Sopenharmony_ci /// Like find, except matches must end at index `haystack.len()`. 108c67d6573Sopenharmony_ci pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> { 109c67d6573Sopenharmony_ci for lit in self.iter() { 110c67d6573Sopenharmony_ci if lit.len() > haystack.len() { 111c67d6573Sopenharmony_ci continue; 112c67d6573Sopenharmony_ci } 113c67d6573Sopenharmony_ci if lit == &haystack[haystack.len() - lit.len()..] { 114c67d6573Sopenharmony_ci return Some((haystack.len() - lit.len(), haystack.len())); 115c67d6573Sopenharmony_ci } 116c67d6573Sopenharmony_ci } 117c67d6573Sopenharmony_ci None 118c67d6573Sopenharmony_ci } 119c67d6573Sopenharmony_ci 120c67d6573Sopenharmony_ci /// Returns an iterator over all literals to be matched. 121c67d6573Sopenharmony_ci pub fn iter(&self) -> LiteralIter<'_> { 122c67d6573Sopenharmony_ci match self.matcher { 123c67d6573Sopenharmony_ci Matcher::Empty => LiteralIter::Empty, 124c67d6573Sopenharmony_ci Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense), 125c67d6573Sopenharmony_ci Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()), 126c67d6573Sopenharmony_ci Matcher::AC { ref lits, .. } => LiteralIter::AC(lits), 127c67d6573Sopenharmony_ci Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits), 128c67d6573Sopenharmony_ci } 129c67d6573Sopenharmony_ci } 130c67d6573Sopenharmony_ci 131c67d6573Sopenharmony_ci /// Returns a matcher for the longest common prefix of this matcher. 132c67d6573Sopenharmony_ci pub fn lcp(&self) -> &Memmem { 133c67d6573Sopenharmony_ci &self.lcp 134c67d6573Sopenharmony_ci } 135c67d6573Sopenharmony_ci 136c67d6573Sopenharmony_ci /// Returns a matcher for the longest common suffix of this matcher. 137c67d6573Sopenharmony_ci pub fn lcs(&self) -> &Memmem { 138c67d6573Sopenharmony_ci &self.lcs 139c67d6573Sopenharmony_ci } 140c67d6573Sopenharmony_ci 141c67d6573Sopenharmony_ci /// Returns true iff this prefix is empty. 142c67d6573Sopenharmony_ci pub fn is_empty(&self) -> bool { 143c67d6573Sopenharmony_ci self.len() == 0 144c67d6573Sopenharmony_ci } 145c67d6573Sopenharmony_ci 146c67d6573Sopenharmony_ci /// Returns the number of prefixes in this machine. 147c67d6573Sopenharmony_ci pub fn len(&self) -> usize { 148c67d6573Sopenharmony_ci use self::Matcher::*; 149c67d6573Sopenharmony_ci match self.matcher { 150c67d6573Sopenharmony_ci Empty => 0, 151c67d6573Sopenharmony_ci Bytes(ref sset) => sset.dense.len(), 152c67d6573Sopenharmony_ci Memmem(_) => 1, 153c67d6573Sopenharmony_ci AC { ref ac, .. } => ac.pattern_count(), 154c67d6573Sopenharmony_ci Packed { ref lits, .. } => lits.len(), 155c67d6573Sopenharmony_ci } 156c67d6573Sopenharmony_ci } 157c67d6573Sopenharmony_ci 158c67d6573Sopenharmony_ci /// Return the approximate heap usage of literals in bytes. 159c67d6573Sopenharmony_ci pub fn approximate_size(&self) -> usize { 160c67d6573Sopenharmony_ci use self::Matcher::*; 161c67d6573Sopenharmony_ci match self.matcher { 162c67d6573Sopenharmony_ci Empty => 0, 163c67d6573Sopenharmony_ci Bytes(ref sset) => sset.approximate_size(), 164c67d6573Sopenharmony_ci Memmem(ref single) => single.approximate_size(), 165c67d6573Sopenharmony_ci AC { ref ac, .. } => ac.heap_bytes(), 166c67d6573Sopenharmony_ci Packed { ref s, .. } => s.heap_bytes(), 167c67d6573Sopenharmony_ci } 168c67d6573Sopenharmony_ci } 169c67d6573Sopenharmony_ci} 170c67d6573Sopenharmony_ci 171c67d6573Sopenharmony_ciimpl Matcher { 172c67d6573Sopenharmony_ci fn prefixes(lits: &Literals) -> Self { 173c67d6573Sopenharmony_ci let sset = SingleByteSet::prefixes(lits); 174c67d6573Sopenharmony_ci Matcher::new(lits, sset) 175c67d6573Sopenharmony_ci } 176c67d6573Sopenharmony_ci 177c67d6573Sopenharmony_ci fn suffixes(lits: &Literals) -> Self { 178c67d6573Sopenharmony_ci let sset = SingleByteSet::suffixes(lits); 179c67d6573Sopenharmony_ci Matcher::new(lits, sset) 180c67d6573Sopenharmony_ci } 181c67d6573Sopenharmony_ci 182c67d6573Sopenharmony_ci fn new(lits: &Literals, sset: SingleByteSet) -> Self { 183c67d6573Sopenharmony_ci if lits.literals().is_empty() { 184c67d6573Sopenharmony_ci return Matcher::Empty; 185c67d6573Sopenharmony_ci } 186c67d6573Sopenharmony_ci if sset.dense.len() >= 26 { 187c67d6573Sopenharmony_ci // Avoid trying to match a large number of single bytes. 188c67d6573Sopenharmony_ci // This is *very* sensitive to a frequency analysis comparison 189c67d6573Sopenharmony_ci // between the bytes in sset and the composition of the haystack. 190c67d6573Sopenharmony_ci // No matter the size of sset, if its members all are rare in the 191c67d6573Sopenharmony_ci // haystack, then it'd be worth using it. How to tune this... IDK. 192c67d6573Sopenharmony_ci // ---AG 193c67d6573Sopenharmony_ci return Matcher::Empty; 194c67d6573Sopenharmony_ci } 195c67d6573Sopenharmony_ci if sset.complete { 196c67d6573Sopenharmony_ci return Matcher::Bytes(sset); 197c67d6573Sopenharmony_ci } 198c67d6573Sopenharmony_ci if lits.literals().len() == 1 { 199c67d6573Sopenharmony_ci return Matcher::Memmem(Memmem::new(&lits.literals()[0])); 200c67d6573Sopenharmony_ci } 201c67d6573Sopenharmony_ci 202c67d6573Sopenharmony_ci let pats = lits.literals().to_owned(); 203c67d6573Sopenharmony_ci let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii; 204c67d6573Sopenharmony_ci if lits.literals().len() <= 100 && !is_aho_corasick_fast { 205c67d6573Sopenharmony_ci let mut builder = packed::Config::new() 206c67d6573Sopenharmony_ci .match_kind(packed::MatchKind::LeftmostFirst) 207c67d6573Sopenharmony_ci .builder(); 208c67d6573Sopenharmony_ci if let Some(s) = builder.extend(&pats).build() { 209c67d6573Sopenharmony_ci return Matcher::Packed { s, lits: pats }; 210c67d6573Sopenharmony_ci } 211c67d6573Sopenharmony_ci } 212c67d6573Sopenharmony_ci let ac = AhoCorasickBuilder::new() 213c67d6573Sopenharmony_ci .match_kind(aho_corasick::MatchKind::LeftmostFirst) 214c67d6573Sopenharmony_ci .dfa(true) 215c67d6573Sopenharmony_ci .build_with_size::<u32, _, _>(&pats) 216c67d6573Sopenharmony_ci .unwrap(); 217c67d6573Sopenharmony_ci Matcher::AC { ac, lits: pats } 218c67d6573Sopenharmony_ci } 219c67d6573Sopenharmony_ci} 220c67d6573Sopenharmony_ci 221c67d6573Sopenharmony_ci#[derive(Debug)] 222c67d6573Sopenharmony_cipub enum LiteralIter<'a> { 223c67d6573Sopenharmony_ci Empty, 224c67d6573Sopenharmony_ci Bytes(&'a [u8]), 225c67d6573Sopenharmony_ci Single(&'a [u8]), 226c67d6573Sopenharmony_ci AC(&'a [Literal]), 227c67d6573Sopenharmony_ci Packed(&'a [Literal]), 228c67d6573Sopenharmony_ci} 229c67d6573Sopenharmony_ci 230c67d6573Sopenharmony_ciimpl<'a> Iterator for LiteralIter<'a> { 231c67d6573Sopenharmony_ci type Item = &'a [u8]; 232c67d6573Sopenharmony_ci 233c67d6573Sopenharmony_ci fn next(&mut self) -> Option<Self::Item> { 234c67d6573Sopenharmony_ci match *self { 235c67d6573Sopenharmony_ci LiteralIter::Empty => None, 236c67d6573Sopenharmony_ci LiteralIter::Bytes(ref mut many) => { 237c67d6573Sopenharmony_ci if many.is_empty() { 238c67d6573Sopenharmony_ci None 239c67d6573Sopenharmony_ci } else { 240c67d6573Sopenharmony_ci let next = &many[0..1]; 241c67d6573Sopenharmony_ci *many = &many[1..]; 242c67d6573Sopenharmony_ci Some(next) 243c67d6573Sopenharmony_ci } 244c67d6573Sopenharmony_ci } 245c67d6573Sopenharmony_ci LiteralIter::Single(ref mut one) => { 246c67d6573Sopenharmony_ci if one.is_empty() { 247c67d6573Sopenharmony_ci None 248c67d6573Sopenharmony_ci } else { 249c67d6573Sopenharmony_ci let next = &one[..]; 250c67d6573Sopenharmony_ci *one = &[]; 251c67d6573Sopenharmony_ci Some(next) 252c67d6573Sopenharmony_ci } 253c67d6573Sopenharmony_ci } 254c67d6573Sopenharmony_ci LiteralIter::AC(ref mut lits) => { 255c67d6573Sopenharmony_ci if lits.is_empty() { 256c67d6573Sopenharmony_ci None 257c67d6573Sopenharmony_ci } else { 258c67d6573Sopenharmony_ci let next = &lits[0]; 259c67d6573Sopenharmony_ci *lits = &lits[1..]; 260c67d6573Sopenharmony_ci Some(&**next) 261c67d6573Sopenharmony_ci } 262c67d6573Sopenharmony_ci } 263c67d6573Sopenharmony_ci LiteralIter::Packed(ref mut lits) => { 264c67d6573Sopenharmony_ci if lits.is_empty() { 265c67d6573Sopenharmony_ci None 266c67d6573Sopenharmony_ci } else { 267c67d6573Sopenharmony_ci let next = &lits[0]; 268c67d6573Sopenharmony_ci *lits = &lits[1..]; 269c67d6573Sopenharmony_ci Some(&**next) 270c67d6573Sopenharmony_ci } 271c67d6573Sopenharmony_ci } 272c67d6573Sopenharmony_ci } 273c67d6573Sopenharmony_ci } 274c67d6573Sopenharmony_ci} 275c67d6573Sopenharmony_ci 276c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 277c67d6573Sopenharmony_cistruct SingleByteSet { 278c67d6573Sopenharmony_ci sparse: Vec<bool>, 279c67d6573Sopenharmony_ci dense: Vec<u8>, 280c67d6573Sopenharmony_ci complete: bool, 281c67d6573Sopenharmony_ci all_ascii: bool, 282c67d6573Sopenharmony_ci} 283c67d6573Sopenharmony_ci 284c67d6573Sopenharmony_ciimpl SingleByteSet { 285c67d6573Sopenharmony_ci fn new() -> SingleByteSet { 286c67d6573Sopenharmony_ci SingleByteSet { 287c67d6573Sopenharmony_ci sparse: vec![false; 256], 288c67d6573Sopenharmony_ci dense: vec![], 289c67d6573Sopenharmony_ci complete: true, 290c67d6573Sopenharmony_ci all_ascii: true, 291c67d6573Sopenharmony_ci } 292c67d6573Sopenharmony_ci } 293c67d6573Sopenharmony_ci 294c67d6573Sopenharmony_ci fn prefixes(lits: &Literals) -> SingleByteSet { 295c67d6573Sopenharmony_ci let mut sset = SingleByteSet::new(); 296c67d6573Sopenharmony_ci for lit in lits.literals() { 297c67d6573Sopenharmony_ci sset.complete = sset.complete && lit.len() == 1; 298c67d6573Sopenharmony_ci if let Some(&b) = lit.get(0) { 299c67d6573Sopenharmony_ci if !sset.sparse[b as usize] { 300c67d6573Sopenharmony_ci if b > 0x7F { 301c67d6573Sopenharmony_ci sset.all_ascii = false; 302c67d6573Sopenharmony_ci } 303c67d6573Sopenharmony_ci sset.dense.push(b); 304c67d6573Sopenharmony_ci sset.sparse[b as usize] = true; 305c67d6573Sopenharmony_ci } 306c67d6573Sopenharmony_ci } 307c67d6573Sopenharmony_ci } 308c67d6573Sopenharmony_ci sset 309c67d6573Sopenharmony_ci } 310c67d6573Sopenharmony_ci 311c67d6573Sopenharmony_ci fn suffixes(lits: &Literals) -> SingleByteSet { 312c67d6573Sopenharmony_ci let mut sset = SingleByteSet::new(); 313c67d6573Sopenharmony_ci for lit in lits.literals() { 314c67d6573Sopenharmony_ci sset.complete = sset.complete && lit.len() == 1; 315c67d6573Sopenharmony_ci if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) { 316c67d6573Sopenharmony_ci if !sset.sparse[b as usize] { 317c67d6573Sopenharmony_ci if b > 0x7F { 318c67d6573Sopenharmony_ci sset.all_ascii = false; 319c67d6573Sopenharmony_ci } 320c67d6573Sopenharmony_ci sset.dense.push(b); 321c67d6573Sopenharmony_ci sset.sparse[b as usize] = true; 322c67d6573Sopenharmony_ci } 323c67d6573Sopenharmony_ci } 324c67d6573Sopenharmony_ci } 325c67d6573Sopenharmony_ci sset 326c67d6573Sopenharmony_ci } 327c67d6573Sopenharmony_ci 328c67d6573Sopenharmony_ci /// Faster find that special cases certain sizes to use memchr. 329c67d6573Sopenharmony_ci #[cfg_attr(feature = "perf-inline", inline(always))] 330c67d6573Sopenharmony_ci fn find(&self, text: &[u8]) -> Option<usize> { 331c67d6573Sopenharmony_ci match self.dense.len() { 332c67d6573Sopenharmony_ci 0 => None, 333c67d6573Sopenharmony_ci 1 => memchr(self.dense[0], text), 334c67d6573Sopenharmony_ci 2 => memchr2(self.dense[0], self.dense[1], text), 335c67d6573Sopenharmony_ci 3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text), 336c67d6573Sopenharmony_ci _ => self._find(text), 337c67d6573Sopenharmony_ci } 338c67d6573Sopenharmony_ci } 339c67d6573Sopenharmony_ci 340c67d6573Sopenharmony_ci /// Generic find that works on any sized set. 341c67d6573Sopenharmony_ci fn _find(&self, haystack: &[u8]) -> Option<usize> { 342c67d6573Sopenharmony_ci for (i, &b) in haystack.iter().enumerate() { 343c67d6573Sopenharmony_ci if self.sparse[b as usize] { 344c67d6573Sopenharmony_ci return Some(i); 345c67d6573Sopenharmony_ci } 346c67d6573Sopenharmony_ci } 347c67d6573Sopenharmony_ci None 348c67d6573Sopenharmony_ci } 349c67d6573Sopenharmony_ci 350c67d6573Sopenharmony_ci fn approximate_size(&self) -> usize { 351c67d6573Sopenharmony_ci (self.dense.len() * mem::size_of::<u8>()) 352c67d6573Sopenharmony_ci + (self.sparse.len() * mem::size_of::<bool>()) 353c67d6573Sopenharmony_ci } 354c67d6573Sopenharmony_ci} 355c67d6573Sopenharmony_ci 356c67d6573Sopenharmony_ci/// A simple wrapper around the memchr crate's memmem implementation. 357c67d6573Sopenharmony_ci/// 358c67d6573Sopenharmony_ci/// The API this exposes mirrors the API of previous substring searchers that 359c67d6573Sopenharmony_ci/// this supplanted. 360c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 361c67d6573Sopenharmony_cipub struct Memmem { 362c67d6573Sopenharmony_ci finder: memmem::Finder<'static>, 363c67d6573Sopenharmony_ci char_len: usize, 364c67d6573Sopenharmony_ci} 365c67d6573Sopenharmony_ci 366c67d6573Sopenharmony_ciimpl Memmem { 367c67d6573Sopenharmony_ci fn new(pat: &[u8]) -> Memmem { 368c67d6573Sopenharmony_ci Memmem { 369c67d6573Sopenharmony_ci finder: memmem::Finder::new(pat).into_owned(), 370c67d6573Sopenharmony_ci char_len: char_len_lossy(pat), 371c67d6573Sopenharmony_ci } 372c67d6573Sopenharmony_ci } 373c67d6573Sopenharmony_ci 374c67d6573Sopenharmony_ci #[cfg_attr(feature = "perf-inline", inline(always))] 375c67d6573Sopenharmony_ci pub fn find(&self, haystack: &[u8]) -> Option<usize> { 376c67d6573Sopenharmony_ci self.finder.find(haystack) 377c67d6573Sopenharmony_ci } 378c67d6573Sopenharmony_ci 379c67d6573Sopenharmony_ci #[cfg_attr(feature = "perf-inline", inline(always))] 380c67d6573Sopenharmony_ci pub fn is_suffix(&self, text: &[u8]) -> bool { 381c67d6573Sopenharmony_ci if text.len() < self.len() { 382c67d6573Sopenharmony_ci return false; 383c67d6573Sopenharmony_ci } 384c67d6573Sopenharmony_ci &text[text.len() - self.len()..] == self.finder.needle() 385c67d6573Sopenharmony_ci } 386c67d6573Sopenharmony_ci 387c67d6573Sopenharmony_ci pub fn len(&self) -> usize { 388c67d6573Sopenharmony_ci self.finder.needle().len() 389c67d6573Sopenharmony_ci } 390c67d6573Sopenharmony_ci 391c67d6573Sopenharmony_ci pub fn char_len(&self) -> usize { 392c67d6573Sopenharmony_ci self.char_len 393c67d6573Sopenharmony_ci } 394c67d6573Sopenharmony_ci 395c67d6573Sopenharmony_ci fn approximate_size(&self) -> usize { 396c67d6573Sopenharmony_ci self.finder.needle().len() * mem::size_of::<u8>() 397c67d6573Sopenharmony_ci } 398c67d6573Sopenharmony_ci} 399c67d6573Sopenharmony_ci 400c67d6573Sopenharmony_cifn char_len_lossy(bytes: &[u8]) -> usize { 401c67d6573Sopenharmony_ci String::from_utf8_lossy(bytes).chars().count() 402c67d6573Sopenharmony_ci} 403