1c67d6573Sopenharmony_ciuse std::mem;
2c67d6573Sopenharmony_ci
3c67d6573Sopenharmony_ciuse aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
4c67d6573Sopenharmony_ciuse memchr::{memchr, memchr2, memchr3, memmem};
5c67d6573Sopenharmony_ciuse regex_syntax::hir::literal::{Literal, Literals};
6c67d6573Sopenharmony_ci
7c67d6573Sopenharmony_ci/// A prefix extracted from a compiled regular expression.
8c67d6573Sopenharmony_ci///
9c67d6573Sopenharmony_ci/// A regex prefix is a set of literal strings that *must* be matched at the
10c67d6573Sopenharmony_ci/// beginning of a regex in order for the entire regex to match. Similarly
11c67d6573Sopenharmony_ci/// for a regex suffix.
12c67d6573Sopenharmony_ci#[derive(Clone, Debug)]
13c67d6573Sopenharmony_cipub struct LiteralSearcher {
14c67d6573Sopenharmony_ci    complete: bool,
15c67d6573Sopenharmony_ci    lcp: Memmem,
16c67d6573Sopenharmony_ci    lcs: Memmem,
17c67d6573Sopenharmony_ci    matcher: Matcher,
18c67d6573Sopenharmony_ci}
19c67d6573Sopenharmony_ci
20c67d6573Sopenharmony_ci#[derive(Clone, Debug)]
21c67d6573Sopenharmony_cienum Matcher {
22c67d6573Sopenharmony_ci    /// No literals. (Never advances through the input.)
23c67d6573Sopenharmony_ci    Empty,
24c67d6573Sopenharmony_ci    /// A set of four or more single byte literals.
25c67d6573Sopenharmony_ci    Bytes(SingleByteSet),
26c67d6573Sopenharmony_ci    /// A single substring, using vector accelerated routines when available.
27c67d6573Sopenharmony_ci    Memmem(Memmem),
28c67d6573Sopenharmony_ci    /// An Aho-Corasick automaton.
29c67d6573Sopenharmony_ci    AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
30c67d6573Sopenharmony_ci    /// A packed multiple substring searcher, using SIMD.
31c67d6573Sopenharmony_ci    ///
32c67d6573Sopenharmony_ci    /// Note that Aho-Corasick will actually use this packed searcher
33c67d6573Sopenharmony_ci    /// internally automatically, however, there is some overhead associated
34c67d6573Sopenharmony_ci    /// with going through the Aho-Corasick machinery. So using the packed
35c67d6573Sopenharmony_ci    /// searcher directly results in some gains.
36c67d6573Sopenharmony_ci    Packed { s: packed::Searcher, lits: Vec<Literal> },
37c67d6573Sopenharmony_ci}
38c67d6573Sopenharmony_ci
39c67d6573Sopenharmony_ciimpl LiteralSearcher {
40c67d6573Sopenharmony_ci    /// Returns a matcher that never matches and never advances the input.
41c67d6573Sopenharmony_ci    pub fn empty() -> Self {
42c67d6573Sopenharmony_ci        Self::new(Literals::empty(), Matcher::Empty)
43c67d6573Sopenharmony_ci    }
44c67d6573Sopenharmony_ci
45c67d6573Sopenharmony_ci    /// Returns a matcher for literal prefixes from the given set.
46c67d6573Sopenharmony_ci    pub fn prefixes(lits: Literals) -> Self {
47c67d6573Sopenharmony_ci        let matcher = Matcher::prefixes(&lits);
48c67d6573Sopenharmony_ci        Self::new(lits, matcher)
49c67d6573Sopenharmony_ci    }
50c67d6573Sopenharmony_ci
51c67d6573Sopenharmony_ci    /// Returns a matcher for literal suffixes from the given set.
52c67d6573Sopenharmony_ci    pub fn suffixes(lits: Literals) -> Self {
53c67d6573Sopenharmony_ci        let matcher = Matcher::suffixes(&lits);
54c67d6573Sopenharmony_ci        Self::new(lits, matcher)
55c67d6573Sopenharmony_ci    }
56c67d6573Sopenharmony_ci
57c67d6573Sopenharmony_ci    fn new(lits: Literals, matcher: Matcher) -> Self {
58c67d6573Sopenharmony_ci        let complete = lits.all_complete();
59c67d6573Sopenharmony_ci        LiteralSearcher {
60c67d6573Sopenharmony_ci            complete,
61c67d6573Sopenharmony_ci            lcp: Memmem::new(lits.longest_common_prefix()),
62c67d6573Sopenharmony_ci            lcs: Memmem::new(lits.longest_common_suffix()),
63c67d6573Sopenharmony_ci            matcher,
64c67d6573Sopenharmony_ci        }
65c67d6573Sopenharmony_ci    }
66c67d6573Sopenharmony_ci
67c67d6573Sopenharmony_ci    /// Returns true if all matches comprise the entire regular expression.
68c67d6573Sopenharmony_ci    ///
69c67d6573Sopenharmony_ci    /// This does not necessarily mean that a literal match implies a match
70c67d6573Sopenharmony_ci    /// of the regular expression. For example, the regular expression `^a`
71c67d6573Sopenharmony_ci    /// is comprised of a single complete literal `a`, but the regular
72c67d6573Sopenharmony_ci    /// expression demands that it only match at the beginning of a string.
73c67d6573Sopenharmony_ci    pub fn complete(&self) -> bool {
74c67d6573Sopenharmony_ci        self.complete && !self.is_empty()
75c67d6573Sopenharmony_ci    }
76c67d6573Sopenharmony_ci
77c67d6573Sopenharmony_ci    /// Find the position of a literal in `haystack` if it exists.
78c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
79c67d6573Sopenharmony_ci    pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> {
80c67d6573Sopenharmony_ci        use self::Matcher::*;
81c67d6573Sopenharmony_ci        match self.matcher {
82c67d6573Sopenharmony_ci            Empty => Some((0, 0)),
83c67d6573Sopenharmony_ci            Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
84c67d6573Sopenharmony_ci            Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
85c67d6573Sopenharmony_ci            AC { ref ac, .. } => {
86c67d6573Sopenharmony_ci                ac.find(haystack).map(|m| (m.start(), m.end()))
87c67d6573Sopenharmony_ci            }
88c67d6573Sopenharmony_ci            Packed { ref s, .. } => {
89c67d6573Sopenharmony_ci                s.find(haystack).map(|m| (m.start(), m.end()))
90c67d6573Sopenharmony_ci            }
91c67d6573Sopenharmony_ci        }
92c67d6573Sopenharmony_ci    }
93c67d6573Sopenharmony_ci
94c67d6573Sopenharmony_ci    /// Like find, except matches must start at index `0`.
95c67d6573Sopenharmony_ci    pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> {
96c67d6573Sopenharmony_ci        for lit in self.iter() {
97c67d6573Sopenharmony_ci            if lit.len() > haystack.len() {
98c67d6573Sopenharmony_ci                continue;
99c67d6573Sopenharmony_ci            }
100c67d6573Sopenharmony_ci            if lit == &haystack[0..lit.len()] {
101c67d6573Sopenharmony_ci                return Some((0, lit.len()));
102c67d6573Sopenharmony_ci            }
103c67d6573Sopenharmony_ci        }
104c67d6573Sopenharmony_ci        None
105c67d6573Sopenharmony_ci    }
106c67d6573Sopenharmony_ci
107c67d6573Sopenharmony_ci    /// Like find, except matches must end at index `haystack.len()`.
108c67d6573Sopenharmony_ci    pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> {
109c67d6573Sopenharmony_ci        for lit in self.iter() {
110c67d6573Sopenharmony_ci            if lit.len() > haystack.len() {
111c67d6573Sopenharmony_ci                continue;
112c67d6573Sopenharmony_ci            }
113c67d6573Sopenharmony_ci            if lit == &haystack[haystack.len() - lit.len()..] {
114c67d6573Sopenharmony_ci                return Some((haystack.len() - lit.len(), haystack.len()));
115c67d6573Sopenharmony_ci            }
116c67d6573Sopenharmony_ci        }
117c67d6573Sopenharmony_ci        None
118c67d6573Sopenharmony_ci    }
119c67d6573Sopenharmony_ci
120c67d6573Sopenharmony_ci    /// Returns an iterator over all literals to be matched.
121c67d6573Sopenharmony_ci    pub fn iter(&self) -> LiteralIter<'_> {
122c67d6573Sopenharmony_ci        match self.matcher {
123c67d6573Sopenharmony_ci            Matcher::Empty => LiteralIter::Empty,
124c67d6573Sopenharmony_ci            Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
125c67d6573Sopenharmony_ci            Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()),
126c67d6573Sopenharmony_ci            Matcher::AC { ref lits, .. } => LiteralIter::AC(lits),
127c67d6573Sopenharmony_ci            Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits),
128c67d6573Sopenharmony_ci        }
129c67d6573Sopenharmony_ci    }
130c67d6573Sopenharmony_ci
131c67d6573Sopenharmony_ci    /// Returns a matcher for the longest common prefix of this matcher.
132c67d6573Sopenharmony_ci    pub fn lcp(&self) -> &Memmem {
133c67d6573Sopenharmony_ci        &self.lcp
134c67d6573Sopenharmony_ci    }
135c67d6573Sopenharmony_ci
136c67d6573Sopenharmony_ci    /// Returns a matcher for the longest common suffix of this matcher.
137c67d6573Sopenharmony_ci    pub fn lcs(&self) -> &Memmem {
138c67d6573Sopenharmony_ci        &self.lcs
139c67d6573Sopenharmony_ci    }
140c67d6573Sopenharmony_ci
141c67d6573Sopenharmony_ci    /// Returns true iff this prefix is empty.
142c67d6573Sopenharmony_ci    pub fn is_empty(&self) -> bool {
143c67d6573Sopenharmony_ci        self.len() == 0
144c67d6573Sopenharmony_ci    }
145c67d6573Sopenharmony_ci
146c67d6573Sopenharmony_ci    /// Returns the number of prefixes in this machine.
147c67d6573Sopenharmony_ci    pub fn len(&self) -> usize {
148c67d6573Sopenharmony_ci        use self::Matcher::*;
149c67d6573Sopenharmony_ci        match self.matcher {
150c67d6573Sopenharmony_ci            Empty => 0,
151c67d6573Sopenharmony_ci            Bytes(ref sset) => sset.dense.len(),
152c67d6573Sopenharmony_ci            Memmem(_) => 1,
153c67d6573Sopenharmony_ci            AC { ref ac, .. } => ac.pattern_count(),
154c67d6573Sopenharmony_ci            Packed { ref lits, .. } => lits.len(),
155c67d6573Sopenharmony_ci        }
156c67d6573Sopenharmony_ci    }
157c67d6573Sopenharmony_ci
158c67d6573Sopenharmony_ci    /// Return the approximate heap usage of literals in bytes.
159c67d6573Sopenharmony_ci    pub fn approximate_size(&self) -> usize {
160c67d6573Sopenharmony_ci        use self::Matcher::*;
161c67d6573Sopenharmony_ci        match self.matcher {
162c67d6573Sopenharmony_ci            Empty => 0,
163c67d6573Sopenharmony_ci            Bytes(ref sset) => sset.approximate_size(),
164c67d6573Sopenharmony_ci            Memmem(ref single) => single.approximate_size(),
165c67d6573Sopenharmony_ci            AC { ref ac, .. } => ac.heap_bytes(),
166c67d6573Sopenharmony_ci            Packed { ref s, .. } => s.heap_bytes(),
167c67d6573Sopenharmony_ci        }
168c67d6573Sopenharmony_ci    }
169c67d6573Sopenharmony_ci}
170c67d6573Sopenharmony_ci
171c67d6573Sopenharmony_ciimpl Matcher {
172c67d6573Sopenharmony_ci    fn prefixes(lits: &Literals) -> Self {
173c67d6573Sopenharmony_ci        let sset = SingleByteSet::prefixes(lits);
174c67d6573Sopenharmony_ci        Matcher::new(lits, sset)
175c67d6573Sopenharmony_ci    }
176c67d6573Sopenharmony_ci
177c67d6573Sopenharmony_ci    fn suffixes(lits: &Literals) -> Self {
178c67d6573Sopenharmony_ci        let sset = SingleByteSet::suffixes(lits);
179c67d6573Sopenharmony_ci        Matcher::new(lits, sset)
180c67d6573Sopenharmony_ci    }
181c67d6573Sopenharmony_ci
182c67d6573Sopenharmony_ci    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
183c67d6573Sopenharmony_ci        if lits.literals().is_empty() {
184c67d6573Sopenharmony_ci            return Matcher::Empty;
185c67d6573Sopenharmony_ci        }
186c67d6573Sopenharmony_ci        if sset.dense.len() >= 26 {
187c67d6573Sopenharmony_ci            // Avoid trying to match a large number of single bytes.
188c67d6573Sopenharmony_ci            // This is *very* sensitive to a frequency analysis comparison
189c67d6573Sopenharmony_ci            // between the bytes in sset and the composition of the haystack.
190c67d6573Sopenharmony_ci            // No matter the size of sset, if its members all are rare in the
191c67d6573Sopenharmony_ci            // haystack, then it'd be worth using it. How to tune this... IDK.
192c67d6573Sopenharmony_ci            // ---AG
193c67d6573Sopenharmony_ci            return Matcher::Empty;
194c67d6573Sopenharmony_ci        }
195c67d6573Sopenharmony_ci        if sset.complete {
196c67d6573Sopenharmony_ci            return Matcher::Bytes(sset);
197c67d6573Sopenharmony_ci        }
198c67d6573Sopenharmony_ci        if lits.literals().len() == 1 {
199c67d6573Sopenharmony_ci            return Matcher::Memmem(Memmem::new(&lits.literals()[0]));
200c67d6573Sopenharmony_ci        }
201c67d6573Sopenharmony_ci
202c67d6573Sopenharmony_ci        let pats = lits.literals().to_owned();
203c67d6573Sopenharmony_ci        let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii;
204c67d6573Sopenharmony_ci        if lits.literals().len() <= 100 && !is_aho_corasick_fast {
205c67d6573Sopenharmony_ci            let mut builder = packed::Config::new()
206c67d6573Sopenharmony_ci                .match_kind(packed::MatchKind::LeftmostFirst)
207c67d6573Sopenharmony_ci                .builder();
208c67d6573Sopenharmony_ci            if let Some(s) = builder.extend(&pats).build() {
209c67d6573Sopenharmony_ci                return Matcher::Packed { s, lits: pats };
210c67d6573Sopenharmony_ci            }
211c67d6573Sopenharmony_ci        }
212c67d6573Sopenharmony_ci        let ac = AhoCorasickBuilder::new()
213c67d6573Sopenharmony_ci            .match_kind(aho_corasick::MatchKind::LeftmostFirst)
214c67d6573Sopenharmony_ci            .dfa(true)
215c67d6573Sopenharmony_ci            .build_with_size::<u32, _, _>(&pats)
216c67d6573Sopenharmony_ci            .unwrap();
217c67d6573Sopenharmony_ci        Matcher::AC { ac, lits: pats }
218c67d6573Sopenharmony_ci    }
219c67d6573Sopenharmony_ci}
220c67d6573Sopenharmony_ci
221c67d6573Sopenharmony_ci#[derive(Debug)]
222c67d6573Sopenharmony_cipub enum LiteralIter<'a> {
223c67d6573Sopenharmony_ci    Empty,
224c67d6573Sopenharmony_ci    Bytes(&'a [u8]),
225c67d6573Sopenharmony_ci    Single(&'a [u8]),
226c67d6573Sopenharmony_ci    AC(&'a [Literal]),
227c67d6573Sopenharmony_ci    Packed(&'a [Literal]),
228c67d6573Sopenharmony_ci}
229c67d6573Sopenharmony_ci
230c67d6573Sopenharmony_ciimpl<'a> Iterator for LiteralIter<'a> {
231c67d6573Sopenharmony_ci    type Item = &'a [u8];
232c67d6573Sopenharmony_ci
233c67d6573Sopenharmony_ci    fn next(&mut self) -> Option<Self::Item> {
234c67d6573Sopenharmony_ci        match *self {
235c67d6573Sopenharmony_ci            LiteralIter::Empty => None,
236c67d6573Sopenharmony_ci            LiteralIter::Bytes(ref mut many) => {
237c67d6573Sopenharmony_ci                if many.is_empty() {
238c67d6573Sopenharmony_ci                    None
239c67d6573Sopenharmony_ci                } else {
240c67d6573Sopenharmony_ci                    let next = &many[0..1];
241c67d6573Sopenharmony_ci                    *many = &many[1..];
242c67d6573Sopenharmony_ci                    Some(next)
243c67d6573Sopenharmony_ci                }
244c67d6573Sopenharmony_ci            }
245c67d6573Sopenharmony_ci            LiteralIter::Single(ref mut one) => {
246c67d6573Sopenharmony_ci                if one.is_empty() {
247c67d6573Sopenharmony_ci                    None
248c67d6573Sopenharmony_ci                } else {
249c67d6573Sopenharmony_ci                    let next = &one[..];
250c67d6573Sopenharmony_ci                    *one = &[];
251c67d6573Sopenharmony_ci                    Some(next)
252c67d6573Sopenharmony_ci                }
253c67d6573Sopenharmony_ci            }
254c67d6573Sopenharmony_ci            LiteralIter::AC(ref mut lits) => {
255c67d6573Sopenharmony_ci                if lits.is_empty() {
256c67d6573Sopenharmony_ci                    None
257c67d6573Sopenharmony_ci                } else {
258c67d6573Sopenharmony_ci                    let next = &lits[0];
259c67d6573Sopenharmony_ci                    *lits = &lits[1..];
260c67d6573Sopenharmony_ci                    Some(&**next)
261c67d6573Sopenharmony_ci                }
262c67d6573Sopenharmony_ci            }
263c67d6573Sopenharmony_ci            LiteralIter::Packed(ref mut lits) => {
264c67d6573Sopenharmony_ci                if lits.is_empty() {
265c67d6573Sopenharmony_ci                    None
266c67d6573Sopenharmony_ci                } else {
267c67d6573Sopenharmony_ci                    let next = &lits[0];
268c67d6573Sopenharmony_ci                    *lits = &lits[1..];
269c67d6573Sopenharmony_ci                    Some(&**next)
270c67d6573Sopenharmony_ci                }
271c67d6573Sopenharmony_ci            }
272c67d6573Sopenharmony_ci        }
273c67d6573Sopenharmony_ci    }
274c67d6573Sopenharmony_ci}
275c67d6573Sopenharmony_ci
276c67d6573Sopenharmony_ci#[derive(Clone, Debug)]
277c67d6573Sopenharmony_cistruct SingleByteSet {
278c67d6573Sopenharmony_ci    sparse: Vec<bool>,
279c67d6573Sopenharmony_ci    dense: Vec<u8>,
280c67d6573Sopenharmony_ci    complete: bool,
281c67d6573Sopenharmony_ci    all_ascii: bool,
282c67d6573Sopenharmony_ci}
283c67d6573Sopenharmony_ci
284c67d6573Sopenharmony_ciimpl SingleByteSet {
285c67d6573Sopenharmony_ci    fn new() -> SingleByteSet {
286c67d6573Sopenharmony_ci        SingleByteSet {
287c67d6573Sopenharmony_ci            sparse: vec![false; 256],
288c67d6573Sopenharmony_ci            dense: vec![],
289c67d6573Sopenharmony_ci            complete: true,
290c67d6573Sopenharmony_ci            all_ascii: true,
291c67d6573Sopenharmony_ci        }
292c67d6573Sopenharmony_ci    }
293c67d6573Sopenharmony_ci
294c67d6573Sopenharmony_ci    fn prefixes(lits: &Literals) -> SingleByteSet {
295c67d6573Sopenharmony_ci        let mut sset = SingleByteSet::new();
296c67d6573Sopenharmony_ci        for lit in lits.literals() {
297c67d6573Sopenharmony_ci            sset.complete = sset.complete && lit.len() == 1;
298c67d6573Sopenharmony_ci            if let Some(&b) = lit.get(0) {
299c67d6573Sopenharmony_ci                if !sset.sparse[b as usize] {
300c67d6573Sopenharmony_ci                    if b > 0x7F {
301c67d6573Sopenharmony_ci                        sset.all_ascii = false;
302c67d6573Sopenharmony_ci                    }
303c67d6573Sopenharmony_ci                    sset.dense.push(b);
304c67d6573Sopenharmony_ci                    sset.sparse[b as usize] = true;
305c67d6573Sopenharmony_ci                }
306c67d6573Sopenharmony_ci            }
307c67d6573Sopenharmony_ci        }
308c67d6573Sopenharmony_ci        sset
309c67d6573Sopenharmony_ci    }
310c67d6573Sopenharmony_ci
311c67d6573Sopenharmony_ci    fn suffixes(lits: &Literals) -> SingleByteSet {
312c67d6573Sopenharmony_ci        let mut sset = SingleByteSet::new();
313c67d6573Sopenharmony_ci        for lit in lits.literals() {
314c67d6573Sopenharmony_ci            sset.complete = sset.complete && lit.len() == 1;
315c67d6573Sopenharmony_ci            if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) {
316c67d6573Sopenharmony_ci                if !sset.sparse[b as usize] {
317c67d6573Sopenharmony_ci                    if b > 0x7F {
318c67d6573Sopenharmony_ci                        sset.all_ascii = false;
319c67d6573Sopenharmony_ci                    }
320c67d6573Sopenharmony_ci                    sset.dense.push(b);
321c67d6573Sopenharmony_ci                    sset.sparse[b as usize] = true;
322c67d6573Sopenharmony_ci                }
323c67d6573Sopenharmony_ci            }
324c67d6573Sopenharmony_ci        }
325c67d6573Sopenharmony_ci        sset
326c67d6573Sopenharmony_ci    }
327c67d6573Sopenharmony_ci
328c67d6573Sopenharmony_ci    /// Faster find that special cases certain sizes to use memchr.
329c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
330c67d6573Sopenharmony_ci    fn find(&self, text: &[u8]) -> Option<usize> {
331c67d6573Sopenharmony_ci        match self.dense.len() {
332c67d6573Sopenharmony_ci            0 => None,
333c67d6573Sopenharmony_ci            1 => memchr(self.dense[0], text),
334c67d6573Sopenharmony_ci            2 => memchr2(self.dense[0], self.dense[1], text),
335c67d6573Sopenharmony_ci            3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text),
336c67d6573Sopenharmony_ci            _ => self._find(text),
337c67d6573Sopenharmony_ci        }
338c67d6573Sopenharmony_ci    }
339c67d6573Sopenharmony_ci
340c67d6573Sopenharmony_ci    /// Generic find that works on any sized set.
341c67d6573Sopenharmony_ci    fn _find(&self, haystack: &[u8]) -> Option<usize> {
342c67d6573Sopenharmony_ci        for (i, &b) in haystack.iter().enumerate() {
343c67d6573Sopenharmony_ci            if self.sparse[b as usize] {
344c67d6573Sopenharmony_ci                return Some(i);
345c67d6573Sopenharmony_ci            }
346c67d6573Sopenharmony_ci        }
347c67d6573Sopenharmony_ci        None
348c67d6573Sopenharmony_ci    }
349c67d6573Sopenharmony_ci
350c67d6573Sopenharmony_ci    fn approximate_size(&self) -> usize {
351c67d6573Sopenharmony_ci        (self.dense.len() * mem::size_of::<u8>())
352c67d6573Sopenharmony_ci            + (self.sparse.len() * mem::size_of::<bool>())
353c67d6573Sopenharmony_ci    }
354c67d6573Sopenharmony_ci}
355c67d6573Sopenharmony_ci
356c67d6573Sopenharmony_ci/// A simple wrapper around the memchr crate's memmem implementation.
357c67d6573Sopenharmony_ci///
358c67d6573Sopenharmony_ci/// The API this exposes mirrors the API of previous substring searchers that
359c67d6573Sopenharmony_ci/// this supplanted.
360c67d6573Sopenharmony_ci#[derive(Clone, Debug)]
361c67d6573Sopenharmony_cipub struct Memmem {
362c67d6573Sopenharmony_ci    finder: memmem::Finder<'static>,
363c67d6573Sopenharmony_ci    char_len: usize,
364c67d6573Sopenharmony_ci}
365c67d6573Sopenharmony_ci
366c67d6573Sopenharmony_ciimpl Memmem {
367c67d6573Sopenharmony_ci    fn new(pat: &[u8]) -> Memmem {
368c67d6573Sopenharmony_ci        Memmem {
369c67d6573Sopenharmony_ci            finder: memmem::Finder::new(pat).into_owned(),
370c67d6573Sopenharmony_ci            char_len: char_len_lossy(pat),
371c67d6573Sopenharmony_ci        }
372c67d6573Sopenharmony_ci    }
373c67d6573Sopenharmony_ci
374c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
375c67d6573Sopenharmony_ci    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
376c67d6573Sopenharmony_ci        self.finder.find(haystack)
377c67d6573Sopenharmony_ci    }
378c67d6573Sopenharmony_ci
379c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
380c67d6573Sopenharmony_ci    pub fn is_suffix(&self, text: &[u8]) -> bool {
381c67d6573Sopenharmony_ci        if text.len() < self.len() {
382c67d6573Sopenharmony_ci            return false;
383c67d6573Sopenharmony_ci        }
384c67d6573Sopenharmony_ci        &text[text.len() - self.len()..] == self.finder.needle()
385c67d6573Sopenharmony_ci    }
386c67d6573Sopenharmony_ci
387c67d6573Sopenharmony_ci    pub fn len(&self) -> usize {
388c67d6573Sopenharmony_ci        self.finder.needle().len()
389c67d6573Sopenharmony_ci    }
390c67d6573Sopenharmony_ci
391c67d6573Sopenharmony_ci    pub fn char_len(&self) -> usize {
392c67d6573Sopenharmony_ci        self.char_len
393c67d6573Sopenharmony_ci    }
394c67d6573Sopenharmony_ci
395c67d6573Sopenharmony_ci    fn approximate_size(&self) -> usize {
396c67d6573Sopenharmony_ci        self.finder.needle().len() * mem::size_of::<u8>()
397c67d6573Sopenharmony_ci    }
398c67d6573Sopenharmony_ci}
399c67d6573Sopenharmony_ci
400c67d6573Sopenharmony_cifn char_len_lossy(bytes: &[u8]) -> usize {
401c67d6573Sopenharmony_ci    String::from_utf8_lossy(bytes).chars().count()
402c67d6573Sopenharmony_ci}
403