1c67d6573Sopenharmony_ciuse std::cell::RefCell;
2c67d6573Sopenharmony_ciuse std::collections::HashMap;
3c67d6573Sopenharmony_ciuse std::panic::AssertUnwindSafe;
4c67d6573Sopenharmony_ciuse std::sync::Arc;
5c67d6573Sopenharmony_ci
6c67d6573Sopenharmony_ci#[cfg(feature = "perf-literal")]
7c67d6573Sopenharmony_ciuse aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
8c67d6573Sopenharmony_ciuse regex_syntax::hir::literal::Literals;
9c67d6573Sopenharmony_ciuse regex_syntax::hir::Hir;
10c67d6573Sopenharmony_ciuse regex_syntax::ParserBuilder;
11c67d6573Sopenharmony_ci
12c67d6573Sopenharmony_ciuse crate::backtrack;
13c67d6573Sopenharmony_ciuse crate::compile::Compiler;
14c67d6573Sopenharmony_ci#[cfg(feature = "perf-dfa")]
15c67d6573Sopenharmony_ciuse crate::dfa;
16c67d6573Sopenharmony_ciuse crate::error::Error;
17c67d6573Sopenharmony_ciuse crate::input::{ByteInput, CharInput};
18c67d6573Sopenharmony_ciuse crate::literal::LiteralSearcher;
19c67d6573Sopenharmony_ciuse crate::pikevm;
20c67d6573Sopenharmony_ciuse crate::pool::{Pool, PoolGuard};
21c67d6573Sopenharmony_ciuse crate::prog::Program;
22c67d6573Sopenharmony_ciuse crate::re_builder::RegexOptions;
23c67d6573Sopenharmony_ciuse crate::re_bytes;
24c67d6573Sopenharmony_ciuse crate::re_set;
25c67d6573Sopenharmony_ciuse crate::re_trait::{Locations, RegularExpression, Slot};
26c67d6573Sopenharmony_ciuse crate::re_unicode;
27c67d6573Sopenharmony_ciuse crate::utf8::next_utf8;
28c67d6573Sopenharmony_ci
29c67d6573Sopenharmony_ci/// `Exec` manages the execution of a regular expression.
30c67d6573Sopenharmony_ci///
31c67d6573Sopenharmony_ci/// In particular, this manages the various compiled forms of a single regular
32c67d6573Sopenharmony_ci/// expression and the choice of which matching engine to use to execute a
33c67d6573Sopenharmony_ci/// regular expression.
34c67d6573Sopenharmony_ci#[derive(Debug)]
35c67d6573Sopenharmony_cipub struct Exec {
36c67d6573Sopenharmony_ci    /// All read only state.
37c67d6573Sopenharmony_ci    ro: Arc<ExecReadOnly>,
38c67d6573Sopenharmony_ci    /// A pool of reusable values for the various matching engines.
39c67d6573Sopenharmony_ci    ///
40c67d6573Sopenharmony_ci    /// Note that boxing this value is not strictly necessary, but it is an
41c67d6573Sopenharmony_ci    /// easy way to ensure that T does not bloat the stack sized used by a pool
42c67d6573Sopenharmony_ci    /// in the case where T is big. And this turns out to be the case at the
43c67d6573Sopenharmony_ci    /// time of writing for regex's use of this pool. At the time of writing,
44c67d6573Sopenharmony_ci    /// the size of a Regex on the stack is 856 bytes. Boxing this value
45c67d6573Sopenharmony_ci    /// reduces that size to 16 bytes.
46c67d6573Sopenharmony_ci    pool: Box<Pool<ProgramCache>>,
47c67d6573Sopenharmony_ci}
48c67d6573Sopenharmony_ci
49c67d6573Sopenharmony_ci/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
50c67d6573Sopenharmony_ci/// means it is no longer Sync, but we can now avoid the overhead of
51c67d6573Sopenharmony_ci/// synchronization to fetch the cache.
52c67d6573Sopenharmony_ci#[derive(Debug)]
53c67d6573Sopenharmony_cipub struct ExecNoSync<'c> {
54c67d6573Sopenharmony_ci    /// All read only state.
55c67d6573Sopenharmony_ci    ro: &'c Arc<ExecReadOnly>,
56c67d6573Sopenharmony_ci    /// Caches for the various matching engines.
57c67d6573Sopenharmony_ci    cache: PoolGuard<'c, ProgramCache>,
58c67d6573Sopenharmony_ci}
59c67d6573Sopenharmony_ci
60c67d6573Sopenharmony_ci/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
61c67d6573Sopenharmony_ci#[derive(Debug)]
62c67d6573Sopenharmony_cipub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
63c67d6573Sopenharmony_ci
64c67d6573Sopenharmony_ci/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
65c67d6573Sopenharmony_ci/// state is determined at compile time and never changes during search.
66c67d6573Sopenharmony_ci#[derive(Debug)]
67c67d6573Sopenharmony_cistruct ExecReadOnly {
68c67d6573Sopenharmony_ci    /// The original regular expressions given by the caller to compile.
69c67d6573Sopenharmony_ci    res: Vec<String>,
70c67d6573Sopenharmony_ci    /// A compiled program that is used in the NFA simulation and backtracking.
71c67d6573Sopenharmony_ci    /// It can be byte-based or Unicode codepoint based.
72c67d6573Sopenharmony_ci    ///
73c67d6573Sopenharmony_ci    /// N.B. It is not possibly to make this byte-based from the public API.
74c67d6573Sopenharmony_ci    /// It is only used for testing byte based programs in the NFA simulations.
75c67d6573Sopenharmony_ci    nfa: Program,
76c67d6573Sopenharmony_ci    /// A compiled byte based program for DFA execution. This is only used
77c67d6573Sopenharmony_ci    /// if a DFA can be executed. (Currently, only word boundary assertions are
78c67d6573Sopenharmony_ci    /// not supported.) Note that this program contains an embedded `.*?`
79c67d6573Sopenharmony_ci    /// preceding the first capture group, unless the regex is anchored at the
80c67d6573Sopenharmony_ci    /// beginning.
81c67d6573Sopenharmony_ci    dfa: Program,
82c67d6573Sopenharmony_ci    /// The same as above, except the program is reversed (and there is no
83c67d6573Sopenharmony_ci    /// preceding `.*?`). This is used by the DFA to find the starting location
84c67d6573Sopenharmony_ci    /// of matches.
85c67d6573Sopenharmony_ci    dfa_reverse: Program,
86c67d6573Sopenharmony_ci    /// A set of suffix literals extracted from the regex.
87c67d6573Sopenharmony_ci    ///
88c67d6573Sopenharmony_ci    /// Prefix literals are stored on the `Program`, since they are used inside
89c67d6573Sopenharmony_ci    /// the matching engines.
90c67d6573Sopenharmony_ci    suffixes: LiteralSearcher,
91c67d6573Sopenharmony_ci    /// An Aho-Corasick automaton with leftmost-first match semantics.
92c67d6573Sopenharmony_ci    ///
93c67d6573Sopenharmony_ci    /// This is only set when the entire regex is a simple unanchored
94c67d6573Sopenharmony_ci    /// alternation of literals. We could probably use it more circumstances,
95c67d6573Sopenharmony_ci    /// but this is already hacky enough in this architecture.
96c67d6573Sopenharmony_ci    ///
97c67d6573Sopenharmony_ci    /// N.B. We use u32 as a state ID representation under the assumption that
98c67d6573Sopenharmony_ci    /// if we were to exhaust the ID space, we probably would have long
99c67d6573Sopenharmony_ci    /// surpassed the compilation size limit.
100c67d6573Sopenharmony_ci    #[cfg(feature = "perf-literal")]
101c67d6573Sopenharmony_ci    ac: Option<AhoCorasick<u32>>,
102c67d6573Sopenharmony_ci    /// match_type encodes as much upfront knowledge about how we're going to
103c67d6573Sopenharmony_ci    /// execute a search as possible.
104c67d6573Sopenharmony_ci    match_type: MatchType,
105c67d6573Sopenharmony_ci}
106c67d6573Sopenharmony_ci
107c67d6573Sopenharmony_ci/// Facilitates the construction of an executor by exposing various knobs
108c67d6573Sopenharmony_ci/// to control how a regex is executed and what kinds of resources it's
109c67d6573Sopenharmony_ci/// permitted to use.
110c67d6573Sopenharmony_ci// `ExecBuilder` is only public via the `internal` module, so avoid deriving
111c67d6573Sopenharmony_ci// `Debug`.
112c67d6573Sopenharmony_ci#[allow(missing_debug_implementations)]
113c67d6573Sopenharmony_cipub struct ExecBuilder {
114c67d6573Sopenharmony_ci    options: RegexOptions,
115c67d6573Sopenharmony_ci    match_type: Option<MatchType>,
116c67d6573Sopenharmony_ci    bytes: bool,
117c67d6573Sopenharmony_ci    only_utf8: bool,
118c67d6573Sopenharmony_ci}
119c67d6573Sopenharmony_ci
120c67d6573Sopenharmony_ci/// Parsed represents a set of parsed regular expressions and their detected
121c67d6573Sopenharmony_ci/// literals.
122c67d6573Sopenharmony_cistruct Parsed {
123c67d6573Sopenharmony_ci    exprs: Vec<Hir>,
124c67d6573Sopenharmony_ci    prefixes: Literals,
125c67d6573Sopenharmony_ci    suffixes: Literals,
126c67d6573Sopenharmony_ci    bytes: bool,
127c67d6573Sopenharmony_ci}
128c67d6573Sopenharmony_ci
129c67d6573Sopenharmony_ciimpl ExecBuilder {
130c67d6573Sopenharmony_ci    /// Create a regex execution builder.
131c67d6573Sopenharmony_ci    ///
132c67d6573Sopenharmony_ci    /// This uses default settings for everything except the regex itself,
133c67d6573Sopenharmony_ci    /// which must be provided. Further knobs can be set by calling methods,
134c67d6573Sopenharmony_ci    /// and then finally, `build` to actually create the executor.
135c67d6573Sopenharmony_ci    pub fn new(re: &str) -> Self {
136c67d6573Sopenharmony_ci        Self::new_many(&[re])
137c67d6573Sopenharmony_ci    }
138c67d6573Sopenharmony_ci
139c67d6573Sopenharmony_ci    /// Like new, but compiles the union of the given regular expressions.
140c67d6573Sopenharmony_ci    ///
141c67d6573Sopenharmony_ci    /// Note that when compiling 2 or more regular expressions, capture groups
142c67d6573Sopenharmony_ci    /// are completely unsupported. (This means both `find` and `captures`
143c67d6573Sopenharmony_ci    /// won't work.)
144c67d6573Sopenharmony_ci    pub fn new_many<I, S>(res: I) -> Self
145c67d6573Sopenharmony_ci    where
146c67d6573Sopenharmony_ci        S: AsRef<str>,
147c67d6573Sopenharmony_ci        I: IntoIterator<Item = S>,
148c67d6573Sopenharmony_ci    {
149c67d6573Sopenharmony_ci        let mut opts = RegexOptions::default();
150c67d6573Sopenharmony_ci        opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect();
151c67d6573Sopenharmony_ci        Self::new_options(opts)
152c67d6573Sopenharmony_ci    }
153c67d6573Sopenharmony_ci
154c67d6573Sopenharmony_ci    /// Create a regex execution builder.
155c67d6573Sopenharmony_ci    pub fn new_options(opts: RegexOptions) -> Self {
156c67d6573Sopenharmony_ci        ExecBuilder {
157c67d6573Sopenharmony_ci            options: opts,
158c67d6573Sopenharmony_ci            match_type: None,
159c67d6573Sopenharmony_ci            bytes: false,
160c67d6573Sopenharmony_ci            only_utf8: true,
161c67d6573Sopenharmony_ci        }
162c67d6573Sopenharmony_ci    }
163c67d6573Sopenharmony_ci
164c67d6573Sopenharmony_ci    /// Set the matching engine to be automatically determined.
165c67d6573Sopenharmony_ci    ///
166c67d6573Sopenharmony_ci    /// This is the default state and will apply whatever optimizations are
167c67d6573Sopenharmony_ci    /// possible, such as running a DFA.
168c67d6573Sopenharmony_ci    ///
169c67d6573Sopenharmony_ci    /// This overrides whatever was previously set via the `nfa` or
170c67d6573Sopenharmony_ci    /// `bounded_backtracking` methods.
171c67d6573Sopenharmony_ci    pub fn automatic(mut self) -> Self {
172c67d6573Sopenharmony_ci        self.match_type = None;
173c67d6573Sopenharmony_ci        self
174c67d6573Sopenharmony_ci    }
175c67d6573Sopenharmony_ci
176c67d6573Sopenharmony_ci    /// Sets the matching engine to use the NFA algorithm no matter what
177c67d6573Sopenharmony_ci    /// optimizations are possible.
178c67d6573Sopenharmony_ci    ///
179c67d6573Sopenharmony_ci    /// This overrides whatever was previously set via the `automatic` or
180c67d6573Sopenharmony_ci    /// `bounded_backtracking` methods.
181c67d6573Sopenharmony_ci    pub fn nfa(mut self) -> Self {
182c67d6573Sopenharmony_ci        self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM));
183c67d6573Sopenharmony_ci        self
184c67d6573Sopenharmony_ci    }
185c67d6573Sopenharmony_ci
186c67d6573Sopenharmony_ci    /// Sets the matching engine to use a bounded backtracking engine no
187c67d6573Sopenharmony_ci    /// matter what optimizations are possible.
188c67d6573Sopenharmony_ci    ///
189c67d6573Sopenharmony_ci    /// One must use this with care, since the bounded backtracking engine
190c67d6573Sopenharmony_ci    /// uses memory proportion to `len(regex) * len(text)`.
191c67d6573Sopenharmony_ci    ///
192c67d6573Sopenharmony_ci    /// This overrides whatever was previously set via the `automatic` or
193c67d6573Sopenharmony_ci    /// `nfa` methods.
194c67d6573Sopenharmony_ci    pub fn bounded_backtracking(mut self) -> Self {
195c67d6573Sopenharmony_ci        self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack));
196c67d6573Sopenharmony_ci        self
197c67d6573Sopenharmony_ci    }
198c67d6573Sopenharmony_ci
199c67d6573Sopenharmony_ci    /// Compiles byte based programs for use with the NFA matching engines.
200c67d6573Sopenharmony_ci    ///
201c67d6573Sopenharmony_ci    /// By default, the NFA engines match on Unicode scalar values. They can
202c67d6573Sopenharmony_ci    /// be made to use byte based programs instead. In general, the byte based
203c67d6573Sopenharmony_ci    /// programs are slower because of a less efficient encoding of character
204c67d6573Sopenharmony_ci    /// classes.
205c67d6573Sopenharmony_ci    ///
206c67d6573Sopenharmony_ci    /// Note that this does not impact DFA matching engines, which always
207c67d6573Sopenharmony_ci    /// execute on bytes.
208c67d6573Sopenharmony_ci    pub fn bytes(mut self, yes: bool) -> Self {
209c67d6573Sopenharmony_ci        self.bytes = yes;
210c67d6573Sopenharmony_ci        self
211c67d6573Sopenharmony_ci    }
212c67d6573Sopenharmony_ci
213c67d6573Sopenharmony_ci    /// When disabled, the program compiled may match arbitrary bytes.
214c67d6573Sopenharmony_ci    ///
215c67d6573Sopenharmony_ci    /// When enabled (the default), all compiled programs exclusively match
216c67d6573Sopenharmony_ci    /// valid UTF-8 bytes.
217c67d6573Sopenharmony_ci    pub fn only_utf8(mut self, yes: bool) -> Self {
218c67d6573Sopenharmony_ci        self.only_utf8 = yes;
219c67d6573Sopenharmony_ci        self
220c67d6573Sopenharmony_ci    }
221c67d6573Sopenharmony_ci
222c67d6573Sopenharmony_ci    /// Set the Unicode flag.
223c67d6573Sopenharmony_ci    pub fn unicode(mut self, yes: bool) -> Self {
224c67d6573Sopenharmony_ci        self.options.unicode = yes;
225c67d6573Sopenharmony_ci        self
226c67d6573Sopenharmony_ci    }
227c67d6573Sopenharmony_ci
228c67d6573Sopenharmony_ci    /// Parse the current set of patterns into their AST and extract literals.
229c67d6573Sopenharmony_ci    fn parse(&self) -> Result<Parsed, Error> {
230c67d6573Sopenharmony_ci        let mut exprs = Vec::with_capacity(self.options.pats.len());
231c67d6573Sopenharmony_ci        let mut prefixes = Some(Literals::empty());
232c67d6573Sopenharmony_ci        let mut suffixes = Some(Literals::empty());
233c67d6573Sopenharmony_ci        let mut bytes = false;
234c67d6573Sopenharmony_ci        let is_set = self.options.pats.len() > 1;
235c67d6573Sopenharmony_ci        // If we're compiling a regex set and that set has any anchored
236c67d6573Sopenharmony_ci        // expressions, then disable all literal optimizations.
237c67d6573Sopenharmony_ci        for pat in &self.options.pats {
238c67d6573Sopenharmony_ci            let mut parser = ParserBuilder::new()
239c67d6573Sopenharmony_ci                .octal(self.options.octal)
240c67d6573Sopenharmony_ci                .case_insensitive(self.options.case_insensitive)
241c67d6573Sopenharmony_ci                .multi_line(self.options.multi_line)
242c67d6573Sopenharmony_ci                .dot_matches_new_line(self.options.dot_matches_new_line)
243c67d6573Sopenharmony_ci                .swap_greed(self.options.swap_greed)
244c67d6573Sopenharmony_ci                .ignore_whitespace(self.options.ignore_whitespace)
245c67d6573Sopenharmony_ci                .unicode(self.options.unicode)
246c67d6573Sopenharmony_ci                .allow_invalid_utf8(!self.only_utf8)
247c67d6573Sopenharmony_ci                .nest_limit(self.options.nest_limit)
248c67d6573Sopenharmony_ci                .build();
249c67d6573Sopenharmony_ci            let expr =
250c67d6573Sopenharmony_ci                parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?;
251c67d6573Sopenharmony_ci            bytes = bytes || !expr.is_always_utf8();
252c67d6573Sopenharmony_ci
253c67d6573Sopenharmony_ci            if cfg!(feature = "perf-literal") {
254c67d6573Sopenharmony_ci                if !expr.is_anchored_start() && expr.is_any_anchored_start() {
255c67d6573Sopenharmony_ci                    // Partial anchors unfortunately make it hard to use
256c67d6573Sopenharmony_ci                    // prefixes, so disable them.
257c67d6573Sopenharmony_ci                    prefixes = None;
258c67d6573Sopenharmony_ci                } else if is_set && expr.is_anchored_start() {
259c67d6573Sopenharmony_ci                    // Regex sets with anchors do not go well with literal
260c67d6573Sopenharmony_ci                    // optimizations.
261c67d6573Sopenharmony_ci                    prefixes = None;
262c67d6573Sopenharmony_ci                }
263c67d6573Sopenharmony_ci                prefixes = prefixes.and_then(|mut prefixes| {
264c67d6573Sopenharmony_ci                    if !prefixes.union_prefixes(&expr) {
265c67d6573Sopenharmony_ci                        None
266c67d6573Sopenharmony_ci                    } else {
267c67d6573Sopenharmony_ci                        Some(prefixes)
268c67d6573Sopenharmony_ci                    }
269c67d6573Sopenharmony_ci                });
270c67d6573Sopenharmony_ci
271c67d6573Sopenharmony_ci                if !expr.is_anchored_end() && expr.is_any_anchored_end() {
272c67d6573Sopenharmony_ci                    // Partial anchors unfortunately make it hard to use
273c67d6573Sopenharmony_ci                    // suffixes, so disable them.
274c67d6573Sopenharmony_ci                    suffixes = None;
275c67d6573Sopenharmony_ci                } else if is_set && expr.is_anchored_end() {
276c67d6573Sopenharmony_ci                    // Regex sets with anchors do not go well with literal
277c67d6573Sopenharmony_ci                    // optimizations.
278c67d6573Sopenharmony_ci                    suffixes = None;
279c67d6573Sopenharmony_ci                }
280c67d6573Sopenharmony_ci                suffixes = suffixes.and_then(|mut suffixes| {
281c67d6573Sopenharmony_ci                    if !suffixes.union_suffixes(&expr) {
282c67d6573Sopenharmony_ci                        None
283c67d6573Sopenharmony_ci                    } else {
284c67d6573Sopenharmony_ci                        Some(suffixes)
285c67d6573Sopenharmony_ci                    }
286c67d6573Sopenharmony_ci                });
287c67d6573Sopenharmony_ci            }
288c67d6573Sopenharmony_ci            exprs.push(expr);
289c67d6573Sopenharmony_ci        }
290c67d6573Sopenharmony_ci        Ok(Parsed {
291c67d6573Sopenharmony_ci            exprs,
292c67d6573Sopenharmony_ci            prefixes: prefixes.unwrap_or_else(Literals::empty),
293c67d6573Sopenharmony_ci            suffixes: suffixes.unwrap_or_else(Literals::empty),
294c67d6573Sopenharmony_ci            bytes,
295c67d6573Sopenharmony_ci        })
296c67d6573Sopenharmony_ci    }
297c67d6573Sopenharmony_ci
298c67d6573Sopenharmony_ci    /// Build an executor that can run a regular expression.
299c67d6573Sopenharmony_ci    pub fn build(self) -> Result<Exec, Error> {
300c67d6573Sopenharmony_ci        // Special case when we have no patterns to compile.
301c67d6573Sopenharmony_ci        // This can happen when compiling a regex set.
302c67d6573Sopenharmony_ci        if self.options.pats.is_empty() {
303c67d6573Sopenharmony_ci            let ro = Arc::new(ExecReadOnly {
304c67d6573Sopenharmony_ci                res: vec![],
305c67d6573Sopenharmony_ci                nfa: Program::new(),
306c67d6573Sopenharmony_ci                dfa: Program::new(),
307c67d6573Sopenharmony_ci                dfa_reverse: Program::new(),
308c67d6573Sopenharmony_ci                suffixes: LiteralSearcher::empty(),
309c67d6573Sopenharmony_ci                #[cfg(feature = "perf-literal")]
310c67d6573Sopenharmony_ci                ac: None,
311c67d6573Sopenharmony_ci                match_type: MatchType::Nothing,
312c67d6573Sopenharmony_ci            });
313c67d6573Sopenharmony_ci            let pool = ExecReadOnly::new_pool(&ro);
314c67d6573Sopenharmony_ci            return Ok(Exec { ro, pool });
315c67d6573Sopenharmony_ci        }
316c67d6573Sopenharmony_ci        let parsed = self.parse()?;
317c67d6573Sopenharmony_ci        let mut nfa = Compiler::new()
318c67d6573Sopenharmony_ci            .size_limit(self.options.size_limit)
319c67d6573Sopenharmony_ci            .bytes(self.bytes || parsed.bytes)
320c67d6573Sopenharmony_ci            .only_utf8(self.only_utf8)
321c67d6573Sopenharmony_ci            .compile(&parsed.exprs)?;
322c67d6573Sopenharmony_ci        let mut dfa = Compiler::new()
323c67d6573Sopenharmony_ci            .size_limit(self.options.size_limit)
324c67d6573Sopenharmony_ci            .dfa(true)
325c67d6573Sopenharmony_ci            .only_utf8(self.only_utf8)
326c67d6573Sopenharmony_ci            .compile(&parsed.exprs)?;
327c67d6573Sopenharmony_ci        let mut dfa_reverse = Compiler::new()
328c67d6573Sopenharmony_ci            .size_limit(self.options.size_limit)
329c67d6573Sopenharmony_ci            .dfa(true)
330c67d6573Sopenharmony_ci            .only_utf8(self.only_utf8)
331c67d6573Sopenharmony_ci            .reverse(true)
332c67d6573Sopenharmony_ci            .compile(&parsed.exprs)?;
333c67d6573Sopenharmony_ci
334c67d6573Sopenharmony_ci        #[cfg(feature = "perf-literal")]
335c67d6573Sopenharmony_ci        let ac = self.build_aho_corasick(&parsed);
336c67d6573Sopenharmony_ci        nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
337c67d6573Sopenharmony_ci        dfa.prefixes = nfa.prefixes.clone();
338c67d6573Sopenharmony_ci        dfa.dfa_size_limit = self.options.dfa_size_limit;
339c67d6573Sopenharmony_ci        dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
340c67d6573Sopenharmony_ci
341c67d6573Sopenharmony_ci        let mut ro = ExecReadOnly {
342c67d6573Sopenharmony_ci            res: self.options.pats,
343c67d6573Sopenharmony_ci            nfa,
344c67d6573Sopenharmony_ci            dfa,
345c67d6573Sopenharmony_ci            dfa_reverse,
346c67d6573Sopenharmony_ci            suffixes: LiteralSearcher::suffixes(parsed.suffixes),
347c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
348c67d6573Sopenharmony_ci            ac,
349c67d6573Sopenharmony_ci            match_type: MatchType::Nothing,
350c67d6573Sopenharmony_ci        };
351c67d6573Sopenharmony_ci        ro.match_type = ro.choose_match_type(self.match_type);
352c67d6573Sopenharmony_ci
353c67d6573Sopenharmony_ci        let ro = Arc::new(ro);
354c67d6573Sopenharmony_ci        let pool = ExecReadOnly::new_pool(&ro);
355c67d6573Sopenharmony_ci        Ok(Exec { ro, pool })
356c67d6573Sopenharmony_ci    }
357c67d6573Sopenharmony_ci
358c67d6573Sopenharmony_ci    #[cfg(feature = "perf-literal")]
359c67d6573Sopenharmony_ci    fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
360c67d6573Sopenharmony_ci        if parsed.exprs.len() != 1 {
361c67d6573Sopenharmony_ci            return None;
362c67d6573Sopenharmony_ci        }
363c67d6573Sopenharmony_ci        let lits = match alternation_literals(&parsed.exprs[0]) {
364c67d6573Sopenharmony_ci            None => return None,
365c67d6573Sopenharmony_ci            Some(lits) => lits,
366c67d6573Sopenharmony_ci        };
367c67d6573Sopenharmony_ci        // If we have a small number of literals, then let Teddy handle
368c67d6573Sopenharmony_ci        // things (see literal/mod.rs).
369c67d6573Sopenharmony_ci        if lits.len() <= 32 {
370c67d6573Sopenharmony_ci            return None;
371c67d6573Sopenharmony_ci        }
372c67d6573Sopenharmony_ci        Some(
373c67d6573Sopenharmony_ci            AhoCorasickBuilder::new()
374c67d6573Sopenharmony_ci                .match_kind(MatchKind::LeftmostFirst)
375c67d6573Sopenharmony_ci                .auto_configure(&lits)
376c67d6573Sopenharmony_ci                .build_with_size::<u32, _, _>(&lits)
377c67d6573Sopenharmony_ci                // This should never happen because we'd long exceed the
378c67d6573Sopenharmony_ci                // compilation limit for regexes first.
379c67d6573Sopenharmony_ci                .expect("AC automaton too big"),
380c67d6573Sopenharmony_ci        )
381c67d6573Sopenharmony_ci    }
382c67d6573Sopenharmony_ci}
383c67d6573Sopenharmony_ci
384c67d6573Sopenharmony_ciimpl<'c> RegularExpression for ExecNoSyncStr<'c> {
385c67d6573Sopenharmony_ci    type Text = str;
386c67d6573Sopenharmony_ci
387c67d6573Sopenharmony_ci    fn slots_len(&self) -> usize {
388c67d6573Sopenharmony_ci        self.0.slots_len()
389c67d6573Sopenharmony_ci    }
390c67d6573Sopenharmony_ci
391c67d6573Sopenharmony_ci    fn next_after_empty(&self, text: &str, i: usize) -> usize {
392c67d6573Sopenharmony_ci        next_utf8(text.as_bytes(), i)
393c67d6573Sopenharmony_ci    }
394c67d6573Sopenharmony_ci
395c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
396c67d6573Sopenharmony_ci    fn shortest_match_at(&self, text: &str, start: usize) -> Option<usize> {
397c67d6573Sopenharmony_ci        self.0.shortest_match_at(text.as_bytes(), start)
398c67d6573Sopenharmony_ci    }
399c67d6573Sopenharmony_ci
400c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
401c67d6573Sopenharmony_ci    fn is_match_at(&self, text: &str, start: usize) -> bool {
402c67d6573Sopenharmony_ci        self.0.is_match_at(text.as_bytes(), start)
403c67d6573Sopenharmony_ci    }
404c67d6573Sopenharmony_ci
405c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
406c67d6573Sopenharmony_ci    fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> {
407c67d6573Sopenharmony_ci        self.0.find_at(text.as_bytes(), start)
408c67d6573Sopenharmony_ci    }
409c67d6573Sopenharmony_ci
410c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
411c67d6573Sopenharmony_ci    fn captures_read_at(
412c67d6573Sopenharmony_ci        &self,
413c67d6573Sopenharmony_ci        locs: &mut Locations,
414c67d6573Sopenharmony_ci        text: &str,
415c67d6573Sopenharmony_ci        start: usize,
416c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
417c67d6573Sopenharmony_ci        self.0.captures_read_at(locs, text.as_bytes(), start)
418c67d6573Sopenharmony_ci    }
419c67d6573Sopenharmony_ci}
420c67d6573Sopenharmony_ci
421c67d6573Sopenharmony_ciimpl<'c> RegularExpression for ExecNoSync<'c> {
422c67d6573Sopenharmony_ci    type Text = [u8];
423c67d6573Sopenharmony_ci
424c67d6573Sopenharmony_ci    /// Returns the number of capture slots in the regular expression. (There
425c67d6573Sopenharmony_ci    /// are two slots for every capture group, corresponding to possibly empty
426c67d6573Sopenharmony_ci    /// start and end locations of the capture.)
427c67d6573Sopenharmony_ci    fn slots_len(&self) -> usize {
428c67d6573Sopenharmony_ci        self.ro.nfa.captures.len() * 2
429c67d6573Sopenharmony_ci    }
430c67d6573Sopenharmony_ci
431c67d6573Sopenharmony_ci    fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
432c67d6573Sopenharmony_ci        i + 1
433c67d6573Sopenharmony_ci    }
434c67d6573Sopenharmony_ci
435c67d6573Sopenharmony_ci    /// Returns the end of a match location, possibly occurring before the
436c67d6573Sopenharmony_ci    /// end location of the correct leftmost-first match.
437c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
438c67d6573Sopenharmony_ci    fn shortest_match_at(&self, text: &[u8], start: usize) -> Option<usize> {
439c67d6573Sopenharmony_ci        if !self.is_anchor_end_match(text) {
440c67d6573Sopenharmony_ci            return None;
441c67d6573Sopenharmony_ci        }
442c67d6573Sopenharmony_ci        match self.ro.match_type {
443c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
444c67d6573Sopenharmony_ci            MatchType::Literal(ty) => {
445c67d6573Sopenharmony_ci                self.find_literals(ty, text, start).map(|(_, e)| e)
446c67d6573Sopenharmony_ci            }
447c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
448c67d6573Sopenharmony_ci            MatchType::Dfa | MatchType::DfaMany => {
449c67d6573Sopenharmony_ci                match self.shortest_dfa(text, start) {
450c67d6573Sopenharmony_ci                    dfa::Result::Match(end) => Some(end),
451c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
452c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.shortest_nfa(text, start),
453c67d6573Sopenharmony_ci                }
454c67d6573Sopenharmony_ci            }
455c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
456c67d6573Sopenharmony_ci            MatchType::DfaAnchoredReverse => {
457c67d6573Sopenharmony_ci                match dfa::Fsm::reverse(
458c67d6573Sopenharmony_ci                    &self.ro.dfa_reverse,
459c67d6573Sopenharmony_ci                    self.cache.value(),
460c67d6573Sopenharmony_ci                    true,
461c67d6573Sopenharmony_ci                    &text[start..],
462c67d6573Sopenharmony_ci                    text.len(),
463c67d6573Sopenharmony_ci                ) {
464c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => Some(text.len()),
465c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
466c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.shortest_nfa(text, start),
467c67d6573Sopenharmony_ci                }
468c67d6573Sopenharmony_ci            }
469c67d6573Sopenharmony_ci            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
470c67d6573Sopenharmony_ci            MatchType::DfaSuffix => {
471c67d6573Sopenharmony_ci                match self.shortest_dfa_reverse_suffix(text, start) {
472c67d6573Sopenharmony_ci                    dfa::Result::Match(e) => Some(e),
473c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
474c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.shortest_nfa(text, start),
475c67d6573Sopenharmony_ci                }
476c67d6573Sopenharmony_ci            }
477c67d6573Sopenharmony_ci            MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start),
478c67d6573Sopenharmony_ci            MatchType::Nothing => None,
479c67d6573Sopenharmony_ci        }
480c67d6573Sopenharmony_ci    }
481c67d6573Sopenharmony_ci
482c67d6573Sopenharmony_ci    /// Returns true if and only if the regex matches text.
483c67d6573Sopenharmony_ci    ///
484c67d6573Sopenharmony_ci    /// For single regular expressions, this is equivalent to calling
485c67d6573Sopenharmony_ci    /// shortest_match(...).is_some().
486c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
487c67d6573Sopenharmony_ci    fn is_match_at(&self, text: &[u8], start: usize) -> bool {
488c67d6573Sopenharmony_ci        if !self.is_anchor_end_match(text) {
489c67d6573Sopenharmony_ci            return false;
490c67d6573Sopenharmony_ci        }
491c67d6573Sopenharmony_ci        // We need to do this dance because shortest_match relies on the NFA
492c67d6573Sopenharmony_ci        // filling in captures[1], but a RegexSet has no captures. In other
493c67d6573Sopenharmony_ci        // words, a RegexSet can't (currently) use shortest_match. ---AG
494c67d6573Sopenharmony_ci        match self.ro.match_type {
495c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
496c67d6573Sopenharmony_ci            MatchType::Literal(ty) => {
497c67d6573Sopenharmony_ci                self.find_literals(ty, text, start).is_some()
498c67d6573Sopenharmony_ci            }
499c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
500c67d6573Sopenharmony_ci            MatchType::Dfa | MatchType::DfaMany => {
501c67d6573Sopenharmony_ci                match self.shortest_dfa(text, start) {
502c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => true,
503c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => false,
504c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.match_nfa(text, start),
505c67d6573Sopenharmony_ci                }
506c67d6573Sopenharmony_ci            }
507c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
508c67d6573Sopenharmony_ci            MatchType::DfaAnchoredReverse => {
509c67d6573Sopenharmony_ci                match dfa::Fsm::reverse(
510c67d6573Sopenharmony_ci                    &self.ro.dfa_reverse,
511c67d6573Sopenharmony_ci                    self.cache.value(),
512c67d6573Sopenharmony_ci                    true,
513c67d6573Sopenharmony_ci                    &text[start..],
514c67d6573Sopenharmony_ci                    text.len(),
515c67d6573Sopenharmony_ci                ) {
516c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => true,
517c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => false,
518c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.match_nfa(text, start),
519c67d6573Sopenharmony_ci                }
520c67d6573Sopenharmony_ci            }
521c67d6573Sopenharmony_ci            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
522c67d6573Sopenharmony_ci            MatchType::DfaSuffix => {
523c67d6573Sopenharmony_ci                match self.shortest_dfa_reverse_suffix(text, start) {
524c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => true,
525c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => false,
526c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.match_nfa(text, start),
527c67d6573Sopenharmony_ci                }
528c67d6573Sopenharmony_ci            }
529c67d6573Sopenharmony_ci            MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start),
530c67d6573Sopenharmony_ci            MatchType::Nothing => false,
531c67d6573Sopenharmony_ci        }
532c67d6573Sopenharmony_ci    }
533c67d6573Sopenharmony_ci
534c67d6573Sopenharmony_ci    /// Finds the start and end location of the leftmost-first match, starting
535c67d6573Sopenharmony_ci    /// at the given location.
536c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
537c67d6573Sopenharmony_ci    fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> {
538c67d6573Sopenharmony_ci        if !self.is_anchor_end_match(text) {
539c67d6573Sopenharmony_ci            return None;
540c67d6573Sopenharmony_ci        }
541c67d6573Sopenharmony_ci        match self.ro.match_type {
542c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
543c67d6573Sopenharmony_ci            MatchType::Literal(ty) => self.find_literals(ty, text, start),
544c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
545c67d6573Sopenharmony_ci            MatchType::Dfa => match self.find_dfa_forward(text, start) {
546c67d6573Sopenharmony_ci                dfa::Result::Match((s, e)) => Some((s, e)),
547c67d6573Sopenharmony_ci                dfa::Result::NoMatch(_) => None,
548c67d6573Sopenharmony_ci                dfa::Result::Quit => {
549c67d6573Sopenharmony_ci                    self.find_nfa(MatchNfaType::Auto, text, start)
550c67d6573Sopenharmony_ci                }
551c67d6573Sopenharmony_ci            },
552c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
553c67d6573Sopenharmony_ci            MatchType::DfaAnchoredReverse => {
554c67d6573Sopenharmony_ci                match self.find_dfa_anchored_reverse(text, start) {
555c67d6573Sopenharmony_ci                    dfa::Result::Match((s, e)) => Some((s, e)),
556c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
557c67d6573Sopenharmony_ci                    dfa::Result::Quit => {
558c67d6573Sopenharmony_ci                        self.find_nfa(MatchNfaType::Auto, text, start)
559c67d6573Sopenharmony_ci                    }
560c67d6573Sopenharmony_ci                }
561c67d6573Sopenharmony_ci            }
562c67d6573Sopenharmony_ci            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
563c67d6573Sopenharmony_ci            MatchType::DfaSuffix => {
564c67d6573Sopenharmony_ci                match self.find_dfa_reverse_suffix(text, start) {
565c67d6573Sopenharmony_ci                    dfa::Result::Match((s, e)) => Some((s, e)),
566c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
567c67d6573Sopenharmony_ci                    dfa::Result::Quit => {
568c67d6573Sopenharmony_ci                        self.find_nfa(MatchNfaType::Auto, text, start)
569c67d6573Sopenharmony_ci                    }
570c67d6573Sopenharmony_ci                }
571c67d6573Sopenharmony_ci            }
572c67d6573Sopenharmony_ci            MatchType::Nfa(ty) => self.find_nfa(ty, text, start),
573c67d6573Sopenharmony_ci            MatchType::Nothing => None,
574c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
575c67d6573Sopenharmony_ci            MatchType::DfaMany => {
576c67d6573Sopenharmony_ci                unreachable!("BUG: RegexSet cannot be used with find")
577c67d6573Sopenharmony_ci            }
578c67d6573Sopenharmony_ci        }
579c67d6573Sopenharmony_ci    }
580c67d6573Sopenharmony_ci
581c67d6573Sopenharmony_ci    /// Finds the start and end location of the leftmost-first match and also
582c67d6573Sopenharmony_ci    /// fills in all matching capture groups.
583c67d6573Sopenharmony_ci    ///
584c67d6573Sopenharmony_ci    /// The number of capture slots given should be equal to the total number
585c67d6573Sopenharmony_ci    /// of capture slots in the compiled program.
586c67d6573Sopenharmony_ci    ///
587c67d6573Sopenharmony_ci    /// Note that the first two slots always correspond to the start and end
588c67d6573Sopenharmony_ci    /// locations of the overall match.
589c67d6573Sopenharmony_ci    fn captures_read_at(
590c67d6573Sopenharmony_ci        &self,
591c67d6573Sopenharmony_ci        locs: &mut Locations,
592c67d6573Sopenharmony_ci        text: &[u8],
593c67d6573Sopenharmony_ci        start: usize,
594c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
595c67d6573Sopenharmony_ci        let slots = locs.as_slots();
596c67d6573Sopenharmony_ci        for slot in slots.iter_mut() {
597c67d6573Sopenharmony_ci            *slot = None;
598c67d6573Sopenharmony_ci        }
599c67d6573Sopenharmony_ci        // If the caller unnecessarily uses this, then we try to save them
600c67d6573Sopenharmony_ci        // from themselves.
601c67d6573Sopenharmony_ci        match slots.len() {
602c67d6573Sopenharmony_ci            0 => return self.find_at(text, start),
603c67d6573Sopenharmony_ci            2 => {
604c67d6573Sopenharmony_ci                return self.find_at(text, start).map(|(s, e)| {
605c67d6573Sopenharmony_ci                    slots[0] = Some(s);
606c67d6573Sopenharmony_ci                    slots[1] = Some(e);
607c67d6573Sopenharmony_ci                    (s, e)
608c67d6573Sopenharmony_ci                });
609c67d6573Sopenharmony_ci            }
610c67d6573Sopenharmony_ci            _ => {} // fallthrough
611c67d6573Sopenharmony_ci        }
612c67d6573Sopenharmony_ci        if !self.is_anchor_end_match(text) {
613c67d6573Sopenharmony_ci            return None;
614c67d6573Sopenharmony_ci        }
615c67d6573Sopenharmony_ci        match self.ro.match_type {
616c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
617c67d6573Sopenharmony_ci            MatchType::Literal(ty) => {
618c67d6573Sopenharmony_ci                self.find_literals(ty, text, start).and_then(|(s, e)| {
619c67d6573Sopenharmony_ci                    self.captures_nfa_type(
620c67d6573Sopenharmony_ci                        MatchNfaType::Auto,
621c67d6573Sopenharmony_ci                        slots,
622c67d6573Sopenharmony_ci                        text,
623c67d6573Sopenharmony_ci                        s,
624c67d6573Sopenharmony_ci                        e,
625c67d6573Sopenharmony_ci                    )
626c67d6573Sopenharmony_ci                })
627c67d6573Sopenharmony_ci            }
628c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
629c67d6573Sopenharmony_ci            MatchType::Dfa => {
630c67d6573Sopenharmony_ci                if self.ro.nfa.is_anchored_start {
631c67d6573Sopenharmony_ci                    self.captures_nfa(slots, text, start)
632c67d6573Sopenharmony_ci                } else {
633c67d6573Sopenharmony_ci                    match self.find_dfa_forward(text, start) {
634c67d6573Sopenharmony_ci                        dfa::Result::Match((s, e)) => self.captures_nfa_type(
635c67d6573Sopenharmony_ci                            MatchNfaType::Auto,
636c67d6573Sopenharmony_ci                            slots,
637c67d6573Sopenharmony_ci                            text,
638c67d6573Sopenharmony_ci                            s,
639c67d6573Sopenharmony_ci                            e,
640c67d6573Sopenharmony_ci                        ),
641c67d6573Sopenharmony_ci                        dfa::Result::NoMatch(_) => None,
642c67d6573Sopenharmony_ci                        dfa::Result::Quit => {
643c67d6573Sopenharmony_ci                            self.captures_nfa(slots, text, start)
644c67d6573Sopenharmony_ci                        }
645c67d6573Sopenharmony_ci                    }
646c67d6573Sopenharmony_ci                }
647c67d6573Sopenharmony_ci            }
648c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
649c67d6573Sopenharmony_ci            MatchType::DfaAnchoredReverse => {
650c67d6573Sopenharmony_ci                match self.find_dfa_anchored_reverse(text, start) {
651c67d6573Sopenharmony_ci                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
652c67d6573Sopenharmony_ci                        MatchNfaType::Auto,
653c67d6573Sopenharmony_ci                        slots,
654c67d6573Sopenharmony_ci                        text,
655c67d6573Sopenharmony_ci                        s,
656c67d6573Sopenharmony_ci                        e,
657c67d6573Sopenharmony_ci                    ),
658c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
659c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
660c67d6573Sopenharmony_ci                }
661c67d6573Sopenharmony_ci            }
662c67d6573Sopenharmony_ci            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
663c67d6573Sopenharmony_ci            MatchType::DfaSuffix => {
664c67d6573Sopenharmony_ci                match self.find_dfa_reverse_suffix(text, start) {
665c67d6573Sopenharmony_ci                    dfa::Result::Match((s, e)) => self.captures_nfa_type(
666c67d6573Sopenharmony_ci                        MatchNfaType::Auto,
667c67d6573Sopenharmony_ci                        slots,
668c67d6573Sopenharmony_ci                        text,
669c67d6573Sopenharmony_ci                        s,
670c67d6573Sopenharmony_ci                        e,
671c67d6573Sopenharmony_ci                    ),
672c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => None,
673c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
674c67d6573Sopenharmony_ci                }
675c67d6573Sopenharmony_ci            }
676c67d6573Sopenharmony_ci            MatchType::Nfa(ty) => {
677c67d6573Sopenharmony_ci                self.captures_nfa_type(ty, slots, text, start, text.len())
678c67d6573Sopenharmony_ci            }
679c67d6573Sopenharmony_ci            MatchType::Nothing => None,
680c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
681c67d6573Sopenharmony_ci            MatchType::DfaMany => {
682c67d6573Sopenharmony_ci                unreachable!("BUG: RegexSet cannot be used with captures")
683c67d6573Sopenharmony_ci            }
684c67d6573Sopenharmony_ci        }
685c67d6573Sopenharmony_ci    }
686c67d6573Sopenharmony_ci}
687c67d6573Sopenharmony_ci
688c67d6573Sopenharmony_ciimpl<'c> ExecNoSync<'c> {
689c67d6573Sopenharmony_ci    /// Finds the leftmost-first match using only literal search.
690c67d6573Sopenharmony_ci    #[cfg(feature = "perf-literal")]
691c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
692c67d6573Sopenharmony_ci    fn find_literals(
693c67d6573Sopenharmony_ci        &self,
694c67d6573Sopenharmony_ci        ty: MatchLiteralType,
695c67d6573Sopenharmony_ci        text: &[u8],
696c67d6573Sopenharmony_ci        start: usize,
697c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
698c67d6573Sopenharmony_ci        use self::MatchLiteralType::*;
699c67d6573Sopenharmony_ci        match ty {
700c67d6573Sopenharmony_ci            Unanchored => {
701c67d6573Sopenharmony_ci                let lits = &self.ro.nfa.prefixes;
702c67d6573Sopenharmony_ci                lits.find(&text[start..]).map(|(s, e)| (start + s, start + e))
703c67d6573Sopenharmony_ci            }
704c67d6573Sopenharmony_ci            AnchoredStart => {
705c67d6573Sopenharmony_ci                let lits = &self.ro.nfa.prefixes;
706c67d6573Sopenharmony_ci                if start == 0 || !self.ro.nfa.is_anchored_start {
707c67d6573Sopenharmony_ci                    lits.find_start(&text[start..])
708c67d6573Sopenharmony_ci                        .map(|(s, e)| (start + s, start + e))
709c67d6573Sopenharmony_ci                } else {
710c67d6573Sopenharmony_ci                    None
711c67d6573Sopenharmony_ci                }
712c67d6573Sopenharmony_ci            }
713c67d6573Sopenharmony_ci            AnchoredEnd => {
714c67d6573Sopenharmony_ci                let lits = &self.ro.suffixes;
715c67d6573Sopenharmony_ci                lits.find_end(&text[start..])
716c67d6573Sopenharmony_ci                    .map(|(s, e)| (start + s, start + e))
717c67d6573Sopenharmony_ci            }
718c67d6573Sopenharmony_ci            AhoCorasick => self
719c67d6573Sopenharmony_ci                .ro
720c67d6573Sopenharmony_ci                .ac
721c67d6573Sopenharmony_ci                .as_ref()
722c67d6573Sopenharmony_ci                .unwrap()
723c67d6573Sopenharmony_ci                .find(&text[start..])
724c67d6573Sopenharmony_ci                .map(|m| (start + m.start(), start + m.end())),
725c67d6573Sopenharmony_ci        }
726c67d6573Sopenharmony_ci    }
727c67d6573Sopenharmony_ci
728c67d6573Sopenharmony_ci    /// Finds the leftmost-first match (start and end) using only the DFA.
729c67d6573Sopenharmony_ci    ///
730c67d6573Sopenharmony_ci    /// If the result returned indicates that the DFA quit, then another
731c67d6573Sopenharmony_ci    /// matching engine should be used.
732c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
733c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
734c67d6573Sopenharmony_ci    fn find_dfa_forward(
735c67d6573Sopenharmony_ci        &self,
736c67d6573Sopenharmony_ci        text: &[u8],
737c67d6573Sopenharmony_ci        start: usize,
738c67d6573Sopenharmony_ci    ) -> dfa::Result<(usize, usize)> {
739c67d6573Sopenharmony_ci        use crate::dfa::Result::*;
740c67d6573Sopenharmony_ci        let end = match dfa::Fsm::forward(
741c67d6573Sopenharmony_ci            &self.ro.dfa,
742c67d6573Sopenharmony_ci            self.cache.value(),
743c67d6573Sopenharmony_ci            false,
744c67d6573Sopenharmony_ci            text,
745c67d6573Sopenharmony_ci            start,
746c67d6573Sopenharmony_ci        ) {
747c67d6573Sopenharmony_ci            NoMatch(i) => return NoMatch(i),
748c67d6573Sopenharmony_ci            Quit => return Quit,
749c67d6573Sopenharmony_ci            Match(end) if start == end => return Match((start, start)),
750c67d6573Sopenharmony_ci            Match(end) => end,
751c67d6573Sopenharmony_ci        };
752c67d6573Sopenharmony_ci        // Now run the DFA in reverse to find the start of the match.
753c67d6573Sopenharmony_ci        match dfa::Fsm::reverse(
754c67d6573Sopenharmony_ci            &self.ro.dfa_reverse,
755c67d6573Sopenharmony_ci            self.cache.value(),
756c67d6573Sopenharmony_ci            false,
757c67d6573Sopenharmony_ci            &text[start..],
758c67d6573Sopenharmony_ci            end - start,
759c67d6573Sopenharmony_ci        ) {
760c67d6573Sopenharmony_ci            Match(s) => Match((start + s, end)),
761c67d6573Sopenharmony_ci            NoMatch(i) => NoMatch(i),
762c67d6573Sopenharmony_ci            Quit => Quit,
763c67d6573Sopenharmony_ci        }
764c67d6573Sopenharmony_ci    }
765c67d6573Sopenharmony_ci
766c67d6573Sopenharmony_ci    /// Finds the leftmost-first match (start and end) using only the DFA,
767c67d6573Sopenharmony_ci    /// but assumes the regex is anchored at the end and therefore starts at
768c67d6573Sopenharmony_ci    /// the end of the regex and matches in reverse.
769c67d6573Sopenharmony_ci    ///
770c67d6573Sopenharmony_ci    /// If the result returned indicates that the DFA quit, then another
771c67d6573Sopenharmony_ci    /// matching engine should be used.
772c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
773c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
774c67d6573Sopenharmony_ci    fn find_dfa_anchored_reverse(
775c67d6573Sopenharmony_ci        &self,
776c67d6573Sopenharmony_ci        text: &[u8],
777c67d6573Sopenharmony_ci        start: usize,
778c67d6573Sopenharmony_ci    ) -> dfa::Result<(usize, usize)> {
779c67d6573Sopenharmony_ci        use crate::dfa::Result::*;
780c67d6573Sopenharmony_ci        match dfa::Fsm::reverse(
781c67d6573Sopenharmony_ci            &self.ro.dfa_reverse,
782c67d6573Sopenharmony_ci            self.cache.value(),
783c67d6573Sopenharmony_ci            false,
784c67d6573Sopenharmony_ci            &text[start..],
785c67d6573Sopenharmony_ci            text.len() - start,
786c67d6573Sopenharmony_ci        ) {
787c67d6573Sopenharmony_ci            Match(s) => Match((start + s, text.len())),
788c67d6573Sopenharmony_ci            NoMatch(i) => NoMatch(i),
789c67d6573Sopenharmony_ci            Quit => Quit,
790c67d6573Sopenharmony_ci        }
791c67d6573Sopenharmony_ci    }
792c67d6573Sopenharmony_ci
793c67d6573Sopenharmony_ci    /// Finds the end of the shortest match using only the DFA.
794c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
795c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
796c67d6573Sopenharmony_ci    fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
797c67d6573Sopenharmony_ci        dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
798c67d6573Sopenharmony_ci    }
799c67d6573Sopenharmony_ci
800c67d6573Sopenharmony_ci    /// Finds the end of the shortest match using only the DFA by scanning for
801c67d6573Sopenharmony_ci    /// suffix literals.
802c67d6573Sopenharmony_ci    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
803c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
804c67d6573Sopenharmony_ci    fn shortest_dfa_reverse_suffix(
805c67d6573Sopenharmony_ci        &self,
806c67d6573Sopenharmony_ci        text: &[u8],
807c67d6573Sopenharmony_ci        start: usize,
808c67d6573Sopenharmony_ci    ) -> dfa::Result<usize> {
809c67d6573Sopenharmony_ci        match self.exec_dfa_reverse_suffix(text, start) {
810c67d6573Sopenharmony_ci            None => self.shortest_dfa(text, start),
811c67d6573Sopenharmony_ci            Some(r) => r.map(|(_, end)| end),
812c67d6573Sopenharmony_ci        }
813c67d6573Sopenharmony_ci    }
814c67d6573Sopenharmony_ci
815c67d6573Sopenharmony_ci    /// Finds the end of the shortest match using only the DFA by scanning for
816c67d6573Sopenharmony_ci    /// suffix literals. It also reports the start of the match.
817c67d6573Sopenharmony_ci    ///
818c67d6573Sopenharmony_ci    /// Note that if None is returned, then the optimization gave up to avoid
819c67d6573Sopenharmony_ci    /// worst case quadratic behavior. A forward scanning DFA should be tried
820c67d6573Sopenharmony_ci    /// next.
821c67d6573Sopenharmony_ci    ///
822c67d6573Sopenharmony_ci    /// If a match is returned and the full leftmost-first match is desired,
823c67d6573Sopenharmony_ci    /// then a forward scan starting from the beginning of the match must be
824c67d6573Sopenharmony_ci    /// done.
825c67d6573Sopenharmony_ci    ///
826c67d6573Sopenharmony_ci    /// If the result returned indicates that the DFA quit, then another
827c67d6573Sopenharmony_ci    /// matching engine should be used.
828c67d6573Sopenharmony_ci    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
829c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
830c67d6573Sopenharmony_ci    fn exec_dfa_reverse_suffix(
831c67d6573Sopenharmony_ci        &self,
832c67d6573Sopenharmony_ci        text: &[u8],
833c67d6573Sopenharmony_ci        original_start: usize,
834c67d6573Sopenharmony_ci    ) -> Option<dfa::Result<(usize, usize)>> {
835c67d6573Sopenharmony_ci        use crate::dfa::Result::*;
836c67d6573Sopenharmony_ci
837c67d6573Sopenharmony_ci        let lcs = self.ro.suffixes.lcs();
838c67d6573Sopenharmony_ci        debug_assert!(lcs.len() >= 1);
839c67d6573Sopenharmony_ci        let mut start = original_start;
840c67d6573Sopenharmony_ci        let mut end = start;
841c67d6573Sopenharmony_ci        let mut last_literal = start;
842c67d6573Sopenharmony_ci        while end <= text.len() {
843c67d6573Sopenharmony_ci            last_literal += match lcs.find(&text[last_literal..]) {
844c67d6573Sopenharmony_ci                None => return Some(NoMatch(text.len())),
845c67d6573Sopenharmony_ci                Some(i) => i,
846c67d6573Sopenharmony_ci            };
847c67d6573Sopenharmony_ci            end = last_literal + lcs.len();
848c67d6573Sopenharmony_ci            match dfa::Fsm::reverse(
849c67d6573Sopenharmony_ci                &self.ro.dfa_reverse,
850c67d6573Sopenharmony_ci                self.cache.value(),
851c67d6573Sopenharmony_ci                false,
852c67d6573Sopenharmony_ci                &text[start..end],
853c67d6573Sopenharmony_ci                end - start,
854c67d6573Sopenharmony_ci            ) {
855c67d6573Sopenharmony_ci                Match(0) | NoMatch(0) => return None,
856c67d6573Sopenharmony_ci                Match(i) => return Some(Match((start + i, end))),
857c67d6573Sopenharmony_ci                NoMatch(i) => {
858c67d6573Sopenharmony_ci                    start += i;
859c67d6573Sopenharmony_ci                    last_literal += 1;
860c67d6573Sopenharmony_ci                    continue;
861c67d6573Sopenharmony_ci                }
862c67d6573Sopenharmony_ci                Quit => return Some(Quit),
863c67d6573Sopenharmony_ci            };
864c67d6573Sopenharmony_ci        }
865c67d6573Sopenharmony_ci        Some(NoMatch(text.len()))
866c67d6573Sopenharmony_ci    }
867c67d6573Sopenharmony_ci
868c67d6573Sopenharmony_ci    /// Finds the leftmost-first match (start and end) using only the DFA
869c67d6573Sopenharmony_ci    /// by scanning for suffix literals.
870c67d6573Sopenharmony_ci    ///
871c67d6573Sopenharmony_ci    /// If the result returned indicates that the DFA quit, then another
872c67d6573Sopenharmony_ci    /// matching engine should be used.
873c67d6573Sopenharmony_ci    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
874c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
875c67d6573Sopenharmony_ci    fn find_dfa_reverse_suffix(
876c67d6573Sopenharmony_ci        &self,
877c67d6573Sopenharmony_ci        text: &[u8],
878c67d6573Sopenharmony_ci        start: usize,
879c67d6573Sopenharmony_ci    ) -> dfa::Result<(usize, usize)> {
880c67d6573Sopenharmony_ci        use crate::dfa::Result::*;
881c67d6573Sopenharmony_ci
882c67d6573Sopenharmony_ci        let match_start = match self.exec_dfa_reverse_suffix(text, start) {
883c67d6573Sopenharmony_ci            None => return self.find_dfa_forward(text, start),
884c67d6573Sopenharmony_ci            Some(Match((start, _))) => start,
885c67d6573Sopenharmony_ci            Some(r) => return r,
886c67d6573Sopenharmony_ci        };
887c67d6573Sopenharmony_ci        // At this point, we've found a match. The only way to quit now
888c67d6573Sopenharmony_ci        // without a match is if the DFA gives up (seems unlikely).
889c67d6573Sopenharmony_ci        //
890c67d6573Sopenharmony_ci        // Now run the DFA forwards to find the proper end of the match.
891c67d6573Sopenharmony_ci        // (The suffix literal match can only indicate the earliest
892c67d6573Sopenharmony_ci        // possible end location, which may appear before the end of the
893c67d6573Sopenharmony_ci        // leftmost-first match.)
894c67d6573Sopenharmony_ci        match dfa::Fsm::forward(
895c67d6573Sopenharmony_ci            &self.ro.dfa,
896c67d6573Sopenharmony_ci            self.cache.value(),
897c67d6573Sopenharmony_ci            false,
898c67d6573Sopenharmony_ci            text,
899c67d6573Sopenharmony_ci            match_start,
900c67d6573Sopenharmony_ci        ) {
901c67d6573Sopenharmony_ci            NoMatch(_) => panic!("BUG: reverse match implies forward match"),
902c67d6573Sopenharmony_ci            Quit => Quit,
903c67d6573Sopenharmony_ci            Match(e) => Match((match_start, e)),
904c67d6573Sopenharmony_ci        }
905c67d6573Sopenharmony_ci    }
906c67d6573Sopenharmony_ci
907c67d6573Sopenharmony_ci    /// Executes the NFA engine to return whether there is a match or not.
908c67d6573Sopenharmony_ci    ///
909c67d6573Sopenharmony_ci    /// Ideally, we could use shortest_nfa(...).is_some() and get the same
910c67d6573Sopenharmony_ci    /// performance characteristics, but regex sets don't have captures, which
911c67d6573Sopenharmony_ci    /// shortest_nfa depends on.
912c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
913c67d6573Sopenharmony_ci    fn match_nfa(&self, text: &[u8], start: usize) -> bool {
914c67d6573Sopenharmony_ci        self.match_nfa_type(MatchNfaType::Auto, text, start)
915c67d6573Sopenharmony_ci    }
916c67d6573Sopenharmony_ci
917c67d6573Sopenharmony_ci    /// Like match_nfa, but allows specification of the type of NFA engine.
918c67d6573Sopenharmony_ci    fn match_nfa_type(
919c67d6573Sopenharmony_ci        &self,
920c67d6573Sopenharmony_ci        ty: MatchNfaType,
921c67d6573Sopenharmony_ci        text: &[u8],
922c67d6573Sopenharmony_ci        start: usize,
923c67d6573Sopenharmony_ci    ) -> bool {
924c67d6573Sopenharmony_ci        self.exec_nfa(
925c67d6573Sopenharmony_ci            ty,
926c67d6573Sopenharmony_ci            &mut [false],
927c67d6573Sopenharmony_ci            &mut [],
928c67d6573Sopenharmony_ci            true,
929c67d6573Sopenharmony_ci            false,
930c67d6573Sopenharmony_ci            text,
931c67d6573Sopenharmony_ci            start,
932c67d6573Sopenharmony_ci            text.len(),
933c67d6573Sopenharmony_ci        )
934c67d6573Sopenharmony_ci    }
935c67d6573Sopenharmony_ci
936c67d6573Sopenharmony_ci    /// Finds the shortest match using an NFA.
937c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
938c67d6573Sopenharmony_ci    fn shortest_nfa(&self, text: &[u8], start: usize) -> Option<usize> {
939c67d6573Sopenharmony_ci        self.shortest_nfa_type(MatchNfaType::Auto, text, start)
940c67d6573Sopenharmony_ci    }
941c67d6573Sopenharmony_ci
942c67d6573Sopenharmony_ci    /// Like shortest_nfa, but allows specification of the type of NFA engine.
943c67d6573Sopenharmony_ci    fn shortest_nfa_type(
944c67d6573Sopenharmony_ci        &self,
945c67d6573Sopenharmony_ci        ty: MatchNfaType,
946c67d6573Sopenharmony_ci        text: &[u8],
947c67d6573Sopenharmony_ci        start: usize,
948c67d6573Sopenharmony_ci    ) -> Option<usize> {
949c67d6573Sopenharmony_ci        let mut slots = [None, None];
950c67d6573Sopenharmony_ci        if self.exec_nfa(
951c67d6573Sopenharmony_ci            ty,
952c67d6573Sopenharmony_ci            &mut [false],
953c67d6573Sopenharmony_ci            &mut slots,
954c67d6573Sopenharmony_ci            true,
955c67d6573Sopenharmony_ci            true,
956c67d6573Sopenharmony_ci            text,
957c67d6573Sopenharmony_ci            start,
958c67d6573Sopenharmony_ci            text.len(),
959c67d6573Sopenharmony_ci        ) {
960c67d6573Sopenharmony_ci            slots[1]
961c67d6573Sopenharmony_ci        } else {
962c67d6573Sopenharmony_ci            None
963c67d6573Sopenharmony_ci        }
964c67d6573Sopenharmony_ci    }
965c67d6573Sopenharmony_ci
966c67d6573Sopenharmony_ci    /// Like find, but executes an NFA engine.
967c67d6573Sopenharmony_ci    fn find_nfa(
968c67d6573Sopenharmony_ci        &self,
969c67d6573Sopenharmony_ci        ty: MatchNfaType,
970c67d6573Sopenharmony_ci        text: &[u8],
971c67d6573Sopenharmony_ci        start: usize,
972c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
973c67d6573Sopenharmony_ci        let mut slots = [None, None];
974c67d6573Sopenharmony_ci        if self.exec_nfa(
975c67d6573Sopenharmony_ci            ty,
976c67d6573Sopenharmony_ci            &mut [false],
977c67d6573Sopenharmony_ci            &mut slots,
978c67d6573Sopenharmony_ci            false,
979c67d6573Sopenharmony_ci            false,
980c67d6573Sopenharmony_ci            text,
981c67d6573Sopenharmony_ci            start,
982c67d6573Sopenharmony_ci            text.len(),
983c67d6573Sopenharmony_ci        ) {
984c67d6573Sopenharmony_ci            match (slots[0], slots[1]) {
985c67d6573Sopenharmony_ci                (Some(s), Some(e)) => Some((s, e)),
986c67d6573Sopenharmony_ci                _ => None,
987c67d6573Sopenharmony_ci            }
988c67d6573Sopenharmony_ci        } else {
989c67d6573Sopenharmony_ci            None
990c67d6573Sopenharmony_ci        }
991c67d6573Sopenharmony_ci    }
992c67d6573Sopenharmony_ci
993c67d6573Sopenharmony_ci    /// Like find_nfa, but fills in captures.
994c67d6573Sopenharmony_ci    ///
995c67d6573Sopenharmony_ci    /// `slots` should have length equal to `2 * nfa.captures.len()`.
996c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
997c67d6573Sopenharmony_ci    fn captures_nfa(
998c67d6573Sopenharmony_ci        &self,
999c67d6573Sopenharmony_ci        slots: &mut [Slot],
1000c67d6573Sopenharmony_ci        text: &[u8],
1001c67d6573Sopenharmony_ci        start: usize,
1002c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
1003c67d6573Sopenharmony_ci        self.captures_nfa_type(
1004c67d6573Sopenharmony_ci            MatchNfaType::Auto,
1005c67d6573Sopenharmony_ci            slots,
1006c67d6573Sopenharmony_ci            text,
1007c67d6573Sopenharmony_ci            start,
1008c67d6573Sopenharmony_ci            text.len(),
1009c67d6573Sopenharmony_ci        )
1010c67d6573Sopenharmony_ci    }
1011c67d6573Sopenharmony_ci
1012c67d6573Sopenharmony_ci    /// Like captures_nfa, but allows specification of type of NFA engine.
1013c67d6573Sopenharmony_ci    fn captures_nfa_type(
1014c67d6573Sopenharmony_ci        &self,
1015c67d6573Sopenharmony_ci        ty: MatchNfaType,
1016c67d6573Sopenharmony_ci        slots: &mut [Slot],
1017c67d6573Sopenharmony_ci        text: &[u8],
1018c67d6573Sopenharmony_ci        start: usize,
1019c67d6573Sopenharmony_ci        end: usize,
1020c67d6573Sopenharmony_ci    ) -> Option<(usize, usize)> {
1021c67d6573Sopenharmony_ci        if self.exec_nfa(
1022c67d6573Sopenharmony_ci            ty,
1023c67d6573Sopenharmony_ci            &mut [false],
1024c67d6573Sopenharmony_ci            slots,
1025c67d6573Sopenharmony_ci            false,
1026c67d6573Sopenharmony_ci            false,
1027c67d6573Sopenharmony_ci            text,
1028c67d6573Sopenharmony_ci            start,
1029c67d6573Sopenharmony_ci            end,
1030c67d6573Sopenharmony_ci        ) {
1031c67d6573Sopenharmony_ci            match (slots[0], slots[1]) {
1032c67d6573Sopenharmony_ci                (Some(s), Some(e)) => Some((s, e)),
1033c67d6573Sopenharmony_ci                _ => None,
1034c67d6573Sopenharmony_ci            }
1035c67d6573Sopenharmony_ci        } else {
1036c67d6573Sopenharmony_ci            None
1037c67d6573Sopenharmony_ci        }
1038c67d6573Sopenharmony_ci    }
1039c67d6573Sopenharmony_ci
1040c67d6573Sopenharmony_ci    fn exec_nfa(
1041c67d6573Sopenharmony_ci        &self,
1042c67d6573Sopenharmony_ci        mut ty: MatchNfaType,
1043c67d6573Sopenharmony_ci        matches: &mut [bool],
1044c67d6573Sopenharmony_ci        slots: &mut [Slot],
1045c67d6573Sopenharmony_ci        quit_after_match: bool,
1046c67d6573Sopenharmony_ci        quit_after_match_with_pos: bool,
1047c67d6573Sopenharmony_ci        text: &[u8],
1048c67d6573Sopenharmony_ci        start: usize,
1049c67d6573Sopenharmony_ci        end: usize,
1050c67d6573Sopenharmony_ci    ) -> bool {
1051c67d6573Sopenharmony_ci        use self::MatchNfaType::*;
1052c67d6573Sopenharmony_ci        if let Auto = ty {
1053c67d6573Sopenharmony_ci            if backtrack::should_exec(self.ro.nfa.len(), text.len()) {
1054c67d6573Sopenharmony_ci                ty = Backtrack;
1055c67d6573Sopenharmony_ci            } else {
1056c67d6573Sopenharmony_ci                ty = PikeVM;
1057c67d6573Sopenharmony_ci            }
1058c67d6573Sopenharmony_ci        }
1059c67d6573Sopenharmony_ci        // The backtracker can't return the shortest match position as it is
1060c67d6573Sopenharmony_ci        // implemented today. So if someone calls `shortest_match` and we need
1061c67d6573Sopenharmony_ci        // to run an NFA, then use the PikeVM.
1062c67d6573Sopenharmony_ci        if quit_after_match_with_pos || ty == PikeVM {
1063c67d6573Sopenharmony_ci            self.exec_pikevm(
1064c67d6573Sopenharmony_ci                matches,
1065c67d6573Sopenharmony_ci                slots,
1066c67d6573Sopenharmony_ci                quit_after_match,
1067c67d6573Sopenharmony_ci                text,
1068c67d6573Sopenharmony_ci                start,
1069c67d6573Sopenharmony_ci                end,
1070c67d6573Sopenharmony_ci            )
1071c67d6573Sopenharmony_ci        } else {
1072c67d6573Sopenharmony_ci            self.exec_backtrack(matches, slots, text, start, end)
1073c67d6573Sopenharmony_ci        }
1074c67d6573Sopenharmony_ci    }
1075c67d6573Sopenharmony_ci
1076c67d6573Sopenharmony_ci    /// Always run the NFA algorithm.
1077c67d6573Sopenharmony_ci    fn exec_pikevm(
1078c67d6573Sopenharmony_ci        &self,
1079c67d6573Sopenharmony_ci        matches: &mut [bool],
1080c67d6573Sopenharmony_ci        slots: &mut [Slot],
1081c67d6573Sopenharmony_ci        quit_after_match: bool,
1082c67d6573Sopenharmony_ci        text: &[u8],
1083c67d6573Sopenharmony_ci        start: usize,
1084c67d6573Sopenharmony_ci        end: usize,
1085c67d6573Sopenharmony_ci    ) -> bool {
1086c67d6573Sopenharmony_ci        if self.ro.nfa.uses_bytes() {
1087c67d6573Sopenharmony_ci            pikevm::Fsm::exec(
1088c67d6573Sopenharmony_ci                &self.ro.nfa,
1089c67d6573Sopenharmony_ci                self.cache.value(),
1090c67d6573Sopenharmony_ci                matches,
1091c67d6573Sopenharmony_ci                slots,
1092c67d6573Sopenharmony_ci                quit_after_match,
1093c67d6573Sopenharmony_ci                ByteInput::new(text, self.ro.nfa.only_utf8),
1094c67d6573Sopenharmony_ci                start,
1095c67d6573Sopenharmony_ci                end,
1096c67d6573Sopenharmony_ci            )
1097c67d6573Sopenharmony_ci        } else {
1098c67d6573Sopenharmony_ci            pikevm::Fsm::exec(
1099c67d6573Sopenharmony_ci                &self.ro.nfa,
1100c67d6573Sopenharmony_ci                self.cache.value(),
1101c67d6573Sopenharmony_ci                matches,
1102c67d6573Sopenharmony_ci                slots,
1103c67d6573Sopenharmony_ci                quit_after_match,
1104c67d6573Sopenharmony_ci                CharInput::new(text),
1105c67d6573Sopenharmony_ci                start,
1106c67d6573Sopenharmony_ci                end,
1107c67d6573Sopenharmony_ci            )
1108c67d6573Sopenharmony_ci        }
1109c67d6573Sopenharmony_ci    }
1110c67d6573Sopenharmony_ci
1111c67d6573Sopenharmony_ci    /// Always runs the NFA using bounded backtracking.
1112c67d6573Sopenharmony_ci    fn exec_backtrack(
1113c67d6573Sopenharmony_ci        &self,
1114c67d6573Sopenharmony_ci        matches: &mut [bool],
1115c67d6573Sopenharmony_ci        slots: &mut [Slot],
1116c67d6573Sopenharmony_ci        text: &[u8],
1117c67d6573Sopenharmony_ci        start: usize,
1118c67d6573Sopenharmony_ci        end: usize,
1119c67d6573Sopenharmony_ci    ) -> bool {
1120c67d6573Sopenharmony_ci        if self.ro.nfa.uses_bytes() {
1121c67d6573Sopenharmony_ci            backtrack::Bounded::exec(
1122c67d6573Sopenharmony_ci                &self.ro.nfa,
1123c67d6573Sopenharmony_ci                self.cache.value(),
1124c67d6573Sopenharmony_ci                matches,
1125c67d6573Sopenharmony_ci                slots,
1126c67d6573Sopenharmony_ci                ByteInput::new(text, self.ro.nfa.only_utf8),
1127c67d6573Sopenharmony_ci                start,
1128c67d6573Sopenharmony_ci                end,
1129c67d6573Sopenharmony_ci            )
1130c67d6573Sopenharmony_ci        } else {
1131c67d6573Sopenharmony_ci            backtrack::Bounded::exec(
1132c67d6573Sopenharmony_ci                &self.ro.nfa,
1133c67d6573Sopenharmony_ci                self.cache.value(),
1134c67d6573Sopenharmony_ci                matches,
1135c67d6573Sopenharmony_ci                slots,
1136c67d6573Sopenharmony_ci                CharInput::new(text),
1137c67d6573Sopenharmony_ci                start,
1138c67d6573Sopenharmony_ci                end,
1139c67d6573Sopenharmony_ci            )
1140c67d6573Sopenharmony_ci        }
1141c67d6573Sopenharmony_ci    }
1142c67d6573Sopenharmony_ci
1143c67d6573Sopenharmony_ci    /// Finds which regular expressions match the given text.
1144c67d6573Sopenharmony_ci    ///
1145c67d6573Sopenharmony_ci    /// `matches` should have length equal to the number of regexes being
1146c67d6573Sopenharmony_ci    /// searched.
1147c67d6573Sopenharmony_ci    ///
1148c67d6573Sopenharmony_ci    /// This is only useful when one wants to know which regexes in a set
1149c67d6573Sopenharmony_ci    /// match some text.
1150c67d6573Sopenharmony_ci    pub fn many_matches_at(
1151c67d6573Sopenharmony_ci        &self,
1152c67d6573Sopenharmony_ci        matches: &mut [bool],
1153c67d6573Sopenharmony_ci        text: &[u8],
1154c67d6573Sopenharmony_ci        start: usize,
1155c67d6573Sopenharmony_ci    ) -> bool {
1156c67d6573Sopenharmony_ci        use self::MatchType::*;
1157c67d6573Sopenharmony_ci        if !self.is_anchor_end_match(text) {
1158c67d6573Sopenharmony_ci            return false;
1159c67d6573Sopenharmony_ci        }
1160c67d6573Sopenharmony_ci        match self.ro.match_type {
1161c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
1162c67d6573Sopenharmony_ci            Literal(ty) => {
1163c67d6573Sopenharmony_ci                debug_assert_eq!(matches.len(), 1);
1164c67d6573Sopenharmony_ci                matches[0] = self.find_literals(ty, text, start).is_some();
1165c67d6573Sopenharmony_ci                matches[0]
1166c67d6573Sopenharmony_ci            }
1167c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
1168c67d6573Sopenharmony_ci            Dfa | DfaAnchoredReverse | DfaMany => {
1169c67d6573Sopenharmony_ci                match dfa::Fsm::forward_many(
1170c67d6573Sopenharmony_ci                    &self.ro.dfa,
1171c67d6573Sopenharmony_ci                    self.cache.value(),
1172c67d6573Sopenharmony_ci                    matches,
1173c67d6573Sopenharmony_ci                    text,
1174c67d6573Sopenharmony_ci                    start,
1175c67d6573Sopenharmony_ci                ) {
1176c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => true,
1177c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => false,
1178c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.exec_nfa(
1179c67d6573Sopenharmony_ci                        MatchNfaType::Auto,
1180c67d6573Sopenharmony_ci                        matches,
1181c67d6573Sopenharmony_ci                        &mut [],
1182c67d6573Sopenharmony_ci                        false,
1183c67d6573Sopenharmony_ci                        false,
1184c67d6573Sopenharmony_ci                        text,
1185c67d6573Sopenharmony_ci                        start,
1186c67d6573Sopenharmony_ci                        text.len(),
1187c67d6573Sopenharmony_ci                    ),
1188c67d6573Sopenharmony_ci                }
1189c67d6573Sopenharmony_ci            }
1190c67d6573Sopenharmony_ci            #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1191c67d6573Sopenharmony_ci            DfaSuffix => {
1192c67d6573Sopenharmony_ci                match dfa::Fsm::forward_many(
1193c67d6573Sopenharmony_ci                    &self.ro.dfa,
1194c67d6573Sopenharmony_ci                    self.cache.value(),
1195c67d6573Sopenharmony_ci                    matches,
1196c67d6573Sopenharmony_ci                    text,
1197c67d6573Sopenharmony_ci                    start,
1198c67d6573Sopenharmony_ci                ) {
1199c67d6573Sopenharmony_ci                    dfa::Result::Match(_) => true,
1200c67d6573Sopenharmony_ci                    dfa::Result::NoMatch(_) => false,
1201c67d6573Sopenharmony_ci                    dfa::Result::Quit => self.exec_nfa(
1202c67d6573Sopenharmony_ci                        MatchNfaType::Auto,
1203c67d6573Sopenharmony_ci                        matches,
1204c67d6573Sopenharmony_ci                        &mut [],
1205c67d6573Sopenharmony_ci                        false,
1206c67d6573Sopenharmony_ci                        false,
1207c67d6573Sopenharmony_ci                        text,
1208c67d6573Sopenharmony_ci                        start,
1209c67d6573Sopenharmony_ci                        text.len(),
1210c67d6573Sopenharmony_ci                    ),
1211c67d6573Sopenharmony_ci                }
1212c67d6573Sopenharmony_ci            }
1213c67d6573Sopenharmony_ci            Nfa(ty) => self.exec_nfa(
1214c67d6573Sopenharmony_ci                ty,
1215c67d6573Sopenharmony_ci                matches,
1216c67d6573Sopenharmony_ci                &mut [],
1217c67d6573Sopenharmony_ci                false,
1218c67d6573Sopenharmony_ci                false,
1219c67d6573Sopenharmony_ci                text,
1220c67d6573Sopenharmony_ci                start,
1221c67d6573Sopenharmony_ci                text.len(),
1222c67d6573Sopenharmony_ci            ),
1223c67d6573Sopenharmony_ci            Nothing => false,
1224c67d6573Sopenharmony_ci        }
1225c67d6573Sopenharmony_ci    }
1226c67d6573Sopenharmony_ci
1227c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
1228c67d6573Sopenharmony_ci    fn is_anchor_end_match(&self, text: &[u8]) -> bool {
1229c67d6573Sopenharmony_ci        #[cfg(not(feature = "perf-literal"))]
1230c67d6573Sopenharmony_ci        fn imp(_: &ExecReadOnly, _: &[u8]) -> bool {
1231c67d6573Sopenharmony_ci            true
1232c67d6573Sopenharmony_ci        }
1233c67d6573Sopenharmony_ci
1234c67d6573Sopenharmony_ci        #[cfg(feature = "perf-literal")]
1235c67d6573Sopenharmony_ci        fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
1236c67d6573Sopenharmony_ci            // Only do this check if the haystack is big (>1MB).
1237c67d6573Sopenharmony_ci            if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
1238c67d6573Sopenharmony_ci                let lcs = ro.suffixes.lcs();
1239c67d6573Sopenharmony_ci                if lcs.len() >= 1 && !lcs.is_suffix(text) {
1240c67d6573Sopenharmony_ci                    return false;
1241c67d6573Sopenharmony_ci                }
1242c67d6573Sopenharmony_ci            }
1243c67d6573Sopenharmony_ci            true
1244c67d6573Sopenharmony_ci        }
1245c67d6573Sopenharmony_ci
1246c67d6573Sopenharmony_ci        imp(&self.ro, text)
1247c67d6573Sopenharmony_ci    }
1248c67d6573Sopenharmony_ci
1249c67d6573Sopenharmony_ci    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1250c67d6573Sopenharmony_ci        &self.ro.nfa.capture_name_idx
1251c67d6573Sopenharmony_ci    }
1252c67d6573Sopenharmony_ci}
1253c67d6573Sopenharmony_ci
1254c67d6573Sopenharmony_ciimpl<'c> ExecNoSyncStr<'c> {
1255c67d6573Sopenharmony_ci    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1256c67d6573Sopenharmony_ci        self.0.capture_name_idx()
1257c67d6573Sopenharmony_ci    }
1258c67d6573Sopenharmony_ci}
1259c67d6573Sopenharmony_ci
1260c67d6573Sopenharmony_ciimpl Exec {
1261c67d6573Sopenharmony_ci    /// Get a searcher that isn't Sync.
1262c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
1263c67d6573Sopenharmony_ci    pub fn searcher(&self) -> ExecNoSync<'_> {
1264c67d6573Sopenharmony_ci        ExecNoSync {
1265c67d6573Sopenharmony_ci            ro: &self.ro, // a clone is too expensive here! (and not needed)
1266c67d6573Sopenharmony_ci            cache: self.pool.get(),
1267c67d6573Sopenharmony_ci        }
1268c67d6573Sopenharmony_ci    }
1269c67d6573Sopenharmony_ci
1270c67d6573Sopenharmony_ci    /// Get a searcher that isn't Sync and can match on &str.
1271c67d6573Sopenharmony_ci    #[cfg_attr(feature = "perf-inline", inline(always))]
1272c67d6573Sopenharmony_ci    pub fn searcher_str(&self) -> ExecNoSyncStr<'_> {
1273c67d6573Sopenharmony_ci        ExecNoSyncStr(self.searcher())
1274c67d6573Sopenharmony_ci    }
1275c67d6573Sopenharmony_ci
1276c67d6573Sopenharmony_ci    /// Build a Regex from this executor.
1277c67d6573Sopenharmony_ci    pub fn into_regex(self) -> re_unicode::Regex {
1278c67d6573Sopenharmony_ci        re_unicode::Regex::from(self)
1279c67d6573Sopenharmony_ci    }
1280c67d6573Sopenharmony_ci
1281c67d6573Sopenharmony_ci    /// Build a RegexSet from this executor.
1282c67d6573Sopenharmony_ci    pub fn into_regex_set(self) -> re_set::unicode::RegexSet {
1283c67d6573Sopenharmony_ci        re_set::unicode::RegexSet::from(self)
1284c67d6573Sopenharmony_ci    }
1285c67d6573Sopenharmony_ci
1286c67d6573Sopenharmony_ci    /// Build a Regex from this executor that can match arbitrary bytes.
1287c67d6573Sopenharmony_ci    pub fn into_byte_regex(self) -> re_bytes::Regex {
1288c67d6573Sopenharmony_ci        re_bytes::Regex::from(self)
1289c67d6573Sopenharmony_ci    }
1290c67d6573Sopenharmony_ci
1291c67d6573Sopenharmony_ci    /// Build a RegexSet from this executor that can match arbitrary bytes.
1292c67d6573Sopenharmony_ci    pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet {
1293c67d6573Sopenharmony_ci        re_set::bytes::RegexSet::from(self)
1294c67d6573Sopenharmony_ci    }
1295c67d6573Sopenharmony_ci
1296c67d6573Sopenharmony_ci    /// The original regular expressions given by the caller that were
1297c67d6573Sopenharmony_ci    /// compiled.
1298c67d6573Sopenharmony_ci    pub fn regex_strings(&self) -> &[String] {
1299c67d6573Sopenharmony_ci        &self.ro.res
1300c67d6573Sopenharmony_ci    }
1301c67d6573Sopenharmony_ci
1302c67d6573Sopenharmony_ci    /// Return a slice of capture names.
1303c67d6573Sopenharmony_ci    ///
1304c67d6573Sopenharmony_ci    /// Any capture that isn't named is None.
1305c67d6573Sopenharmony_ci    pub fn capture_names(&self) -> &[Option<String>] {
1306c67d6573Sopenharmony_ci        &self.ro.nfa.captures
1307c67d6573Sopenharmony_ci    }
1308c67d6573Sopenharmony_ci
1309c67d6573Sopenharmony_ci    /// Return a reference to named groups mapping (from group name to
1310c67d6573Sopenharmony_ci    /// group position).
1311c67d6573Sopenharmony_ci    pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
1312c67d6573Sopenharmony_ci        &self.ro.nfa.capture_name_idx
1313c67d6573Sopenharmony_ci    }
1314c67d6573Sopenharmony_ci}
1315c67d6573Sopenharmony_ci
1316c67d6573Sopenharmony_ciimpl Clone for Exec {
1317c67d6573Sopenharmony_ci    fn clone(&self) -> Exec {
1318c67d6573Sopenharmony_ci        let pool = ExecReadOnly::new_pool(&self.ro);
1319c67d6573Sopenharmony_ci        Exec { ro: self.ro.clone(), pool }
1320c67d6573Sopenharmony_ci    }
1321c67d6573Sopenharmony_ci}
1322c67d6573Sopenharmony_ci
1323c67d6573Sopenharmony_ciimpl ExecReadOnly {
1324c67d6573Sopenharmony_ci    fn choose_match_type(&self, hint: Option<MatchType>) -> MatchType {
1325c67d6573Sopenharmony_ci        if let Some(MatchType::Nfa(_)) = hint {
1326c67d6573Sopenharmony_ci            return hint.unwrap();
1327c67d6573Sopenharmony_ci        }
1328c67d6573Sopenharmony_ci        // If the NFA is empty, then we'll never match anything.
1329c67d6573Sopenharmony_ci        if self.nfa.insts.is_empty() {
1330c67d6573Sopenharmony_ci            return MatchType::Nothing;
1331c67d6573Sopenharmony_ci        }
1332c67d6573Sopenharmony_ci        if let Some(literalty) = self.choose_literal_match_type() {
1333c67d6573Sopenharmony_ci            return literalty;
1334c67d6573Sopenharmony_ci        }
1335c67d6573Sopenharmony_ci        if let Some(dfaty) = self.choose_dfa_match_type() {
1336c67d6573Sopenharmony_ci            return dfaty;
1337c67d6573Sopenharmony_ci        }
1338c67d6573Sopenharmony_ci        // We're so totally hosed.
1339c67d6573Sopenharmony_ci        MatchType::Nfa(MatchNfaType::Auto)
1340c67d6573Sopenharmony_ci    }
1341c67d6573Sopenharmony_ci
1342c67d6573Sopenharmony_ci    /// If a plain literal scan can be used, then a corresponding literal
1343c67d6573Sopenharmony_ci    /// search type is returned.
1344c67d6573Sopenharmony_ci    fn choose_literal_match_type(&self) -> Option<MatchType> {
1345c67d6573Sopenharmony_ci        #[cfg(not(feature = "perf-literal"))]
1346c67d6573Sopenharmony_ci        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
1347c67d6573Sopenharmony_ci            None
1348c67d6573Sopenharmony_ci        }
1349c67d6573Sopenharmony_ci
1350c67d6573Sopenharmony_ci        #[cfg(feature = "perf-literal")]
1351c67d6573Sopenharmony_ci        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
1352c67d6573Sopenharmony_ci            // If our set of prefixes is complete, then we can use it to find
1353c67d6573Sopenharmony_ci            // a match in lieu of a regex engine. This doesn't quite work well
1354c67d6573Sopenharmony_ci            // in the presence of multiple regexes, so only do it when there's
1355c67d6573Sopenharmony_ci            // one.
1356c67d6573Sopenharmony_ci            //
1357c67d6573Sopenharmony_ci            // TODO(burntsushi): Also, don't try to match literals if the regex
1358c67d6573Sopenharmony_ci            // is partially anchored. We could technically do it, but we'd need
1359c67d6573Sopenharmony_ci            // to create two sets of literals: all of them and then the subset
1360c67d6573Sopenharmony_ci            // that aren't anchored. We would then only search for all of them
1361c67d6573Sopenharmony_ci            // when at the beginning of the input and use the subset in all
1362c67d6573Sopenharmony_ci            // other cases.
1363c67d6573Sopenharmony_ci            if ro.res.len() != 1 {
1364c67d6573Sopenharmony_ci                return None;
1365c67d6573Sopenharmony_ci            }
1366c67d6573Sopenharmony_ci            if ro.ac.is_some() {
1367c67d6573Sopenharmony_ci                return Some(MatchType::Literal(
1368c67d6573Sopenharmony_ci                    MatchLiteralType::AhoCorasick,
1369c67d6573Sopenharmony_ci                ));
1370c67d6573Sopenharmony_ci            }
1371c67d6573Sopenharmony_ci            if ro.nfa.prefixes.complete() {
1372c67d6573Sopenharmony_ci                return if ro.nfa.is_anchored_start {
1373c67d6573Sopenharmony_ci                    Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
1374c67d6573Sopenharmony_ci                } else {
1375c67d6573Sopenharmony_ci                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
1376c67d6573Sopenharmony_ci                };
1377c67d6573Sopenharmony_ci            }
1378c67d6573Sopenharmony_ci            if ro.suffixes.complete() {
1379c67d6573Sopenharmony_ci                return if ro.nfa.is_anchored_end {
1380c67d6573Sopenharmony_ci                    Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
1381c67d6573Sopenharmony_ci                } else {
1382c67d6573Sopenharmony_ci                    // This case shouldn't happen. When the regex isn't
1383c67d6573Sopenharmony_ci                    // anchored, then complete prefixes should imply complete
1384c67d6573Sopenharmony_ci                    // suffixes.
1385c67d6573Sopenharmony_ci                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
1386c67d6573Sopenharmony_ci                };
1387c67d6573Sopenharmony_ci            }
1388c67d6573Sopenharmony_ci            None
1389c67d6573Sopenharmony_ci        }
1390c67d6573Sopenharmony_ci
1391c67d6573Sopenharmony_ci        imp(self)
1392c67d6573Sopenharmony_ci    }
1393c67d6573Sopenharmony_ci
1394c67d6573Sopenharmony_ci    /// If a DFA scan can be used, then choose the appropriate DFA strategy.
1395c67d6573Sopenharmony_ci    fn choose_dfa_match_type(&self) -> Option<MatchType> {
1396c67d6573Sopenharmony_ci        #[cfg(not(feature = "perf-dfa"))]
1397c67d6573Sopenharmony_ci        fn imp(_: &ExecReadOnly) -> Option<MatchType> {
1398c67d6573Sopenharmony_ci            None
1399c67d6573Sopenharmony_ci        }
1400c67d6573Sopenharmony_ci
1401c67d6573Sopenharmony_ci        #[cfg(feature = "perf-dfa")]
1402c67d6573Sopenharmony_ci        fn imp(ro: &ExecReadOnly) -> Option<MatchType> {
1403c67d6573Sopenharmony_ci            if !dfa::can_exec(&ro.dfa) {
1404c67d6573Sopenharmony_ci                return None;
1405c67d6573Sopenharmony_ci            }
1406c67d6573Sopenharmony_ci            // Regex sets require a slightly specialized path.
1407c67d6573Sopenharmony_ci            if ro.res.len() >= 2 {
1408c67d6573Sopenharmony_ci                return Some(MatchType::DfaMany);
1409c67d6573Sopenharmony_ci            }
1410c67d6573Sopenharmony_ci            // If the regex is anchored at the end but not the start, then
1411c67d6573Sopenharmony_ci            // just match in reverse from the end of the haystack.
1412c67d6573Sopenharmony_ci            if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
1413c67d6573Sopenharmony_ci                return Some(MatchType::DfaAnchoredReverse);
1414c67d6573Sopenharmony_ci            }
1415c67d6573Sopenharmony_ci            #[cfg(feature = "perf-literal")]
1416c67d6573Sopenharmony_ci            {
1417c67d6573Sopenharmony_ci                // If there's a longish suffix literal, then it might be faster
1418c67d6573Sopenharmony_ci                // to look for that first.
1419c67d6573Sopenharmony_ci                if ro.should_suffix_scan() {
1420c67d6573Sopenharmony_ci                    return Some(MatchType::DfaSuffix);
1421c67d6573Sopenharmony_ci                }
1422c67d6573Sopenharmony_ci            }
1423c67d6573Sopenharmony_ci            // Fall back to your garden variety forward searching lazy DFA.
1424c67d6573Sopenharmony_ci            Some(MatchType::Dfa)
1425c67d6573Sopenharmony_ci        }
1426c67d6573Sopenharmony_ci
1427c67d6573Sopenharmony_ci        imp(self)
1428c67d6573Sopenharmony_ci    }
1429c67d6573Sopenharmony_ci
1430c67d6573Sopenharmony_ci    /// Returns true if the program is amenable to suffix scanning.
1431c67d6573Sopenharmony_ci    ///
1432c67d6573Sopenharmony_ci    /// When this is true, as a heuristic, we assume it is OK to quickly scan
1433c67d6573Sopenharmony_ci    /// for suffix literals and then do a *reverse* DFA match from any matches
1434c67d6573Sopenharmony_ci    /// produced by the literal scan. (And then followed by a forward DFA
1435c67d6573Sopenharmony_ci    /// search, since the previously found suffix literal maybe not actually be
1436c67d6573Sopenharmony_ci    /// the end of a match.)
1437c67d6573Sopenharmony_ci    ///
1438c67d6573Sopenharmony_ci    /// This is a bit of a specialized optimization, but can result in pretty
1439c67d6573Sopenharmony_ci    /// big performance wins if 1) there are no prefix literals and 2) the
1440c67d6573Sopenharmony_ci    /// suffix literals are pretty rare in the text. (1) is obviously easy to
1441c67d6573Sopenharmony_ci    /// account for but (2) is harder. As a proxy, we assume that longer
1442c67d6573Sopenharmony_ci    /// strings are generally rarer, so we only enable this optimization when
1443c67d6573Sopenharmony_ci    /// we have a meaty suffix.
1444c67d6573Sopenharmony_ci    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1445c67d6573Sopenharmony_ci    fn should_suffix_scan(&self) -> bool {
1446c67d6573Sopenharmony_ci        if self.suffixes.is_empty() {
1447c67d6573Sopenharmony_ci            return false;
1448c67d6573Sopenharmony_ci        }
1449c67d6573Sopenharmony_ci        let lcs_len = self.suffixes.lcs().char_len();
1450c67d6573Sopenharmony_ci        lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
1451c67d6573Sopenharmony_ci    }
1452c67d6573Sopenharmony_ci
1453c67d6573Sopenharmony_ci    fn new_pool(ro: &Arc<ExecReadOnly>) -> Box<Pool<ProgramCache>> {
1454c67d6573Sopenharmony_ci        let ro = ro.clone();
1455c67d6573Sopenharmony_ci        Box::new(Pool::new(Box::new(move || {
1456c67d6573Sopenharmony_ci            AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro)))
1457c67d6573Sopenharmony_ci        })))
1458c67d6573Sopenharmony_ci    }
1459c67d6573Sopenharmony_ci}
1460c67d6573Sopenharmony_ci
1461c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)]
1462c67d6573Sopenharmony_cienum MatchType {
1463c67d6573Sopenharmony_ci    /// A single or multiple literal search. This is only used when the regex
1464c67d6573Sopenharmony_ci    /// can be decomposed into a literal search.
1465c67d6573Sopenharmony_ci    #[cfg(feature = "perf-literal")]
1466c67d6573Sopenharmony_ci    Literal(MatchLiteralType),
1467c67d6573Sopenharmony_ci    /// A normal DFA search.
1468c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
1469c67d6573Sopenharmony_ci    Dfa,
1470c67d6573Sopenharmony_ci    /// A reverse DFA search starting from the end of a haystack.
1471c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
1472c67d6573Sopenharmony_ci    DfaAnchoredReverse,
1473c67d6573Sopenharmony_ci    /// A reverse DFA search with suffix literal scanning.
1474c67d6573Sopenharmony_ci    #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))]
1475c67d6573Sopenharmony_ci    DfaSuffix,
1476c67d6573Sopenharmony_ci    /// Use the DFA on two or more regular expressions.
1477c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
1478c67d6573Sopenharmony_ci    DfaMany,
1479c67d6573Sopenharmony_ci    /// An NFA variant.
1480c67d6573Sopenharmony_ci    Nfa(MatchNfaType),
1481c67d6573Sopenharmony_ci    /// No match is ever possible, so don't ever try to search.
1482c67d6573Sopenharmony_ci    Nothing,
1483c67d6573Sopenharmony_ci}
1484c67d6573Sopenharmony_ci
1485c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)]
1486c67d6573Sopenharmony_ci#[cfg(feature = "perf-literal")]
1487c67d6573Sopenharmony_cienum MatchLiteralType {
1488c67d6573Sopenharmony_ci    /// Match literals anywhere in text.
1489c67d6573Sopenharmony_ci    Unanchored,
1490c67d6573Sopenharmony_ci    /// Match literals only at the start of text.
1491c67d6573Sopenharmony_ci    AnchoredStart,
1492c67d6573Sopenharmony_ci    /// Match literals only at the end of text.
1493c67d6573Sopenharmony_ci    AnchoredEnd,
1494c67d6573Sopenharmony_ci    /// Use an Aho-Corasick automaton. This requires `ac` to be Some on
1495c67d6573Sopenharmony_ci    /// ExecReadOnly.
1496c67d6573Sopenharmony_ci    AhoCorasick,
1497c67d6573Sopenharmony_ci}
1498c67d6573Sopenharmony_ci
1499c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1500c67d6573Sopenharmony_cienum MatchNfaType {
1501c67d6573Sopenharmony_ci    /// Choose between Backtrack and PikeVM.
1502c67d6573Sopenharmony_ci    Auto,
1503c67d6573Sopenharmony_ci    /// NFA bounded backtracking.
1504c67d6573Sopenharmony_ci    ///
1505c67d6573Sopenharmony_ci    /// (This is only set by tests, since it never makes sense to always want
1506c67d6573Sopenharmony_ci    /// backtracking.)
1507c67d6573Sopenharmony_ci    Backtrack,
1508c67d6573Sopenharmony_ci    /// The Pike VM.
1509c67d6573Sopenharmony_ci    ///
1510c67d6573Sopenharmony_ci    /// (This is only set by tests, since it never makes sense to always want
1511c67d6573Sopenharmony_ci    /// the Pike VM.)
1512c67d6573Sopenharmony_ci    PikeVM,
1513c67d6573Sopenharmony_ci}
1514c67d6573Sopenharmony_ci
1515c67d6573Sopenharmony_ci/// `ProgramCache` maintains reusable allocations for each matching engine
1516c67d6573Sopenharmony_ci/// available to a particular program.
1517c67d6573Sopenharmony_ci///
1518c67d6573Sopenharmony_ci/// We declare this as unwind safe since it's a cache that's only used for
1519c67d6573Sopenharmony_ci/// performance purposes. If a panic occurs, it is (or should be) always safe
1520c67d6573Sopenharmony_ci/// to continue using the same regex object.
1521c67d6573Sopenharmony_cipub type ProgramCache = AssertUnwindSafe<RefCell<ProgramCacheInner>>;
1522c67d6573Sopenharmony_ci
1523c67d6573Sopenharmony_ci#[derive(Debug)]
1524c67d6573Sopenharmony_cipub struct ProgramCacheInner {
1525c67d6573Sopenharmony_ci    pub pikevm: pikevm::Cache,
1526c67d6573Sopenharmony_ci    pub backtrack: backtrack::Cache,
1527c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
1528c67d6573Sopenharmony_ci    pub dfa: dfa::Cache,
1529c67d6573Sopenharmony_ci    #[cfg(feature = "perf-dfa")]
1530c67d6573Sopenharmony_ci    pub dfa_reverse: dfa::Cache,
1531c67d6573Sopenharmony_ci}
1532c67d6573Sopenharmony_ci
1533c67d6573Sopenharmony_ciimpl ProgramCacheInner {
1534c67d6573Sopenharmony_ci    fn new(ro: &ExecReadOnly) -> Self {
1535c67d6573Sopenharmony_ci        ProgramCacheInner {
1536c67d6573Sopenharmony_ci            pikevm: pikevm::Cache::new(&ro.nfa),
1537c67d6573Sopenharmony_ci            backtrack: backtrack::Cache::new(&ro.nfa),
1538c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
1539c67d6573Sopenharmony_ci            dfa: dfa::Cache::new(&ro.dfa),
1540c67d6573Sopenharmony_ci            #[cfg(feature = "perf-dfa")]
1541c67d6573Sopenharmony_ci            dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
1542c67d6573Sopenharmony_ci        }
1543c67d6573Sopenharmony_ci    }
1544c67d6573Sopenharmony_ci}
1545c67d6573Sopenharmony_ci
1546c67d6573Sopenharmony_ci/// Alternation literals checks if the given HIR is a simple alternation of
1547c67d6573Sopenharmony_ci/// literals, and if so, returns them. Otherwise, this returns None.
1548c67d6573Sopenharmony_ci#[cfg(feature = "perf-literal")]
1549c67d6573Sopenharmony_cifn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
1550c67d6573Sopenharmony_ci    use regex_syntax::hir::{HirKind, Literal};
1551c67d6573Sopenharmony_ci
1552c67d6573Sopenharmony_ci    // This is pretty hacky, but basically, if `is_alternation_literal` is
1553c67d6573Sopenharmony_ci    // true, then we can make several assumptions about the structure of our
1554c67d6573Sopenharmony_ci    // HIR. This is what justifies the `unreachable!` statements below.
1555c67d6573Sopenharmony_ci    //
1556c67d6573Sopenharmony_ci    // This code should be refactored once we overhaul this crate's
1557c67d6573Sopenharmony_ci    // optimization pipeline, because this is a terribly inflexible way to go
1558c67d6573Sopenharmony_ci    // about things.
1559c67d6573Sopenharmony_ci
1560c67d6573Sopenharmony_ci    if !expr.is_alternation_literal() {
1561c67d6573Sopenharmony_ci        return None;
1562c67d6573Sopenharmony_ci    }
1563c67d6573Sopenharmony_ci    let alts = match *expr.kind() {
1564c67d6573Sopenharmony_ci        HirKind::Alternation(ref alts) => alts,
1565c67d6573Sopenharmony_ci        _ => return None, // one literal isn't worth it
1566c67d6573Sopenharmony_ci    };
1567c67d6573Sopenharmony_ci
1568c67d6573Sopenharmony_ci    let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
1569c67d6573Sopenharmony_ci        Literal::Unicode(c) => {
1570c67d6573Sopenharmony_ci            let mut buf = [0; 4];
1571c67d6573Sopenharmony_ci            dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
1572c67d6573Sopenharmony_ci        }
1573c67d6573Sopenharmony_ci        Literal::Byte(b) => {
1574c67d6573Sopenharmony_ci            dst.push(b);
1575c67d6573Sopenharmony_ci        }
1576c67d6573Sopenharmony_ci    };
1577c67d6573Sopenharmony_ci
1578c67d6573Sopenharmony_ci    let mut lits = vec![];
1579c67d6573Sopenharmony_ci    for alt in alts {
1580c67d6573Sopenharmony_ci        let mut lit = vec![];
1581c67d6573Sopenharmony_ci        match *alt.kind() {
1582c67d6573Sopenharmony_ci            HirKind::Literal(ref x) => extendlit(x, &mut lit),
1583c67d6573Sopenharmony_ci            HirKind::Concat(ref exprs) => {
1584c67d6573Sopenharmony_ci                for e in exprs {
1585c67d6573Sopenharmony_ci                    match *e.kind() {
1586c67d6573Sopenharmony_ci                        HirKind::Literal(ref x) => extendlit(x, &mut lit),
1587c67d6573Sopenharmony_ci                        _ => unreachable!("expected literal, got {:?}", e),
1588c67d6573Sopenharmony_ci                    }
1589c67d6573Sopenharmony_ci                }
1590c67d6573Sopenharmony_ci            }
1591c67d6573Sopenharmony_ci            _ => unreachable!("expected literal or concat, got {:?}", alt),
1592c67d6573Sopenharmony_ci        }
1593c67d6573Sopenharmony_ci        lits.push(lit);
1594c67d6573Sopenharmony_ci    }
1595c67d6573Sopenharmony_ci    Some(lits)
1596c67d6573Sopenharmony_ci}
1597c67d6573Sopenharmony_ci
1598c67d6573Sopenharmony_ci#[cfg(test)]
1599c67d6573Sopenharmony_cimod test {
1600c67d6573Sopenharmony_ci    #[test]
1601c67d6573Sopenharmony_ci    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
1602c67d6573Sopenharmony_ci        use crate::internal::ExecBuilder;
1603c67d6573Sopenharmony_ci
1604c67d6573Sopenharmony_ci        let backtrack_bytes_re = ExecBuilder::new("^S")
1605c67d6573Sopenharmony_ci            .bounded_backtracking()
1606c67d6573Sopenharmony_ci            .only_utf8(false)
1607c67d6573Sopenharmony_ci            .build()
1608c67d6573Sopenharmony_ci            .map(|exec| exec.into_byte_regex())
1609c67d6573Sopenharmony_ci            .map_err(|err| format!("{}", err))
1610c67d6573Sopenharmony_ci            .unwrap();
1611c67d6573Sopenharmony_ci
1612c67d6573Sopenharmony_ci        let default_bytes_re = ExecBuilder::new("^S")
1613c67d6573Sopenharmony_ci            .only_utf8(false)
1614c67d6573Sopenharmony_ci            .build()
1615c67d6573Sopenharmony_ci            .map(|exec| exec.into_byte_regex())
1616c67d6573Sopenharmony_ci            .map_err(|err| format!("{}", err))
1617c67d6573Sopenharmony_ci            .unwrap();
1618c67d6573Sopenharmony_ci
1619c67d6573Sopenharmony_ci        let input = vec![83, 83];
1620c67d6573Sopenharmony_ci
1621c67d6573Sopenharmony_ci        let s1 = backtrack_bytes_re.split(&input);
1622c67d6573Sopenharmony_ci        let s2 = default_bytes_re.split(&input);
1623c67d6573Sopenharmony_ci        for (chunk1, chunk2) in s1.zip(s2) {
1624c67d6573Sopenharmony_ci            assert_eq!(chunk1, chunk2);
1625c67d6573Sopenharmony_ci        }
1626c67d6573Sopenharmony_ci    }
1627c67d6573Sopenharmony_ci
1628c67d6573Sopenharmony_ci    #[test]
1629c67d6573Sopenharmony_ci    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
1630c67d6573Sopenharmony_ci        use crate::internal::ExecBuilder;
1631c67d6573Sopenharmony_ci
1632c67d6573Sopenharmony_ci        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
1633c67d6573Sopenharmony_ci            .bounded_backtracking()
1634c67d6573Sopenharmony_ci            .bytes(true)
1635c67d6573Sopenharmony_ci            .build()
1636c67d6573Sopenharmony_ci            .map(|exec| exec.into_regex())
1637c67d6573Sopenharmony_ci            .map_err(|err| format!("{}", err))
1638c67d6573Sopenharmony_ci            .unwrap();
1639c67d6573Sopenharmony_ci
1640c67d6573Sopenharmony_ci        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
1641c67d6573Sopenharmony_ci            .bytes(true)
1642c67d6573Sopenharmony_ci            .build()
1643c67d6573Sopenharmony_ci            .map(|exec| exec.into_regex())
1644c67d6573Sopenharmony_ci            .map_err(|err| format!("{}", err))
1645c67d6573Sopenharmony_ci            .unwrap();
1646c67d6573Sopenharmony_ci
1647c67d6573Sopenharmony_ci        let input = "**";
1648c67d6573Sopenharmony_ci
1649c67d6573Sopenharmony_ci        let s1 = backtrack_bytes_re.split(input);
1650c67d6573Sopenharmony_ci        let s2 = default_bytes_re.split(input);
1651c67d6573Sopenharmony_ci        for (chunk1, chunk2) in s1.zip(s2) {
1652c67d6573Sopenharmony_ci            assert_eq!(chunk1, chunk2);
1653c67d6573Sopenharmony_ci        }
1654c67d6573Sopenharmony_ci    }
1655c67d6573Sopenharmony_ci}
1656