xref: /third_party/rust/crates/regex/src/re_set.rs (revision c67d6573)
1c67d6573Sopenharmony_cimacro_rules! define_set {
2c67d6573Sopenharmony_ci    ($name:ident, $builder_mod:ident, $text_ty:ty, $as_bytes:expr,
3c67d6573Sopenharmony_ci     $(#[$doc_regexset_example:meta])* ) => {
4c67d6573Sopenharmony_ci        pub mod $name {
5c67d6573Sopenharmony_ci            use std::fmt;
6c67d6573Sopenharmony_ci            use std::iter;
7c67d6573Sopenharmony_ci            use std::slice;
8c67d6573Sopenharmony_ci            use std::vec;
9c67d6573Sopenharmony_ci
10c67d6573Sopenharmony_ci            use crate::error::Error;
11c67d6573Sopenharmony_ci            use crate::exec::Exec;
12c67d6573Sopenharmony_ci            use crate::re_builder::$builder_mod::RegexSetBuilder;
13c67d6573Sopenharmony_ci            use crate::re_trait::RegularExpression;
14c67d6573Sopenharmony_ci
/// Match multiple (possibly overlapping) regular expressions in a single scan.
///
/// A regex set corresponds to the union of two or more regular expressions.
/// That is, a regex set will match text where at least one of its
/// constituent regular expressions matches. A regex set as it is formulated
/// here provides a touch more power: it will also report *which* regular
/// expressions in the set match. Indeed, this is the key difference between
/// regex sets and a single `Regex` with many alternates, since only one
/// alternate can match at a time.
///
/// For example, consider regular expressions to match email addresses and
/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
/// regex set is constructed from those regexes, then searching the text
/// `foo@example.com` will report both regexes as matching. Of course, one
/// could accomplish this by compiling each regex on its own and doing two
/// searches over the text. The key advantage of using a regex set is that it
/// will report the matching regexes using a *single pass through the text*.
/// If one has hundreds or thousands of regexes to match repeatedly (like a URL
/// router for a complex web application or a user agent matcher), then a regex
/// set can realize huge performance gains.
///
/// # Example
///
/// This shows how the above two regexes (for matching email addresses and
/// domains) might work:
///
$(#[$doc_regexset_example])*
///
/// Note that it would be possible to adapt the above example to using `Regex`
/// with an expression like:
///
/// ```text
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
/// ```
///
/// After a match, one could then inspect the capture groups to figure out
/// which alternates matched. The problem is that it is hard to make this
/// approach scale when there are many regexes since the overlap between each
/// alternate isn't always obvious to reason about.
///
/// # Limitations
///
/// Regex sets are limited to answering the following two questions:
///
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
/// instead of (2) since the matching engines can stop after the first match
/// is found.
///
/// You cannot directly extract [`Match`][crate::Match] or
/// [`Captures`][crate::Captures] objects from a regex set. If you need these
/// operations, the recommended approach is to compile each pattern in the set
/// independently and scan the exact same input a second time with those
/// independently compiled patterns:
///
/// ```rust
/// use regex::{Regex, RegexSet};
///
/// let patterns = ["foo", "bar"];
/// // Both patterns will match different ranges of this string.
/// let text = "barfoo";
///
/// // Compile a set matching any of our patterns.
/// let set = RegexSet::new(&patterns).unwrap();
/// // Compile each pattern independently.
/// let regexes: Vec<_> = set.patterns().iter()
///     .map(|pat| Regex::new(pat).unwrap())
///     .collect();
///
/// // Match against the whole set first and identify the individual
/// // matching patterns.
/// let matches: Vec<&str> = set.matches(text).into_iter()
///     // Dereference the match index to get the corresponding
///     // compiled pattern.
///     .map(|match_idx| &regexes[match_idx])
///     // To get match locations or any other info, we then have to search
///     // the exact same text again, using our separately-compiled pattern.
///     .map(|pat| pat.find(text).unwrap().as_str())
///     .collect();
///
/// // Matches arrive in the order the constituent patterns were declared,
/// // not the order they appear in the input.
/// assert_eq!(vec!["foo", "bar"], matches);
/// ```
///
/// # Performance
///
/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
/// search takes `O(mn)` time, where `m` is proportional to the size of the
/// regex set and `n` is proportional to the length of the search text.
#[derive(Clone)]
pub struct RegexSet(Exec);
109c67d6573Sopenharmony_ci
110c67d6573Sopenharmony_ciimpl RegexSet {
111c67d6573Sopenharmony_ci    /// Create a new regex set with the given regular expressions.
112c67d6573Sopenharmony_ci    ///
113c67d6573Sopenharmony_ci    /// This takes an iterator of `S`, where `S` is something that can produce
114c67d6573Sopenharmony_ci    /// a `&str`. If any of the strings in the iterator are not valid regular
115c67d6573Sopenharmony_ci    /// expressions, then an error is returned.
116c67d6573Sopenharmony_ci    ///
117c67d6573Sopenharmony_ci    /// # Example
118c67d6573Sopenharmony_ci    ///
119c67d6573Sopenharmony_ci    /// Create a new regex set from an iterator of strings:
120c67d6573Sopenharmony_ci    ///
121c67d6573Sopenharmony_ci    /// ```rust
122c67d6573Sopenharmony_ci    /// # use regex::RegexSet;
123c67d6573Sopenharmony_ci    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
124c67d6573Sopenharmony_ci    /// assert!(set.is_match("foo"));
125c67d6573Sopenharmony_ci    /// ```
126c67d6573Sopenharmony_ci    pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
127c67d6573Sopenharmony_ci            where S: AsRef<str>, I: IntoIterator<Item=S> {
128c67d6573Sopenharmony_ci        RegexSetBuilder::new(exprs).build()
129c67d6573Sopenharmony_ci    }
130c67d6573Sopenharmony_ci
131c67d6573Sopenharmony_ci    /// Create a new empty regex set.
132c67d6573Sopenharmony_ci    ///
133c67d6573Sopenharmony_ci    /// # Example
134c67d6573Sopenharmony_ci    ///
135c67d6573Sopenharmony_ci    /// ```rust
136c67d6573Sopenharmony_ci    /// # use regex::RegexSet;
137c67d6573Sopenharmony_ci    /// let set = RegexSet::empty();
138c67d6573Sopenharmony_ci    /// assert!(set.is_empty());
139c67d6573Sopenharmony_ci    /// ```
140c67d6573Sopenharmony_ci    pub fn empty() -> RegexSet {
141c67d6573Sopenharmony_ci        RegexSetBuilder::new(&[""; 0]).build().unwrap()
142c67d6573Sopenharmony_ci    }
143c67d6573Sopenharmony_ci
144c67d6573Sopenharmony_ci    /// Returns true if and only if one of the regexes in this set matches
145c67d6573Sopenharmony_ci    /// the text given.
146c67d6573Sopenharmony_ci    ///
147c67d6573Sopenharmony_ci    /// This method should be preferred if you only need to test whether any
148c67d6573Sopenharmony_ci    /// of the regexes in the set should match, but don't care about *which*
149c67d6573Sopenharmony_ci    /// regexes matched. This is because the underlying matching engine will
150c67d6573Sopenharmony_ci    /// quit immediately after seeing the first match instead of continuing to
151c67d6573Sopenharmony_ci    /// find all matches.
152c67d6573Sopenharmony_ci    ///
153c67d6573Sopenharmony_ci    /// Note that as with searches using `Regex`, the expression is unanchored
154c67d6573Sopenharmony_ci    /// by default. That is, if the regex does not start with `^` or `\A`, or
155c67d6573Sopenharmony_ci    /// end with `$` or `\z`, then it is permitted to match anywhere in the
156c67d6573Sopenharmony_ci    /// text.
157c67d6573Sopenharmony_ci    ///
158c67d6573Sopenharmony_ci    /// # Example
159c67d6573Sopenharmony_ci    ///
160c67d6573Sopenharmony_ci    /// Tests whether a set matches some text:
161c67d6573Sopenharmony_ci    ///
162c67d6573Sopenharmony_ci    /// ```rust
163c67d6573Sopenharmony_ci    /// # use regex::RegexSet;
164c67d6573Sopenharmony_ci    /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap();
165c67d6573Sopenharmony_ci    /// assert!(set.is_match("foo"));
166c67d6573Sopenharmony_ci    /// assert!(!set.is_match("☃"));
167c67d6573Sopenharmony_ci    /// ```
168c67d6573Sopenharmony_ci    pub fn is_match(&self, text: $text_ty) -> bool {
169c67d6573Sopenharmony_ci        self.is_match_at(text, 0)
170c67d6573Sopenharmony_ci    }
171c67d6573Sopenharmony_ci
172c67d6573Sopenharmony_ci    /// Returns the same as is_match, but starts the search at the given
173c67d6573Sopenharmony_ci    /// offset.
174c67d6573Sopenharmony_ci    ///
175c67d6573Sopenharmony_ci    /// The significance of the starting point is that it takes the surrounding
176c67d6573Sopenharmony_ci    /// context into consideration. For example, the `\A` anchor can only
177c67d6573Sopenharmony_ci    /// match when `start == 0`.
178c67d6573Sopenharmony_ci    #[doc(hidden)]
179c67d6573Sopenharmony_ci    pub fn is_match_at(&self, text: $text_ty, start: usize) -> bool {
180c67d6573Sopenharmony_ci        self.0.searcher().is_match_at($as_bytes(text), start)
181c67d6573Sopenharmony_ci    }
182c67d6573Sopenharmony_ci
183c67d6573Sopenharmony_ci    /// Returns the set of regular expressions that match in the given text.
184c67d6573Sopenharmony_ci    ///
185c67d6573Sopenharmony_ci    /// The set returned contains the index of each regular expression that
186c67d6573Sopenharmony_ci    /// matches in the given text. The index is in correspondence with the
187c67d6573Sopenharmony_ci    /// order of regular expressions given to `RegexSet`'s constructor.
188c67d6573Sopenharmony_ci    ///
189c67d6573Sopenharmony_ci    /// The set can also be used to iterate over the matched indices.
190c67d6573Sopenharmony_ci    ///
191c67d6573Sopenharmony_ci    /// Note that as with searches using `Regex`, the expression is unanchored
192c67d6573Sopenharmony_ci    /// by default. That is, if the regex does not start with `^` or `\A`, or
193c67d6573Sopenharmony_ci    /// end with `$` or `\z`, then it is permitted to match anywhere in the
194c67d6573Sopenharmony_ci    /// text.
195c67d6573Sopenharmony_ci    ///
196c67d6573Sopenharmony_ci    /// # Example
197c67d6573Sopenharmony_ci    ///
198c67d6573Sopenharmony_ci    /// Tests which regular expressions match the given text:
199c67d6573Sopenharmony_ci    ///
200c67d6573Sopenharmony_ci    /// ```rust
201c67d6573Sopenharmony_ci    /// # use regex::RegexSet;
202c67d6573Sopenharmony_ci    /// let set = RegexSet::new(&[
203c67d6573Sopenharmony_ci    ///     r"\w+",
204c67d6573Sopenharmony_ci    ///     r"\d+",
205c67d6573Sopenharmony_ci    ///     r"\pL+",
206c67d6573Sopenharmony_ci    ///     r"foo",
207c67d6573Sopenharmony_ci    ///     r"bar",
208c67d6573Sopenharmony_ci    ///     r"barfoo",
209c67d6573Sopenharmony_ci    ///     r"foobar",
210c67d6573Sopenharmony_ci    /// ]).unwrap();
211c67d6573Sopenharmony_ci    /// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
212c67d6573Sopenharmony_ci    /// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
213c67d6573Sopenharmony_ci    ///
214c67d6573Sopenharmony_ci    /// // You can also test whether a particular regex matched:
215c67d6573Sopenharmony_ci    /// let matches = set.matches("foobar");
216c67d6573Sopenharmony_ci    /// assert!(!matches.matched(5));
217c67d6573Sopenharmony_ci    /// assert!(matches.matched(6));
218c67d6573Sopenharmony_ci    /// ```
219c67d6573Sopenharmony_ci    pub fn matches(&self, text: $text_ty) -> SetMatches {
220c67d6573Sopenharmony_ci        let mut matches = vec![false; self.0.regex_strings().len()];
221c67d6573Sopenharmony_ci        let any = self.read_matches_at(&mut matches, text, 0);
222c67d6573Sopenharmony_ci        SetMatches {
223c67d6573Sopenharmony_ci            matched_any: any,
224c67d6573Sopenharmony_ci            matches: matches,
225c67d6573Sopenharmony_ci        }
226c67d6573Sopenharmony_ci    }
227c67d6573Sopenharmony_ci
228c67d6573Sopenharmony_ci    /// Returns the same as matches, but starts the search at the given
229c67d6573Sopenharmony_ci    /// offset and stores the matches into the slice given.
230c67d6573Sopenharmony_ci    ///
231c67d6573Sopenharmony_ci    /// The significance of the starting point is that it takes the surrounding
232c67d6573Sopenharmony_ci    /// context into consideration. For example, the `\A` anchor can only
233c67d6573Sopenharmony_ci    /// match when `start == 0`.
234c67d6573Sopenharmony_ci    ///
235c67d6573Sopenharmony_ci    /// `matches` must have a length that is at least the number of regexes
236c67d6573Sopenharmony_ci    /// in this set.
237c67d6573Sopenharmony_ci    ///
238c67d6573Sopenharmony_ci    /// This method returns true if and only if at least one member of
239c67d6573Sopenharmony_ci    /// `matches` is true after executing the set against `text`.
240c67d6573Sopenharmony_ci    #[doc(hidden)]
241c67d6573Sopenharmony_ci    pub fn read_matches_at(
242c67d6573Sopenharmony_ci        &self,
243c67d6573Sopenharmony_ci        matches: &mut [bool],
244c67d6573Sopenharmony_ci        text: $text_ty,
245c67d6573Sopenharmony_ci        start: usize,
246c67d6573Sopenharmony_ci    ) -> bool {
247c67d6573Sopenharmony_ci        self.0.searcher().many_matches_at(matches, $as_bytes(text), start)
248c67d6573Sopenharmony_ci    }
249c67d6573Sopenharmony_ci
250c67d6573Sopenharmony_ci    /// Returns the total number of regular expressions in this set.
251c67d6573Sopenharmony_ci    pub fn len(&self) -> usize {
252c67d6573Sopenharmony_ci        self.0.regex_strings().len()
253c67d6573Sopenharmony_ci    }
254c67d6573Sopenharmony_ci
255c67d6573Sopenharmony_ci    /// Returns `true` if this set contains no regular expressions.
256c67d6573Sopenharmony_ci    pub fn is_empty(&self) -> bool {
257c67d6573Sopenharmony_ci        self.0.regex_strings().is_empty()
258c67d6573Sopenharmony_ci    }
259c67d6573Sopenharmony_ci
260c67d6573Sopenharmony_ci    /// Returns the patterns that this set will match on.
261c67d6573Sopenharmony_ci    ///
262c67d6573Sopenharmony_ci    /// This function can be used to determine the pattern for a match. The
263c67d6573Sopenharmony_ci    /// slice returned has exactly as many patterns givens to this regex set,
264c67d6573Sopenharmony_ci    /// and the order of the slice is the same as the order of the patterns
265c67d6573Sopenharmony_ci    /// provided to the set.
266c67d6573Sopenharmony_ci    ///
267c67d6573Sopenharmony_ci    /// # Example
268c67d6573Sopenharmony_ci    ///
269c67d6573Sopenharmony_ci    /// ```rust
270c67d6573Sopenharmony_ci    /// # use regex::RegexSet;
271c67d6573Sopenharmony_ci    /// let set = RegexSet::new(&[
272c67d6573Sopenharmony_ci    ///     r"\w+",
273c67d6573Sopenharmony_ci    ///     r"\d+",
274c67d6573Sopenharmony_ci    ///     r"\pL+",
275c67d6573Sopenharmony_ci    ///     r"foo",
276c67d6573Sopenharmony_ci    ///     r"bar",
277c67d6573Sopenharmony_ci    ///     r"barfoo",
278c67d6573Sopenharmony_ci    ///     r"foobar",
279c67d6573Sopenharmony_ci    /// ]).unwrap();
280c67d6573Sopenharmony_ci    /// let matches: Vec<_> = set
281c67d6573Sopenharmony_ci    ///     .matches("foobar")
282c67d6573Sopenharmony_ci    ///     .into_iter()
283c67d6573Sopenharmony_ci    ///     .map(|match_idx| &set.patterns()[match_idx])
284c67d6573Sopenharmony_ci    ///     .collect();
285c67d6573Sopenharmony_ci    /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
286c67d6573Sopenharmony_ci    /// ```
287c67d6573Sopenharmony_ci    pub fn patterns(&self) -> &[String] {
288c67d6573Sopenharmony_ci        self.0.regex_strings()
289c67d6573Sopenharmony_ci    }
290c67d6573Sopenharmony_ci}
291c67d6573Sopenharmony_ci
/// A set of matches returned by a regex set.
///
/// Holds one flag per regular expression of the originating set, plus a
/// summary flag recording whether anything matched at all.
#[derive(Clone, Debug)]
pub struct SetMatches {
    /// The result reported by the search that filled in `matches`; exposed
    /// through `matched_any()`.
    matched_any: bool,
    /// `matches[i]` tells whether the regex at index `i` of the set matched.
    matches: Vec<bool>,
}
298c67d6573Sopenharmony_ci
299c67d6573Sopenharmony_ciimpl SetMatches {
300c67d6573Sopenharmony_ci    /// Whether this set contains any matches.
301c67d6573Sopenharmony_ci    pub fn matched_any(&self) -> bool {
302c67d6573Sopenharmony_ci        self.matched_any
303c67d6573Sopenharmony_ci    }
304c67d6573Sopenharmony_ci
305c67d6573Sopenharmony_ci    /// Whether the regex at the given index matched.
306c67d6573Sopenharmony_ci    ///
307c67d6573Sopenharmony_ci    /// The index for a regex is determined by its insertion order upon the
308c67d6573Sopenharmony_ci    /// initial construction of a `RegexSet`, starting at `0`.
309c67d6573Sopenharmony_ci    ///
310c67d6573Sopenharmony_ci    /// # Panics
311c67d6573Sopenharmony_ci    ///
312c67d6573Sopenharmony_ci    /// If `regex_index` is greater than or equal to `self.len()`.
313c67d6573Sopenharmony_ci    pub fn matched(&self, regex_index: usize) -> bool {
314c67d6573Sopenharmony_ci        self.matches[regex_index]
315c67d6573Sopenharmony_ci    }
316c67d6573Sopenharmony_ci
317c67d6573Sopenharmony_ci    /// The total number of regexes in the set that created these matches.
318c67d6573Sopenharmony_ci    pub fn len(&self) -> usize {
319c67d6573Sopenharmony_ci        self.matches.len()
320c67d6573Sopenharmony_ci    }
321c67d6573Sopenharmony_ci
322c67d6573Sopenharmony_ci    /// Returns an iterator over indexes in the regex that matched.
323c67d6573Sopenharmony_ci    ///
324c67d6573Sopenharmony_ci    /// This will always produces matches in ascending order of index, where
325c67d6573Sopenharmony_ci    /// the index corresponds to the index of the regex that matched with
326c67d6573Sopenharmony_ci    /// respect to its position when initially building the set.
327c67d6573Sopenharmony_ci    pub fn iter(&self) -> SetMatchesIter<'_> {
328c67d6573Sopenharmony_ci        SetMatchesIter((&*self.matches).into_iter().enumerate())
329c67d6573Sopenharmony_ci    }
330c67d6573Sopenharmony_ci}
331c67d6573Sopenharmony_ci
332c67d6573Sopenharmony_ciimpl IntoIterator for SetMatches {
333c67d6573Sopenharmony_ci    type IntoIter = SetMatchesIntoIter;
334c67d6573Sopenharmony_ci    type Item = usize;
335c67d6573Sopenharmony_ci
336c67d6573Sopenharmony_ci    fn into_iter(self) -> Self::IntoIter {
337c67d6573Sopenharmony_ci        SetMatchesIntoIter(self.matches.into_iter().enumerate())
338c67d6573Sopenharmony_ci    }
339c67d6573Sopenharmony_ci}
340c67d6573Sopenharmony_ci
341c67d6573Sopenharmony_ciimpl<'a> IntoIterator for &'a SetMatches {
342c67d6573Sopenharmony_ci    type IntoIter = SetMatchesIter<'a>;
343c67d6573Sopenharmony_ci    type Item = usize;
344c67d6573Sopenharmony_ci
345c67d6573Sopenharmony_ci    fn into_iter(self) -> Self::IntoIter {
346c67d6573Sopenharmony_ci        self.iter()
347c67d6573Sopenharmony_ci    }
348c67d6573Sopenharmony_ci}
349c67d6573Sopenharmony_ci
/// An owned iterator over the set of matches from a regex set.
///
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
#[derive(Debug)]
pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);

impl Iterator for SetMatchesIntoIter {
    type Item = usize;

    /// Advances to the next regex that matched and returns its index, or
    /// `None` once every remaining flag is `false` or the flags are
    /// exhausted.
    fn next(&mut self) -> Option<usize> {
        self.0.find(|&(_, matched)| matched).map(|(index, _)| index)
    }

    /// Returns `(0, upper)`, where `upper` is the number of flags not yet
    /// inspected.
    ///
    /// The lower bound must be `0`: this iterator skips every `false` flag,
    /// so any (or all) of the remaining slots may yield nothing. Forwarding
    /// the inner iterator's lower bound would promise more items than are
    /// actually produced.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (_, upper) = self.0.size_hint();
        (0, upper)
    }
}

impl DoubleEndedIterator for SetMatchesIntoIter {
    /// Same as `next`, but scans from the highest remaining index downwards.
    fn next_back(&mut self) -> Option<usize> {
        self.0.rfind(|&(_, matched)| matched).map(|(index, _)| index)
    }
}

impl iter::FusedIterator for SetMatchesIntoIter {}
389c67d6573Sopenharmony_ci
/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
///
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
#[derive(Clone, Debug)]
pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);

impl<'a> Iterator for SetMatchesIter<'a> {
    type Item = usize;

    /// Advances to the next regex that matched and returns its index, or
    /// `None` once every remaining flag is `false` or the flags are
    /// exhausted.
    fn next(&mut self) -> Option<usize> {
        self.0.find(|&(_, &matched)| matched).map(|(index, _)| index)
    }

    /// Returns `(0, upper)`, where `upper` is the number of flags not yet
    /// inspected.
    ///
    /// The lower bound must be `0`: this iterator skips every `false` flag,
    /// so any (or all) of the remaining slots may yield nothing. Forwarding
    /// the inner iterator's lower bound would promise more items than are
    /// actually produced.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (_, upper) = self.0.size_hint();
        (0, upper)
    }
}

impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
    /// Same as `next`, but scans from the highest remaining index downwards.
    fn next_back(&mut self) -> Option<usize> {
        self.0.rfind(|&(_, &matched)| matched).map(|(index, _)| index)
    }
}

impl<'a> iter::FusedIterator for SetMatchesIter<'a> {}
431c67d6573Sopenharmony_ci
432c67d6573Sopenharmony_ci#[doc(hidden)]
433c67d6573Sopenharmony_ciimpl From<Exec> for RegexSet {
434c67d6573Sopenharmony_ci    fn from(exec: Exec) -> Self {
435c67d6573Sopenharmony_ci        RegexSet(exec)
436c67d6573Sopenharmony_ci    }
437c67d6573Sopenharmony_ci}
438c67d6573Sopenharmony_ci
439c67d6573Sopenharmony_ciimpl fmt::Debug for RegexSet {
440c67d6573Sopenharmony_ci    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
441c67d6573Sopenharmony_ci        write!(f, "RegexSet({:?})", self.0.regex_strings())
442c67d6573Sopenharmony_ci    }
443c67d6573Sopenharmony_ci}
444c67d6573Sopenharmony_ci
// Views `&str` search text as the raw bytes handed to the searcher. Each
// expansion of `define_set!` names only one of the two conversion helpers,
// so the unused one would otherwise trigger a dead-code warning.
#[allow(dead_code)]
fn as_bytes_str(text: &str) -> &[u8] {
    str::as_bytes(text)
}
// Identity conversion for search text that already is a byte slice. Each
// expansion of `define_set!` names only one of the two conversion helpers,
// so the unused one would otherwise trigger a dead-code warning.
#[allow(dead_code)]
fn as_bytes_bytes(text: &[u8]) -> &[u8] {
    &text[..]
}
447c67d6573Sopenharmony_ci        }
448c67d6573Sopenharmony_ci    }
449c67d6573Sopenharmony_ci}
450c67d6573Sopenharmony_ci
// Expands to the `unicode` module: a `RegexSet` whose search text is `&str`,
// built through `re_builder::set_unicode::RegexSetBuilder` and converted to
// bytes with `as_bytes_str`. The doc-comment lines passed as the last
// argument are spliced into the `RegexSet` type documentation as its example.
define_set! {
    unicode,
    set_unicode,
    &str,
    as_bytes_str,
/// ```rust
/// # use regex::RegexSet;
/// let set = RegexSet::new(&[
///     r"[a-z]+@[a-z]+\.(com|org|net)",
///     r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
///
/// // Ask whether any regexes in the set match.
/// assert!(set.is_match("foo@example.com"));
///
/// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
/// assert_eq!(vec![0, 1], matches);
///
/// // Try again, but with text that only matches one of the regexes.
/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
/// assert_eq!(vec![1], matches);
///
/// // Try again, but with text that doesn't match any regex in the set.
/// let matches: Vec<_> = set.matches("example").into_iter().collect();
/// assert!(matches.is_empty());
/// ```
}
479c67d6573Sopenharmony_ci
// Expands to the `bytes` module: a `RegexSet` whose search text is `&[u8]`,
// built through `re_builder::set_bytes::RegexSetBuilder` and passed through
// unchanged by `as_bytes_bytes`. The doc-comment lines passed as the last
// argument are spliced into the `RegexSet` type documentation as its example.
define_set! {
    bytes,
    set_bytes,
    &[u8],
    as_bytes_bytes,
/// ```rust
/// # use regex::bytes::RegexSet;
/// let set = RegexSet::new(&[
///     r"[a-z]+@[a-z]+\.(com|org|net)",
///     r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
///
/// // Ask whether any regexes in the set match.
/// assert!(set.is_match(b"foo@example.com"));
///
/// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
/// assert_eq!(vec![0, 1], matches);
///
/// // Try again, but with text that only matches one of the regexes.
/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
/// assert_eq!(vec![1], matches);
///
/// // Try again, but with text that doesn't match any regex in the set.
/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
/// assert!(matches.is_empty());
/// ```
}
508