/// The set of user configurable options for compiling zero or more regexes.
///
/// The builders in this module each wrap one of these values and change it
/// through their setter methods. The `Default` implementation supplies the
/// starting value of every field.
#[derive(Clone, Debug)]
pub struct RegexOptions {
    /// The patterns to compile, in the order they were handed to the builder.
    pub pats: Vec<String>,
    /// Approximate limit, in bytes, on the size of a single compiled program.
    pub size_limit: usize,
    /// Approximate limit, in bytes, on the size of the cache used by the DFA.
    pub dfa_size_limit: usize,
    /// How deeply nested the parsed abstract syntax tree is allowed to be.
    pub nest_limit: u32,
    /// Value of the case insensitive (`i`) flag.
    pub case_insensitive: bool,
    /// Value of the multi-line matching (`m`) flag.
    pub multi_line: bool,
    /// Value of the any character (`s`) flag.
    pub dot_matches_new_line: bool,
    /// Value of the greedy swap (`U`) flag.
    pub swap_greed: bool,
    /// Value of the ignore whitespace (`x`) flag.
    pub ignore_whitespace: bool,
    /// Value of the Unicode (`u`) flag.
    pub unicode: bool,
    /// Whether octal syntax is supported.
    pub octal: bool,
}
17c67d6573Sopenharmony_ci
18c67d6573Sopenharmony_ciimpl Default for RegexOptions {
19c67d6573Sopenharmony_ci    fn default() -> Self {
20c67d6573Sopenharmony_ci        RegexOptions {
21c67d6573Sopenharmony_ci            pats: vec![],
22c67d6573Sopenharmony_ci            size_limit: 10 * (1 << 20),
23c67d6573Sopenharmony_ci            dfa_size_limit: 2 * (1 << 20),
24c67d6573Sopenharmony_ci            nest_limit: 250,
25c67d6573Sopenharmony_ci            case_insensitive: false,
26c67d6573Sopenharmony_ci            multi_line: false,
27c67d6573Sopenharmony_ci            dot_matches_new_line: false,
28c67d6573Sopenharmony_ci            swap_greed: false,
29c67d6573Sopenharmony_ci            ignore_whitespace: false,
30c67d6573Sopenharmony_ci            unicode: true,
31c67d6573Sopenharmony_ci            octal: false,
32c67d6573Sopenharmony_ci        }
33c67d6573Sopenharmony_ci    }
34c67d6573Sopenharmony_ci}
35c67d6573Sopenharmony_ci
// Generates a public module named `$name` that holds a `RegexBuilder` whose
// `build` method yields `crate::$regex_mod::Regex`. `$only_utf8` is handed
// unchanged to `ExecBuilder::only_utf8`.
macro_rules! define_builder {
    ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
        pub mod $name {
            use super::RegexOptions;
            use crate::error::Error;
            use crate::exec::ExecBuilder;

            use crate::$regex_mod::Regex;

            /// A configurable builder for a regular expression.
            ///
            /// Use a builder to control how a regex is compiled: it can set the
            /// default flags (the expression itself may still override them) and
            /// it can set various limits.
            #[derive(Debug)]
            pub struct RegexBuilder(RegexOptions);

            impl RegexBuilder {
                /// Create a new regular expression builder with the given pattern.
                ///
                /// The pattern is only stored here. If it is invalid, the error is
                /// reported when `build` is called.
                pub fn new(pattern: &str) -> Self {
                    let options = RegexOptions {
                        pats: vec![pattern.to_owned()],
                        ..RegexOptions::default()
                    };
                    RegexBuilder(options)
                }

                /// Compile the regular expression with the current configuration.
                ///
                /// The builder is borrowed rather than consumed: its options are
                /// cloned for the compilation.
                ///
                /// Calling `as_str` on the resulting `Regex` produces the pattern
                /// given to `new` verbatim. Notably, none of the flags set on this
                /// builder are incorporated into that string.
                ///
                /// # Errors
                ///
                /// Returns the `Error` reported by the underlying `ExecBuilder`,
                /// for example when the pattern is invalid.
                pub fn build(&self) -> Result<Regex, Error> {
                    let exec = ExecBuilder::new_options(self.0.clone())
                        .only_utf8($only_utf8)
                        .build()?;
                    Ok(Regex::from(exec))
                }

                /// Set the value for the case insensitive (`i`) flag.
                ///
                /// While enabled, a letter in the pattern matches both its upper
                /// case and its lower case variant.
                pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
                    self.0.case_insensitive = yes;
                    self
                }

                /// Set the value for the multi-line matching (`m`) flag.
                ///
                /// While enabled, `^` matches the beginning of lines and `$`
                /// matches the end of lines.
                ///
                /// By default, they match the beginning/end of the input.
                pub fn multi_line(&mut self, yes: bool) -> &mut Self {
                    self.0.multi_line = yes;
                    self
                }

                /// Set the value for the any character (`s`) flag.
                ///
                /// With `s` set, `.` matches anything. Without it (the default),
                /// `.` matches anything except for a new line.
                ///
                /// N.B. "matches anything" means "any byte" when Unicode is
                /// disabled and means "any valid UTF-8 encoding of any Unicode
                /// scalar value" when Unicode is enabled.
                pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
                    self.0.dot_matches_new_line = yes;
                    self
                }

                /// Set the value for the greedy swap (`U`) flag.
                ///
                /// While enabled, a pattern like `a*` is lazy (it tries to find
                /// the shortest match) and `a*?` is greedy (it tries to find the
                /// longest match).
                ///
                /// By default, `a*` is greedy and `a*?` is lazy.
                pub fn swap_greed(&mut self, yes: bool) -> &mut Self {
                    self.0.swap_greed = yes;
                    self
                }

                /// Set the value for the ignore whitespace (`x`) flag.
                ///
                /// While enabled, whitespace such as new lines and spaces is
                /// ignored between expressions of the pattern, and `#` starts a
                /// comment that runs until the next new line.
                pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
                    self.0.ignore_whitespace = yes;
                    self
                }

                /// Set the value for the Unicode (`u`) flag.
                ///
                /// Enabled by default. When disabled, character classes such as
                /// `\w` only match ASCII word characters instead of all Unicode
                /// word characters.
                pub fn unicode(&mut self, yes: bool) -> &mut Self {
                    self.0.unicode = yes;
                    self
                }

                /// Whether to support octal syntax or not.
                ///
                /// Octal syntax is a little-known way of uttering Unicode
                /// codepoints in a regular expression. For example, `a`, `\x61`,
                /// `\u0061` and `\141` are all equivalent regular expressions,
                /// where the last example shows octal syntax.
                ///
                /// Supporting octal syntax is not a problem in and of itself, but
                /// it does make good error messages harder. In PCRE based regex
                /// engines, syntax like `\0` invokes a backreference, which is
                /// explicitly unsupported in Rust's regex engine, yet many users
                /// expect it to be supported. Therefore, when octal support is
                /// disabled, the error message explicitly mentions that
                /// backreferences aren't supported.
                ///
                /// Octal syntax is disabled by default.
                pub fn octal(&mut self, yes: bool) -> &mut Self {
                    self.0.octal = yes;
                    self
                }

                /// Set the approximate size limit of the compiled regular
                /// expression.
                ///
                /// The limit roughly corresponds to the number of bytes occupied
                /// by a single compiled program. A program that exceeds this
                /// number results in a compilation error.
                pub fn size_limit(&mut self, limit: usize) -> &mut Self {
                    self.0.size_limit = limit;
                    self
                }

                /// Set the approximate size of the cache used by the DFA.
                ///
                /// The limit roughly corresponds to the number of bytes that the
                /// DFA will use while searching.
                ///
                /// Note that this is a *per thread* limit; there is no way to set
                /// a global limit. In particular, if a regex is used from
                /// multiple threads simultaneously, then each thread may use up
                /// to the number of bytes specified here.
                pub fn dfa_size_limit(&mut self, limit: usize) -> &mut Self {
                    self.0.dfa_size_limit = limit;
                    self
                }

                /// Set the nesting limit for this parser.
                ///
                /// The nesting limit controls how deep the abstract syntax tree
                /// is allowed to be. When the AST exceeds the given limit (e.g.,
                /// with too many nested groups), the parser returns an error.
                ///
                /// The limit exists as a heuristic to prevent stack overflow for
                /// consumers that do structural induction on an `Ast` using
                /// explicit recursion. This crate never does this (it uses
                /// constant stack space and moves the call stack to the heap),
                /// but other crates may.
                ///
                /// The limit is not checked until the entire Ast is parsed.
                /// Callers who want to bound the amount of heap space used should
                /// therefore impose a limit on the length, in bytes, of the
                /// concrete pattern string. That is viable because this parser
                /// implementation limits itself to heap space proportional to the
                /// length of the pattern string.
                ///
                /// Note that a nest limit of `0` returns a nest limit error for
                /// most patterns but not all. For example, a nest limit of `0`
                /// permits `a` but not `ab`, since `ab` requires a concatenation,
                /// which results in a nest depth of `1`. In general, a nest limit
                /// does not manifest in an obvious way in the concrete syntax, so
                /// it should not be used in a granular way.
                pub fn nest_limit(&mut self, limit: u32) -> &mut Self {
                    self.0.nest_limit = limit;
                    self
                }
            }
        }
    };
}
230c67d6573Sopenharmony_ci
231c67d6573Sopenharmony_cidefine_builder!(bytes, re_bytes, false);
232c67d6573Sopenharmony_cidefine_builder!(unicode, re_unicode, true);
233c67d6573Sopenharmony_ci
// Generates a public module named `$name` that holds a `RegexSetBuilder`
// whose `build` method yields `crate::re_set::$regex_mod::RegexSet`.
// `$only_utf8` is handed unchanged to `ExecBuilder::only_utf8`.
macro_rules! define_set_builder {
    ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
        pub mod $name {
            use super::RegexOptions;
            use crate::error::Error;
            use crate::exec::ExecBuilder;

            use crate::re_set::$regex_mod::RegexSet;

            /// A configurable builder for a set of regular expressions.
            ///
            /// A builder can be used to configure how the regexes are built, for
            /// example, by setting the default flags (which can be overridden in
            /// the expression itself) or setting various limits.
            #[derive(Debug)]
            pub struct RegexSetBuilder(RegexOptions);

            impl RegexSetBuilder {
                /// Create a new regular expression set builder with the given
                /// patterns.
                ///
                /// The patterns are stored in iteration order. If any of them is
                /// invalid, the error is reported when `build` is called.
                pub fn new<I, S>(patterns: I) -> Self
                where
                    S: AsRef<str>,
                    I: IntoIterator<Item = S>,
                {
                    let mut builder = RegexSetBuilder(RegexOptions::default());
                    // `extend` can reserve space up front from the iterator's
                    // size hint instead of growing one push at a time.
                    let owned = patterns.into_iter().map(|pat| pat.as_ref().to_owned());
                    builder.0.pats.extend(owned);
                    builder
                }

                /// Compile the regular expressions into a set with the current
                /// configuration.
                ///
                /// The builder is borrowed rather than consumed: its options are
                /// cloned for the compilation.
                ///
                /// # Errors
                ///
                /// Returns the `Error` reported by the underlying `ExecBuilder`,
                /// for example when a pattern is invalid.
                pub fn build(&self) -> Result<RegexSet, Error> {
                    ExecBuilder::new_options(self.0.clone())
                        .only_utf8($only_utf8)
                        .build()
                        .map(RegexSet::from)
                }

                /// Set the value for the case insensitive (`i`) flag.
                ///
                /// When enabled, letters in the patterns will match both upper
                /// case and lower case variants.
                pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
                    self.0.case_insensitive = yes;
                    self
                }

                /// Set the value for the multi-line matching (`m`) flag.
                ///
                /// When enabled, `^` matches the beginning of lines and `$`
                /// matches the end of lines.
                ///
                /// By default, they match beginning/end of the input.
                pub fn multi_line(&mut self, yes: bool) -> &mut Self {
                    self.0.multi_line = yes;
                    self
                }

                /// Set the value for the any character (`s`) flag, wherein `.`
                /// matches anything when `s` is set and matches anything except
                /// for new line when it is not set (the default).
                ///
                /// N.B. "matches anything" means "any byte" for
                /// `regex::bytes::RegexSet` expressions and means "any Unicode
                /// scalar value" for `regex::RegexSet` expressions.
                pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
                    self.0.dot_matches_new_line = yes;
                    self
                }

                /// Set the value for the greedy swap (`U`) flag.
                ///
                /// When enabled, a pattern like `a*` is lazy (tries to find
                /// shortest match) and `a*?` is greedy (tries to find longest
                /// match).
                ///
                /// By default, `a*` is greedy and `a*?` is lazy.
                pub fn swap_greed(&mut self, yes: bool) -> &mut Self {
                    self.0.swap_greed = yes;
                    self
                }

                /// Set the value for the ignore whitespace (`x`) flag.
                ///
                /// When enabled, whitespace such as new lines and spaces will be
                /// ignored between expressions of a pattern, and `#` can be used
                /// to start a comment until the next new line.
                pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
                    self.0.ignore_whitespace = yes;
                    self
                }

                /// Set the value for the Unicode (`u`) flag.
                ///
                /// Enabled by default. When disabled, character classes such as
                /// `\w` only match ASCII word characters instead of all Unicode
                /// word characters.
                pub fn unicode(&mut self, yes: bool) -> &mut Self {
                    self.0.unicode = yes;
                    self
                }

                /// Whether to support octal syntax or not.
                ///
                /// Octal syntax is a little-known way of uttering Unicode
                /// codepoints in a regular expression. For example, `a`, `\x61`,
                /// `\u0061` and `\141` are all equivalent regular expressions,
                /// where the last example shows octal syntax.
                ///
                /// While supporting octal syntax isn't in and of itself a
                /// problem, it does make good error messages harder. That is, in
                /// PCRE based regex engines, syntax like `\0` invokes a
                /// backreference, which is explicitly unsupported in Rust's regex
                /// engine. However, many users expect it to be supported.
                /// Therefore, when octal support is disabled, the error message
                /// will explicitly mention that backreferences aren't supported.
                ///
                /// Octal syntax is disabled by default.
                pub fn octal(&mut self, yes: bool) -> &mut Self {
                    self.0.octal = yes;
                    self
                }

                /// Set the approximate size limit of the compiled regular
                /// expression.
                ///
                /// This roughly corresponds to the number of bytes occupied by a
                /// single compiled program. If the program exceeds this number,
                /// then a compilation error is returned.
                pub fn size_limit(&mut self, limit: usize) -> &mut Self {
                    self.0.size_limit = limit;
                    self
                }

                /// Set the approximate size of the cache used by the DFA.
                ///
                /// This roughly corresponds to the number of bytes that the DFA
                /// will use while searching.
                ///
                /// Note that this is a *per thread* limit. There is no way to set
                /// a global limit. In particular, if a regex is used from
                /// multiple threads simultaneously, then each thread may use up
                /// to the number of bytes specified here.
                pub fn dfa_size_limit(&mut self, limit: usize) -> &mut Self {
                    self.0.dfa_size_limit = limit;
                    self
                }

                /// Set the nesting limit for this parser.
                ///
                /// The nesting limit controls how deep the abstract syntax tree
                /// is allowed to be. If the AST exceeds the given limit (e.g.,
                /// with too many nested groups), then an error is returned by the
                /// parser.
                ///
                /// The purpose of this limit is to act as a heuristic to prevent
                /// stack overflow for consumers that do structural induction on
                /// an `Ast` using explicit recursion. While this crate never does
                /// this (instead using constant stack space and moving the call
                /// stack to the heap), other crates may.
                ///
                /// This limit is not checked until the entire Ast is parsed.
                /// Therefore, if callers want to put a limit on the amount of
                /// heap space used, then they should impose a limit on the
                /// length, in bytes, of the concrete pattern string. In
                /// particular, this is viable since this parser implementation
                /// will limit itself to heap space proportional to the length of
                /// the pattern string.
                ///
                /// Note that a nest limit of `0` will return a nest limit error
                /// for most patterns but not all. For example, a nest limit of
                /// `0` permits `a` but not `ab`, since `ab` requires a
                /// concatenation, which results in a nest depth of `1`. In
                /// general, a nest limit is not something that manifests in an
                /// obvious way in the concrete syntax, therefore, it should not
                /// be used in a granular way.
                pub fn nest_limit(&mut self, limit: u32) -> &mut Self {
                    self.0.nest_limit = limit;
                    self
                }
            }
        }
    };
}
419c67d6573Sopenharmony_ci
420c67d6573Sopenharmony_cidefine_set_builder!(set_bytes, bytes, false);
421c67d6573Sopenharmony_cidefine_set_builder!(set_unicode, unicode, true);
422