1c67d6573Sopenharmony_ci/// The set of user configurable options for compiling zero or more regexes. 2c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 3c67d6573Sopenharmony_ci#[allow(missing_docs)] 4c67d6573Sopenharmony_cipub struct RegexOptions { 5c67d6573Sopenharmony_ci pub pats: Vec<String>, 6c67d6573Sopenharmony_ci pub size_limit: usize, 7c67d6573Sopenharmony_ci pub dfa_size_limit: usize, 8c67d6573Sopenharmony_ci pub nest_limit: u32, 9c67d6573Sopenharmony_ci pub case_insensitive: bool, 10c67d6573Sopenharmony_ci pub multi_line: bool, 11c67d6573Sopenharmony_ci pub dot_matches_new_line: bool, 12c67d6573Sopenharmony_ci pub swap_greed: bool, 13c67d6573Sopenharmony_ci pub ignore_whitespace: bool, 14c67d6573Sopenharmony_ci pub unicode: bool, 15c67d6573Sopenharmony_ci pub octal: bool, 16c67d6573Sopenharmony_ci} 17c67d6573Sopenharmony_ci 18c67d6573Sopenharmony_ciimpl Default for RegexOptions { 19c67d6573Sopenharmony_ci fn default() -> Self { 20c67d6573Sopenharmony_ci RegexOptions { 21c67d6573Sopenharmony_ci pats: vec![], 22c67d6573Sopenharmony_ci size_limit: 10 * (1 << 20), 23c67d6573Sopenharmony_ci dfa_size_limit: 2 * (1 << 20), 24c67d6573Sopenharmony_ci nest_limit: 250, 25c67d6573Sopenharmony_ci case_insensitive: false, 26c67d6573Sopenharmony_ci multi_line: false, 27c67d6573Sopenharmony_ci dot_matches_new_line: false, 28c67d6573Sopenharmony_ci swap_greed: false, 29c67d6573Sopenharmony_ci ignore_whitespace: false, 30c67d6573Sopenharmony_ci unicode: true, 31c67d6573Sopenharmony_ci octal: false, 32c67d6573Sopenharmony_ci } 33c67d6573Sopenharmony_ci } 34c67d6573Sopenharmony_ci} 35c67d6573Sopenharmony_ci 36c67d6573Sopenharmony_cimacro_rules! define_builder { 37c67d6573Sopenharmony_ci ($name:ident, $regex_mod:ident, $only_utf8:expr) => { 38c67d6573Sopenharmony_ci pub mod $name { 39c67d6573Sopenharmony_ci use super::RegexOptions; 40c67d6573Sopenharmony_ci use crate::error::Error; 41c67d6573Sopenharmony_ci use crate::exec::ExecBuilder; 42c67d6573Sopenharmony_ci 43c67d6573Sopenharmony_ci use crate::$regex_mod::Regex; 44c67d6573Sopenharmony_ci 45c67d6573Sopenharmony_ci /// A configurable builder for a regular expression. 46c67d6573Sopenharmony_ci /// 47c67d6573Sopenharmony_ci /// A builder can be used to configure how the regex is built, for example, by 48c67d6573Sopenharmony_ci /// setting the default flags (which can be overridden in the expression 49c67d6573Sopenharmony_ci /// itself) or setting various limits. 50c67d6573Sopenharmony_ci #[derive(Debug)] 51c67d6573Sopenharmony_ci pub struct RegexBuilder(RegexOptions); 52c67d6573Sopenharmony_ci 53c67d6573Sopenharmony_ci impl RegexBuilder { 54c67d6573Sopenharmony_ci /// Create a new regular expression builder with the given pattern. 55c67d6573Sopenharmony_ci /// 56c67d6573Sopenharmony_ci /// If the pattern is invalid, then an error will be returned when 57c67d6573Sopenharmony_ci /// `build` is called. 58c67d6573Sopenharmony_ci pub fn new(pattern: &str) -> RegexBuilder { 59c67d6573Sopenharmony_ci let mut builder = RegexBuilder(RegexOptions::default()); 60c67d6573Sopenharmony_ci builder.0.pats.push(pattern.to_owned()); 61c67d6573Sopenharmony_ci builder 62c67d6573Sopenharmony_ci } 63c67d6573Sopenharmony_ci 64c67d6573Sopenharmony_ci /// Consume the builder and compile the regular expression. 65c67d6573Sopenharmony_ci /// 66c67d6573Sopenharmony_ci /// Note that calling `as_str` on the resulting `Regex` will produce the 67c67d6573Sopenharmony_ci /// pattern given to `new` verbatim. Notably, it will not incorporate any 68c67d6573Sopenharmony_ci /// of the flags set on this builder. 69c67d6573Sopenharmony_ci pub fn build(&self) -> Result<Regex, Error> { 70c67d6573Sopenharmony_ci ExecBuilder::new_options(self.0.clone()) 71c67d6573Sopenharmony_ci .only_utf8($only_utf8) 72c67d6573Sopenharmony_ci .build() 73c67d6573Sopenharmony_ci .map(Regex::from) 74c67d6573Sopenharmony_ci } 75c67d6573Sopenharmony_ci 76c67d6573Sopenharmony_ci /// Set the value for the case insensitive (`i`) flag. 77c67d6573Sopenharmony_ci /// 78c67d6573Sopenharmony_ci /// When enabled, letters in the pattern will match both upper case and 79c67d6573Sopenharmony_ci /// lower case variants. 80c67d6573Sopenharmony_ci pub fn case_insensitive( 81c67d6573Sopenharmony_ci &mut self, 82c67d6573Sopenharmony_ci yes: bool, 83c67d6573Sopenharmony_ci ) -> &mut RegexBuilder { 84c67d6573Sopenharmony_ci self.0.case_insensitive = yes; 85c67d6573Sopenharmony_ci self 86c67d6573Sopenharmony_ci } 87c67d6573Sopenharmony_ci 88c67d6573Sopenharmony_ci /// Set the value for the multi-line matching (`m`) flag. 89c67d6573Sopenharmony_ci /// 90c67d6573Sopenharmony_ci /// When enabled, `^` matches the beginning of lines and `$` matches the 91c67d6573Sopenharmony_ci /// end of lines. 92c67d6573Sopenharmony_ci /// 93c67d6573Sopenharmony_ci /// By default, they match beginning/end of the input. 94c67d6573Sopenharmony_ci pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { 95c67d6573Sopenharmony_ci self.0.multi_line = yes; 96c67d6573Sopenharmony_ci self 97c67d6573Sopenharmony_ci } 98c67d6573Sopenharmony_ci 99c67d6573Sopenharmony_ci /// Set the value for the any character (`s`) flag, where in `.` matches 100c67d6573Sopenharmony_ci /// anything when `s` is set and matches anything except for new line when 101c67d6573Sopenharmony_ci /// it is not set (the default). 102c67d6573Sopenharmony_ci /// 103c67d6573Sopenharmony_ci /// N.B. "matches anything" means "any byte" when Unicode is disabled and 104c67d6573Sopenharmony_ci /// means "any valid UTF-8 encoding of any Unicode scalar value" when 105c67d6573Sopenharmony_ci /// Unicode is enabled. 106c67d6573Sopenharmony_ci pub fn dot_matches_new_line( 107c67d6573Sopenharmony_ci &mut self, 108c67d6573Sopenharmony_ci yes: bool, 109c67d6573Sopenharmony_ci ) -> &mut RegexBuilder { 110c67d6573Sopenharmony_ci self.0.dot_matches_new_line = yes; 111c67d6573Sopenharmony_ci self 112c67d6573Sopenharmony_ci } 113c67d6573Sopenharmony_ci 114c67d6573Sopenharmony_ci /// Set the value for the greedy swap (`U`) flag. 115c67d6573Sopenharmony_ci /// 116c67d6573Sopenharmony_ci /// When enabled, a pattern like `a*` is lazy (tries to find shortest 117c67d6573Sopenharmony_ci /// match) and `a*?` is greedy (tries to find longest match). 118c67d6573Sopenharmony_ci /// 119c67d6573Sopenharmony_ci /// By default, `a*` is greedy and `a*?` is lazy. 120c67d6573Sopenharmony_ci pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { 121c67d6573Sopenharmony_ci self.0.swap_greed = yes; 122c67d6573Sopenharmony_ci self 123c67d6573Sopenharmony_ci } 124c67d6573Sopenharmony_ci 125c67d6573Sopenharmony_ci /// Set the value for the ignore whitespace (`x`) flag. 126c67d6573Sopenharmony_ci /// 127c67d6573Sopenharmony_ci /// When enabled, whitespace such as new lines and spaces will be ignored 128c67d6573Sopenharmony_ci /// between expressions of the pattern, and `#` can be used to start a 129c67d6573Sopenharmony_ci /// comment until the next new line. 130c67d6573Sopenharmony_ci pub fn ignore_whitespace( 131c67d6573Sopenharmony_ci &mut self, 132c67d6573Sopenharmony_ci yes: bool, 133c67d6573Sopenharmony_ci ) -> &mut RegexBuilder { 134c67d6573Sopenharmony_ci self.0.ignore_whitespace = yes; 135c67d6573Sopenharmony_ci self 136c67d6573Sopenharmony_ci } 137c67d6573Sopenharmony_ci 138c67d6573Sopenharmony_ci /// Set the value for the Unicode (`u`) flag. 139c67d6573Sopenharmony_ci /// 140c67d6573Sopenharmony_ci /// Enabled by default. When disabled, character classes such as `\w` only 141c67d6573Sopenharmony_ci /// match ASCII word characters instead of all Unicode word characters. 142c67d6573Sopenharmony_ci pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { 143c67d6573Sopenharmony_ci self.0.unicode = yes; 144c67d6573Sopenharmony_ci self 145c67d6573Sopenharmony_ci } 146c67d6573Sopenharmony_ci 147c67d6573Sopenharmony_ci /// Whether to support octal syntax or not. 148c67d6573Sopenharmony_ci /// 149c67d6573Sopenharmony_ci /// Octal syntax is a little-known way of uttering Unicode codepoints in 150c67d6573Sopenharmony_ci /// a regular expression. For example, `a`, `\x61`, `\u0061` and 151c67d6573Sopenharmony_ci /// `\141` are all equivalent regular expressions, where the last example 152c67d6573Sopenharmony_ci /// shows octal syntax. 153c67d6573Sopenharmony_ci /// 154c67d6573Sopenharmony_ci /// While supporting octal syntax isn't in and of itself a problem, it does 155c67d6573Sopenharmony_ci /// make good error messages harder. That is, in PCRE based regex engines, 156c67d6573Sopenharmony_ci /// syntax like `\0` invokes a backreference, which is explicitly 157c67d6573Sopenharmony_ci /// unsupported in Rust's regex engine. However, many users expect it to 158c67d6573Sopenharmony_ci /// be supported. Therefore, when octal support is disabled, the error 159c67d6573Sopenharmony_ci /// message will explicitly mention that backreferences aren't supported. 160c67d6573Sopenharmony_ci /// 161c67d6573Sopenharmony_ci /// Octal syntax is disabled by default. 162c67d6573Sopenharmony_ci pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { 163c67d6573Sopenharmony_ci self.0.octal = yes; 164c67d6573Sopenharmony_ci self 165c67d6573Sopenharmony_ci } 166c67d6573Sopenharmony_ci 167c67d6573Sopenharmony_ci /// Set the approximate size limit of the compiled regular expression. 168c67d6573Sopenharmony_ci /// 169c67d6573Sopenharmony_ci /// This roughly corresponds to the number of bytes occupied by a single 170c67d6573Sopenharmony_ci /// compiled program. If the program exceeds this number, then a 171c67d6573Sopenharmony_ci /// compilation error is returned. 172c67d6573Sopenharmony_ci pub fn size_limit( 173c67d6573Sopenharmony_ci &mut self, 174c67d6573Sopenharmony_ci limit: usize, 175c67d6573Sopenharmony_ci ) -> &mut RegexBuilder { 176c67d6573Sopenharmony_ci self.0.size_limit = limit; 177c67d6573Sopenharmony_ci self 178c67d6573Sopenharmony_ci } 179c67d6573Sopenharmony_ci 180c67d6573Sopenharmony_ci /// Set the approximate size of the cache used by the DFA. 181c67d6573Sopenharmony_ci /// 182c67d6573Sopenharmony_ci /// This roughly corresponds to the number of bytes that the DFA will 183c67d6573Sopenharmony_ci /// use while searching. 184c67d6573Sopenharmony_ci /// 185c67d6573Sopenharmony_ci /// Note that this is a *per thread* limit. There is no way to set a global 186c67d6573Sopenharmony_ci /// limit. In particular, if a regex is used from multiple threads 187c67d6573Sopenharmony_ci /// simultaneously, then each thread may use up to the number of bytes 188c67d6573Sopenharmony_ci /// specified here. 189c67d6573Sopenharmony_ci pub fn dfa_size_limit( 190c67d6573Sopenharmony_ci &mut self, 191c67d6573Sopenharmony_ci limit: usize, 192c67d6573Sopenharmony_ci ) -> &mut RegexBuilder { 193c67d6573Sopenharmony_ci self.0.dfa_size_limit = limit; 194c67d6573Sopenharmony_ci self 195c67d6573Sopenharmony_ci } 196c67d6573Sopenharmony_ci 197c67d6573Sopenharmony_ci /// Set the nesting limit for this parser. 198c67d6573Sopenharmony_ci /// 199c67d6573Sopenharmony_ci /// The nesting limit controls how deep the abstract syntax tree is allowed 200c67d6573Sopenharmony_ci /// to be. If the AST exceeds the given limit (e.g., with too many nested 201c67d6573Sopenharmony_ci /// groups), then an error is returned by the parser. 202c67d6573Sopenharmony_ci /// 203c67d6573Sopenharmony_ci /// The purpose of this limit is to act as a heuristic to prevent stack 204c67d6573Sopenharmony_ci /// overflow for consumers that do structural induction on an `Ast` using 205c67d6573Sopenharmony_ci /// explicit recursion. While this crate never does this (instead using 206c67d6573Sopenharmony_ci /// constant stack space and moving the call stack to the heap), other 207c67d6573Sopenharmony_ci /// crates may. 208c67d6573Sopenharmony_ci /// 209c67d6573Sopenharmony_ci /// This limit is not checked until the entire Ast is parsed. Therefore, 210c67d6573Sopenharmony_ci /// if callers want to put a limit on the amount of heap space used, then 211c67d6573Sopenharmony_ci /// they should impose a limit on the length, in bytes, of the concrete 212c67d6573Sopenharmony_ci /// pattern string. In particular, this is viable since this parser 213c67d6573Sopenharmony_ci /// implementation will limit itself to heap space proportional to the 214c67d6573Sopenharmony_ci /// length of the pattern string. 215c67d6573Sopenharmony_ci /// 216c67d6573Sopenharmony_ci /// Note that a nest limit of `0` will return a nest limit error for most 217c67d6573Sopenharmony_ci /// patterns but not all. For example, a nest limit of `0` permits `a` but 218c67d6573Sopenharmony_ci /// not `ab`, since `ab` requires a concatenation, which results in a nest 219c67d6573Sopenharmony_ci /// depth of `1`. In general, a nest limit is not something that manifests 220c67d6573Sopenharmony_ci /// in an obvious way in the concrete syntax, therefore, it should not be 221c67d6573Sopenharmony_ci /// used in a granular way. 222c67d6573Sopenharmony_ci pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { 223c67d6573Sopenharmony_ci self.0.nest_limit = limit; 224c67d6573Sopenharmony_ci self 225c67d6573Sopenharmony_ci } 226c67d6573Sopenharmony_ci } 227c67d6573Sopenharmony_ci } 228c67d6573Sopenharmony_ci }; 229c67d6573Sopenharmony_ci} 230c67d6573Sopenharmony_ci 231c67d6573Sopenharmony_cidefine_builder!(bytes, re_bytes, false); 232c67d6573Sopenharmony_cidefine_builder!(unicode, re_unicode, true); 233c67d6573Sopenharmony_ci 234c67d6573Sopenharmony_cimacro_rules! define_set_builder { 235c67d6573Sopenharmony_ci ($name:ident, $regex_mod:ident, $only_utf8:expr) => { 236c67d6573Sopenharmony_ci pub mod $name { 237c67d6573Sopenharmony_ci use super::RegexOptions; 238c67d6573Sopenharmony_ci use crate::error::Error; 239c67d6573Sopenharmony_ci use crate::exec::ExecBuilder; 240c67d6573Sopenharmony_ci 241c67d6573Sopenharmony_ci use crate::re_set::$regex_mod::RegexSet; 242c67d6573Sopenharmony_ci 243c67d6573Sopenharmony_ci /// A configurable builder for a set of regular expressions. 244c67d6573Sopenharmony_ci /// 245c67d6573Sopenharmony_ci /// A builder can be used to configure how the regexes are built, for example, 246c67d6573Sopenharmony_ci /// by setting the default flags (which can be overridden in the expression 247c67d6573Sopenharmony_ci /// itself) or setting various limits. 248c67d6573Sopenharmony_ci #[derive(Debug)] 249c67d6573Sopenharmony_ci pub struct RegexSetBuilder(RegexOptions); 250c67d6573Sopenharmony_ci 251c67d6573Sopenharmony_ci impl RegexSetBuilder { 252c67d6573Sopenharmony_ci /// Create a new regular expression builder with the given pattern. 253c67d6573Sopenharmony_ci /// 254c67d6573Sopenharmony_ci /// If the pattern is invalid, then an error will be returned when 255c67d6573Sopenharmony_ci /// `build` is called. 256c67d6573Sopenharmony_ci pub fn new<I, S>(patterns: I) -> RegexSetBuilder 257c67d6573Sopenharmony_ci where 258c67d6573Sopenharmony_ci S: AsRef<str>, 259c67d6573Sopenharmony_ci I: IntoIterator<Item = S>, 260c67d6573Sopenharmony_ci { 261c67d6573Sopenharmony_ci let mut builder = RegexSetBuilder(RegexOptions::default()); 262c67d6573Sopenharmony_ci for pat in patterns { 263c67d6573Sopenharmony_ci builder.0.pats.push(pat.as_ref().to_owned()); 264c67d6573Sopenharmony_ci } 265c67d6573Sopenharmony_ci builder 266c67d6573Sopenharmony_ci } 267c67d6573Sopenharmony_ci 268c67d6573Sopenharmony_ci /// Consume the builder and compile the regular expressions into a set. 269c67d6573Sopenharmony_ci pub fn build(&self) -> Result<RegexSet, Error> { 270c67d6573Sopenharmony_ci ExecBuilder::new_options(self.0.clone()) 271c67d6573Sopenharmony_ci .only_utf8($only_utf8) 272c67d6573Sopenharmony_ci .build() 273c67d6573Sopenharmony_ci .map(RegexSet::from) 274c67d6573Sopenharmony_ci } 275c67d6573Sopenharmony_ci 276c67d6573Sopenharmony_ci /// Set the value for the case insensitive (`i`) flag. 277c67d6573Sopenharmony_ci pub fn case_insensitive( 278c67d6573Sopenharmony_ci &mut self, 279c67d6573Sopenharmony_ci yes: bool, 280c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 281c67d6573Sopenharmony_ci self.0.case_insensitive = yes; 282c67d6573Sopenharmony_ci self 283c67d6573Sopenharmony_ci } 284c67d6573Sopenharmony_ci 285c67d6573Sopenharmony_ci /// Set the value for the multi-line matching (`m`) flag. 286c67d6573Sopenharmony_ci pub fn multi_line( 287c67d6573Sopenharmony_ci &mut self, 288c67d6573Sopenharmony_ci yes: bool, 289c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 290c67d6573Sopenharmony_ci self.0.multi_line = yes; 291c67d6573Sopenharmony_ci self 292c67d6573Sopenharmony_ci } 293c67d6573Sopenharmony_ci 294c67d6573Sopenharmony_ci /// Set the value for the any character (`s`) flag, where in `.` matches 295c67d6573Sopenharmony_ci /// anything when `s` is set and matches anything except for new line when 296c67d6573Sopenharmony_ci /// it is not set (the default). 297c67d6573Sopenharmony_ci /// 298c67d6573Sopenharmony_ci /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` 299c67d6573Sopenharmony_ci /// expressions and means "any Unicode scalar value" for `regex::RegexSet` 300c67d6573Sopenharmony_ci /// expressions. 301c67d6573Sopenharmony_ci pub fn dot_matches_new_line( 302c67d6573Sopenharmony_ci &mut self, 303c67d6573Sopenharmony_ci yes: bool, 304c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 305c67d6573Sopenharmony_ci self.0.dot_matches_new_line = yes; 306c67d6573Sopenharmony_ci self 307c67d6573Sopenharmony_ci } 308c67d6573Sopenharmony_ci 309c67d6573Sopenharmony_ci /// Set the value for the greedy swap (`U`) flag. 310c67d6573Sopenharmony_ci pub fn swap_greed( 311c67d6573Sopenharmony_ci &mut self, 312c67d6573Sopenharmony_ci yes: bool, 313c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 314c67d6573Sopenharmony_ci self.0.swap_greed = yes; 315c67d6573Sopenharmony_ci self 316c67d6573Sopenharmony_ci } 317c67d6573Sopenharmony_ci 318c67d6573Sopenharmony_ci /// Set the value for the ignore whitespace (`x`) flag. 319c67d6573Sopenharmony_ci pub fn ignore_whitespace( 320c67d6573Sopenharmony_ci &mut self, 321c67d6573Sopenharmony_ci yes: bool, 322c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 323c67d6573Sopenharmony_ci self.0.ignore_whitespace = yes; 324c67d6573Sopenharmony_ci self 325c67d6573Sopenharmony_ci } 326c67d6573Sopenharmony_ci 327c67d6573Sopenharmony_ci /// Set the value for the Unicode (`u`) flag. 328c67d6573Sopenharmony_ci pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { 329c67d6573Sopenharmony_ci self.0.unicode = yes; 330c67d6573Sopenharmony_ci self 331c67d6573Sopenharmony_ci } 332c67d6573Sopenharmony_ci 333c67d6573Sopenharmony_ci /// Whether to support octal syntax or not. 334c67d6573Sopenharmony_ci /// 335c67d6573Sopenharmony_ci /// Octal syntax is a little-known way of uttering Unicode codepoints in 336c67d6573Sopenharmony_ci /// a regular expression. For example, `a`, `\x61`, `\u0061` and 337c67d6573Sopenharmony_ci /// `\141` are all equivalent regular expressions, where the last example 338c67d6573Sopenharmony_ci /// shows octal syntax. 339c67d6573Sopenharmony_ci /// 340c67d6573Sopenharmony_ci /// While supporting octal syntax isn't in and of itself a problem, it does 341c67d6573Sopenharmony_ci /// make good error messages harder. That is, in PCRE based regex engines, 342c67d6573Sopenharmony_ci /// syntax like `\0` invokes a backreference, which is explicitly 343c67d6573Sopenharmony_ci /// unsupported in Rust's regex engine. However, many users expect it to 344c67d6573Sopenharmony_ci /// be supported. Therefore, when octal support is disabled, the error 345c67d6573Sopenharmony_ci /// message will explicitly mention that backreferences aren't supported. 346c67d6573Sopenharmony_ci /// 347c67d6573Sopenharmony_ci /// Octal syntax is disabled by default. 348c67d6573Sopenharmony_ci pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { 349c67d6573Sopenharmony_ci self.0.octal = yes; 350c67d6573Sopenharmony_ci self 351c67d6573Sopenharmony_ci } 352c67d6573Sopenharmony_ci 353c67d6573Sopenharmony_ci /// Set the approximate size limit of the compiled regular expression. 354c67d6573Sopenharmony_ci /// 355c67d6573Sopenharmony_ci /// This roughly corresponds to the number of bytes occupied by a single 356c67d6573Sopenharmony_ci /// compiled program. If the program exceeds this number, then a 357c67d6573Sopenharmony_ci /// compilation error is returned. 358c67d6573Sopenharmony_ci pub fn size_limit( 359c67d6573Sopenharmony_ci &mut self, 360c67d6573Sopenharmony_ci limit: usize, 361c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 362c67d6573Sopenharmony_ci self.0.size_limit = limit; 363c67d6573Sopenharmony_ci self 364c67d6573Sopenharmony_ci } 365c67d6573Sopenharmony_ci 366c67d6573Sopenharmony_ci /// Set the approximate size of the cache used by the DFA. 367c67d6573Sopenharmony_ci /// 368c67d6573Sopenharmony_ci /// This roughly corresponds to the number of bytes that the DFA will 369c67d6573Sopenharmony_ci /// use while searching. 370c67d6573Sopenharmony_ci /// 371c67d6573Sopenharmony_ci /// Note that this is a *per thread* limit. There is no way to set a global 372c67d6573Sopenharmony_ci /// limit. In particular, if a regex is used from multiple threads 373c67d6573Sopenharmony_ci /// simultaneously, then each thread may use up to the number of bytes 374c67d6573Sopenharmony_ci /// specified here. 375c67d6573Sopenharmony_ci pub fn dfa_size_limit( 376c67d6573Sopenharmony_ci &mut self, 377c67d6573Sopenharmony_ci limit: usize, 378c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 379c67d6573Sopenharmony_ci self.0.dfa_size_limit = limit; 380c67d6573Sopenharmony_ci self 381c67d6573Sopenharmony_ci } 382c67d6573Sopenharmony_ci 383c67d6573Sopenharmony_ci /// Set the nesting limit for this parser. 384c67d6573Sopenharmony_ci /// 385c67d6573Sopenharmony_ci /// The nesting limit controls how deep the abstract syntax tree is allowed 386c67d6573Sopenharmony_ci /// to be. If the AST exceeds the given limit (e.g., with too many nested 387c67d6573Sopenharmony_ci /// groups), then an error is returned by the parser. 388c67d6573Sopenharmony_ci /// 389c67d6573Sopenharmony_ci /// The purpose of this limit is to act as a heuristic to prevent stack 390c67d6573Sopenharmony_ci /// overflow for consumers that do structural induction on an `Ast` using 391c67d6573Sopenharmony_ci /// explicit recursion. While this crate never does this (instead using 392c67d6573Sopenharmony_ci /// constant stack space and moving the call stack to the heap), other 393c67d6573Sopenharmony_ci /// crates may. 394c67d6573Sopenharmony_ci /// 395c67d6573Sopenharmony_ci /// This limit is not checked until the entire Ast is parsed. Therefore, 396c67d6573Sopenharmony_ci /// if callers want to put a limit on the amount of heap space used, then 397c67d6573Sopenharmony_ci /// they should impose a limit on the length, in bytes, of the concrete 398c67d6573Sopenharmony_ci /// pattern string. In particular, this is viable since this parser 399c67d6573Sopenharmony_ci /// implementation will limit itself to heap space proportional to the 400c67d6573Sopenharmony_ci /// length of the pattern string. 401c67d6573Sopenharmony_ci /// 402c67d6573Sopenharmony_ci /// Note that a nest limit of `0` will return a nest limit error for most 403c67d6573Sopenharmony_ci /// patterns but not all. For example, a nest limit of `0` permits `a` but 404c67d6573Sopenharmony_ci /// not `ab`, since `ab` requires a concatenation, which results in a nest 405c67d6573Sopenharmony_ci /// depth of `1`. In general, a nest limit is not something that manifests 406c67d6573Sopenharmony_ci /// in an obvious way in the concrete syntax, therefore, it should not be 407c67d6573Sopenharmony_ci /// used in a granular way. 408c67d6573Sopenharmony_ci pub fn nest_limit( 409c67d6573Sopenharmony_ci &mut self, 410c67d6573Sopenharmony_ci limit: u32, 411c67d6573Sopenharmony_ci ) -> &mut RegexSetBuilder { 412c67d6573Sopenharmony_ci self.0.nest_limit = limit; 413c67d6573Sopenharmony_ci self 414c67d6573Sopenharmony_ci } 415c67d6573Sopenharmony_ci } 416c67d6573Sopenharmony_ci } 417c67d6573Sopenharmony_ci }; 418c67d6573Sopenharmony_ci} 419c67d6573Sopenharmony_ci 420c67d6573Sopenharmony_cidefine_set_builder!(set_bytes, bytes, false); 421c67d6573Sopenharmony_cidefine_set_builder!(set_unicode, unicode, true); 422