1c67d6573Sopenharmony_ci/*! 2c67d6573Sopenharmony_ciThis module provides a regular expression parser. 3c67d6573Sopenharmony_ci*/ 4c67d6573Sopenharmony_ci 5c67d6573Sopenharmony_ciuse std::borrow::Borrow; 6c67d6573Sopenharmony_ciuse std::cell::{Cell, RefCell}; 7c67d6573Sopenharmony_ciuse std::mem; 8c67d6573Sopenharmony_ciuse std::result; 9c67d6573Sopenharmony_ci 10c67d6573Sopenharmony_ciuse crate::ast::{self, Ast, Position, Span}; 11c67d6573Sopenharmony_ciuse crate::either::Either; 12c67d6573Sopenharmony_ci 13c67d6573Sopenharmony_ciuse crate::is_meta_character; 14c67d6573Sopenharmony_ci 15c67d6573Sopenharmony_citype Result<T> = result::Result<T, ast::Error>; 16c67d6573Sopenharmony_ci 17c67d6573Sopenharmony_ci/// A primitive is an expression with no sub-expressions. This includes 18c67d6573Sopenharmony_ci/// literals, assertions and non-set character classes. This representation 19c67d6573Sopenharmony_ci/// is used as intermediate state in the parser. 20c67d6573Sopenharmony_ci/// 21c67d6573Sopenharmony_ci/// This does not include ASCII character classes, since they can only appear 22c67d6573Sopenharmony_ci/// within a set character class. 23c67d6573Sopenharmony_ci#[derive(Clone, Debug, Eq, PartialEq)] 24c67d6573Sopenharmony_cienum Primitive { 25c67d6573Sopenharmony_ci Literal(ast::Literal), 26c67d6573Sopenharmony_ci Assertion(ast::Assertion), 27c67d6573Sopenharmony_ci Dot(Span), 28c67d6573Sopenharmony_ci Perl(ast::ClassPerl), 29c67d6573Sopenharmony_ci Unicode(ast::ClassUnicode), 30c67d6573Sopenharmony_ci} 31c67d6573Sopenharmony_ci 32c67d6573Sopenharmony_ciimpl Primitive { 33c67d6573Sopenharmony_ci /// Return the span of this primitive. 
34c67d6573Sopenharmony_ci fn span(&self) -> &Span { 35c67d6573Sopenharmony_ci match *self { 36c67d6573Sopenharmony_ci Primitive::Literal(ref x) => &x.span, 37c67d6573Sopenharmony_ci Primitive::Assertion(ref x) => &x.span, 38c67d6573Sopenharmony_ci Primitive::Dot(ref span) => span, 39c67d6573Sopenharmony_ci Primitive::Perl(ref x) => &x.span, 40c67d6573Sopenharmony_ci Primitive::Unicode(ref x) => &x.span, 41c67d6573Sopenharmony_ci } 42c67d6573Sopenharmony_ci } 43c67d6573Sopenharmony_ci 44c67d6573Sopenharmony_ci /// Convert this primitive into a proper AST. 45c67d6573Sopenharmony_ci fn into_ast(self) -> Ast { 46c67d6573Sopenharmony_ci match self { 47c67d6573Sopenharmony_ci Primitive::Literal(lit) => Ast::Literal(lit), 48c67d6573Sopenharmony_ci Primitive::Assertion(assert) => Ast::Assertion(assert), 49c67d6573Sopenharmony_ci Primitive::Dot(span) => Ast::Dot(span), 50c67d6573Sopenharmony_ci Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)), 51c67d6573Sopenharmony_ci Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)), 52c67d6573Sopenharmony_ci } 53c67d6573Sopenharmony_ci } 54c67d6573Sopenharmony_ci 55c67d6573Sopenharmony_ci /// Convert this primitive into an item in a character class. 56c67d6573Sopenharmony_ci /// 57c67d6573Sopenharmony_ci /// If this primitive is not a legal item (i.e., an assertion or a dot), 58c67d6573Sopenharmony_ci /// then return an error. 
59c67d6573Sopenharmony_ci fn into_class_set_item<P: Borrow<Parser>>( 60c67d6573Sopenharmony_ci self, 61c67d6573Sopenharmony_ci p: &ParserI<'_, P>, 62c67d6573Sopenharmony_ci ) -> Result<ast::ClassSetItem> { 63c67d6573Sopenharmony_ci use self::Primitive::*; 64c67d6573Sopenharmony_ci use crate::ast::ClassSetItem; 65c67d6573Sopenharmony_ci 66c67d6573Sopenharmony_ci match self { 67c67d6573Sopenharmony_ci Literal(lit) => Ok(ClassSetItem::Literal(lit)), 68c67d6573Sopenharmony_ci Perl(cls) => Ok(ClassSetItem::Perl(cls)), 69c67d6573Sopenharmony_ci Unicode(cls) => Ok(ClassSetItem::Unicode(cls)), 70c67d6573Sopenharmony_ci x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)), 71c67d6573Sopenharmony_ci } 72c67d6573Sopenharmony_ci } 73c67d6573Sopenharmony_ci 74c67d6573Sopenharmony_ci /// Convert this primitive into a literal in a character class. In 75c67d6573Sopenharmony_ci /// particular, literals are the only valid items that can appear in 76c67d6573Sopenharmony_ci /// ranges. 77c67d6573Sopenharmony_ci /// 78c67d6573Sopenharmony_ci /// If this primitive is not a legal item (i.e., a class, assertion or a 79c67d6573Sopenharmony_ci /// dot), then return an error. 80c67d6573Sopenharmony_ci fn into_class_literal<P: Borrow<Parser>>( 81c67d6573Sopenharmony_ci self, 82c67d6573Sopenharmony_ci p: &ParserI<'_, P>, 83c67d6573Sopenharmony_ci ) -> Result<ast::Literal> { 84c67d6573Sopenharmony_ci use self::Primitive::*; 85c67d6573Sopenharmony_ci 86c67d6573Sopenharmony_ci match self { 87c67d6573Sopenharmony_ci Literal(lit) => Ok(lit), 88c67d6573Sopenharmony_ci x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)), 89c67d6573Sopenharmony_ci } 90c67d6573Sopenharmony_ci } 91c67d6573Sopenharmony_ci} 92c67d6573Sopenharmony_ci 93c67d6573Sopenharmony_ci/// Returns true if the given character is a hexadecimal digit. 
94c67d6573Sopenharmony_cifn is_hex(c: char) -> bool { 95c67d6573Sopenharmony_ci ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') 96c67d6573Sopenharmony_ci} 97c67d6573Sopenharmony_ci 98c67d6573Sopenharmony_ci/// Returns true if the given character is a valid in a capture group name. 99c67d6573Sopenharmony_ci/// 100c67d6573Sopenharmony_ci/// If `first` is true, then `c` is treated as the first character in the 101c67d6573Sopenharmony_ci/// group name (which must be alphabetic or underscore). 102c67d6573Sopenharmony_cifn is_capture_char(c: char, first: bool) -> bool { 103c67d6573Sopenharmony_ci c == '_' 104c67d6573Sopenharmony_ci || (!first 105c67d6573Sopenharmony_ci && (('0' <= c && c <= '9') || c == '.' || c == '[' || c == ']')) 106c67d6573Sopenharmony_ci || ('A' <= c && c <= 'Z') 107c67d6573Sopenharmony_ci || ('a' <= c && c <= 'z') 108c67d6573Sopenharmony_ci} 109c67d6573Sopenharmony_ci 110c67d6573Sopenharmony_ci/// A builder for a regular expression parser. 111c67d6573Sopenharmony_ci/// 112c67d6573Sopenharmony_ci/// This builder permits modifying configuration options for the parser. 113c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 114c67d6573Sopenharmony_cipub struct ParserBuilder { 115c67d6573Sopenharmony_ci ignore_whitespace: bool, 116c67d6573Sopenharmony_ci nest_limit: u32, 117c67d6573Sopenharmony_ci octal: bool, 118c67d6573Sopenharmony_ci} 119c67d6573Sopenharmony_ci 120c67d6573Sopenharmony_ciimpl Default for ParserBuilder { 121c67d6573Sopenharmony_ci fn default() -> ParserBuilder { 122c67d6573Sopenharmony_ci ParserBuilder::new() 123c67d6573Sopenharmony_ci } 124c67d6573Sopenharmony_ci} 125c67d6573Sopenharmony_ci 126c67d6573Sopenharmony_ciimpl ParserBuilder { 127c67d6573Sopenharmony_ci /// Create a new parser builder with a default configuration. 
128c67d6573Sopenharmony_ci pub fn new() -> ParserBuilder { 129c67d6573Sopenharmony_ci ParserBuilder { 130c67d6573Sopenharmony_ci ignore_whitespace: false, 131c67d6573Sopenharmony_ci nest_limit: 250, 132c67d6573Sopenharmony_ci octal: false, 133c67d6573Sopenharmony_ci } 134c67d6573Sopenharmony_ci } 135c67d6573Sopenharmony_ci 136c67d6573Sopenharmony_ci /// Build a parser from this configuration with the given pattern. 137c67d6573Sopenharmony_ci pub fn build(&self) -> Parser { 138c67d6573Sopenharmony_ci Parser { 139c67d6573Sopenharmony_ci pos: Cell::new(Position { offset: 0, line: 1, column: 1 }), 140c67d6573Sopenharmony_ci capture_index: Cell::new(0), 141c67d6573Sopenharmony_ci nest_limit: self.nest_limit, 142c67d6573Sopenharmony_ci octal: self.octal, 143c67d6573Sopenharmony_ci initial_ignore_whitespace: self.ignore_whitespace, 144c67d6573Sopenharmony_ci ignore_whitespace: Cell::new(self.ignore_whitespace), 145c67d6573Sopenharmony_ci comments: RefCell::new(vec![]), 146c67d6573Sopenharmony_ci stack_group: RefCell::new(vec![]), 147c67d6573Sopenharmony_ci stack_class: RefCell::new(vec![]), 148c67d6573Sopenharmony_ci capture_names: RefCell::new(vec![]), 149c67d6573Sopenharmony_ci scratch: RefCell::new(String::new()), 150c67d6573Sopenharmony_ci } 151c67d6573Sopenharmony_ci } 152c67d6573Sopenharmony_ci 153c67d6573Sopenharmony_ci /// Set the nesting limit for this parser. 154c67d6573Sopenharmony_ci /// 155c67d6573Sopenharmony_ci /// The nesting limit controls how deep the abstract syntax tree is allowed 156c67d6573Sopenharmony_ci /// to be. If the AST exceeds the given limit (e.g., with too many nested 157c67d6573Sopenharmony_ci /// groups), then an error is returned by the parser. 158c67d6573Sopenharmony_ci /// 159c67d6573Sopenharmony_ci /// The purpose of this limit is to act as a heuristic to prevent stack 160c67d6573Sopenharmony_ci /// overflow for consumers that do structural induction on an `Ast` using 161c67d6573Sopenharmony_ci /// explicit recursion. 
While this crate never does this (instead using 162c67d6573Sopenharmony_ci /// constant stack space and moving the call stack to the heap), other 163c67d6573Sopenharmony_ci /// crates may. 164c67d6573Sopenharmony_ci /// 165c67d6573Sopenharmony_ci /// This limit is not checked until the entire Ast is parsed. Therefore, 166c67d6573Sopenharmony_ci /// if callers want to put a limit on the amount of heap space used, then 167c67d6573Sopenharmony_ci /// they should impose a limit on the length, in bytes, of the concrete 168c67d6573Sopenharmony_ci /// pattern string. In particular, this is viable since this parser 169c67d6573Sopenharmony_ci /// implementation will limit itself to heap space proportional to the 170c67d6573Sopenharmony_ci /// length of the pattern string. 171c67d6573Sopenharmony_ci /// 172c67d6573Sopenharmony_ci /// Note that a nest limit of `0` will return a nest limit error for most 173c67d6573Sopenharmony_ci /// patterns but not all. For example, a nest limit of `0` permits `a` but 174c67d6573Sopenharmony_ci /// not `ab`, since `ab` requires a concatenation, which results in a nest 175c67d6573Sopenharmony_ci /// depth of `1`. In general, a nest limit is not something that manifests 176c67d6573Sopenharmony_ci /// in an obvious way in the concrete syntax, therefore, it should not be 177c67d6573Sopenharmony_ci /// used in a granular way. 178c67d6573Sopenharmony_ci pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder { 179c67d6573Sopenharmony_ci self.nest_limit = limit; 180c67d6573Sopenharmony_ci self 181c67d6573Sopenharmony_ci } 182c67d6573Sopenharmony_ci 183c67d6573Sopenharmony_ci /// Whether to support octal syntax or not. 184c67d6573Sopenharmony_ci /// 185c67d6573Sopenharmony_ci /// Octal syntax is a little-known way of uttering Unicode codepoints in 186c67d6573Sopenharmony_ci /// a regular expression. 
For example, `a`, `\x61`, `\u0061` and 187c67d6573Sopenharmony_ci /// `\141` are all equivalent regular expressions, where the last example 188c67d6573Sopenharmony_ci /// shows octal syntax. 189c67d6573Sopenharmony_ci /// 190c67d6573Sopenharmony_ci /// While supporting octal syntax isn't in and of itself a problem, it does 191c67d6573Sopenharmony_ci /// make good error messages harder. That is, in PCRE based regex engines, 192c67d6573Sopenharmony_ci /// syntax like `\0` invokes a backreference, which is explicitly 193c67d6573Sopenharmony_ci /// unsupported in Rust's regex engine. However, many users expect it to 194c67d6573Sopenharmony_ci /// be supported. Therefore, when octal support is disabled, the error 195c67d6573Sopenharmony_ci /// message will explicitly mention that backreferences aren't supported. 196c67d6573Sopenharmony_ci /// 197c67d6573Sopenharmony_ci /// Octal syntax is disabled by default. 198c67d6573Sopenharmony_ci pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder { 199c67d6573Sopenharmony_ci self.octal = yes; 200c67d6573Sopenharmony_ci self 201c67d6573Sopenharmony_ci } 202c67d6573Sopenharmony_ci 203c67d6573Sopenharmony_ci /// Enable verbose mode in the regular expression. 204c67d6573Sopenharmony_ci /// 205c67d6573Sopenharmony_ci /// When enabled, verbose mode permits insignificant whitespace in many 206c67d6573Sopenharmony_ci /// places in the regular expression, as well as comments. Comments are 207c67d6573Sopenharmony_ci /// started using `#` and continue until the end of the line. 208c67d6573Sopenharmony_ci /// 209c67d6573Sopenharmony_ci /// By default, this is disabled. It may be selectively enabled in the 210c67d6573Sopenharmony_ci /// regular expression by using the `x` flag regardless of this setting. 
211c67d6573Sopenharmony_ci pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder { 212c67d6573Sopenharmony_ci self.ignore_whitespace = yes; 213c67d6573Sopenharmony_ci self 214c67d6573Sopenharmony_ci } 215c67d6573Sopenharmony_ci} 216c67d6573Sopenharmony_ci 217c67d6573Sopenharmony_ci/// A regular expression parser. 218c67d6573Sopenharmony_ci/// 219c67d6573Sopenharmony_ci/// This parses a string representation of a regular expression into an 220c67d6573Sopenharmony_ci/// abstract syntax tree. The size of the tree is proportional to the length 221c67d6573Sopenharmony_ci/// of the regular expression pattern. 222c67d6573Sopenharmony_ci/// 223c67d6573Sopenharmony_ci/// A `Parser` can be configured in more detail via a 224c67d6573Sopenharmony_ci/// [`ParserBuilder`](struct.ParserBuilder.html). 225c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 226c67d6573Sopenharmony_cipub struct Parser { 227c67d6573Sopenharmony_ci /// The current position of the parser. 228c67d6573Sopenharmony_ci pos: Cell<Position>, 229c67d6573Sopenharmony_ci /// The current capture index. 230c67d6573Sopenharmony_ci capture_index: Cell<u32>, 231c67d6573Sopenharmony_ci /// The maximum number of open parens/brackets allowed. If the parser 232c67d6573Sopenharmony_ci /// exceeds this number, then an error is returned. 233c67d6573Sopenharmony_ci nest_limit: u32, 234c67d6573Sopenharmony_ci /// Whether to support octal syntax or not. When `false`, the parser will 235c67d6573Sopenharmony_ci /// return an error helpfully pointing out that backreferences are not 236c67d6573Sopenharmony_ci /// supported. 237c67d6573Sopenharmony_ci octal: bool, 238c67d6573Sopenharmony_ci /// The initial setting for `ignore_whitespace` as provided by 239c67d6573Sopenharmony_ci /// `ParserBuilder`. It is used when resetting the parser's state. 240c67d6573Sopenharmony_ci initial_ignore_whitespace: bool, 241c67d6573Sopenharmony_ci /// Whether whitespace should be ignored. 
When enabled, comments are 242c67d6573Sopenharmony_ci /// also permitted. 243c67d6573Sopenharmony_ci ignore_whitespace: Cell<bool>, 244c67d6573Sopenharmony_ci /// A list of comments, in order of appearance. 245c67d6573Sopenharmony_ci comments: RefCell<Vec<ast::Comment>>, 246c67d6573Sopenharmony_ci /// A stack of grouped sub-expressions, including alternations. 247c67d6573Sopenharmony_ci stack_group: RefCell<Vec<GroupState>>, 248c67d6573Sopenharmony_ci /// A stack of nested character classes. This is only non-empty when 249c67d6573Sopenharmony_ci /// parsing a class. 250c67d6573Sopenharmony_ci stack_class: RefCell<Vec<ClassState>>, 251c67d6573Sopenharmony_ci /// A sorted sequence of capture names. This is used to detect duplicate 252c67d6573Sopenharmony_ci /// capture names and report an error if one is detected. 253c67d6573Sopenharmony_ci capture_names: RefCell<Vec<ast::CaptureName>>, 254c67d6573Sopenharmony_ci /// A scratch buffer used in various places. Mostly this is used to 255c67d6573Sopenharmony_ci /// accumulate relevant characters from parts of a pattern. 256c67d6573Sopenharmony_ci scratch: RefCell<String>, 257c67d6573Sopenharmony_ci} 258c67d6573Sopenharmony_ci 259c67d6573Sopenharmony_ci/// ParserI is the internal parser implementation. 260c67d6573Sopenharmony_ci/// 261c67d6573Sopenharmony_ci/// We use this separate type so that we can carry the provided pattern string 262c67d6573Sopenharmony_ci/// along with us. In particular, a `Parser` internal state is not tied to any 263c67d6573Sopenharmony_ci/// one pattern, but `ParserI` is. 264c67d6573Sopenharmony_ci/// 265c67d6573Sopenharmony_ci/// This type also lets us use `ParserI<&Parser>` in production code while 266c67d6573Sopenharmony_ci/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes 267c67d6573Sopenharmony_ci/// work against the internal interface of the parser. 
268c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 269c67d6573Sopenharmony_cistruct ParserI<'s, P> { 270c67d6573Sopenharmony_ci /// The parser state/configuration. 271c67d6573Sopenharmony_ci parser: P, 272c67d6573Sopenharmony_ci /// The full regular expression provided by the user. 273c67d6573Sopenharmony_ci pattern: &'s str, 274c67d6573Sopenharmony_ci} 275c67d6573Sopenharmony_ci 276c67d6573Sopenharmony_ci/// GroupState represents a single stack frame while parsing nested groups 277c67d6573Sopenharmony_ci/// and alternations. Each frame records the state up to an opening parenthesis 278c67d6573Sopenharmony_ci/// or a alternating bracket `|`. 279c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 280c67d6573Sopenharmony_cienum GroupState { 281c67d6573Sopenharmony_ci /// This state is pushed whenever an opening group is found. 282c67d6573Sopenharmony_ci Group { 283c67d6573Sopenharmony_ci /// The concatenation immediately preceding the opening group. 284c67d6573Sopenharmony_ci concat: ast::Concat, 285c67d6573Sopenharmony_ci /// The group that has been opened. Its sub-AST is always empty. 286c67d6573Sopenharmony_ci group: ast::Group, 287c67d6573Sopenharmony_ci /// Whether this group has the `x` flag enabled or not. 288c67d6573Sopenharmony_ci ignore_whitespace: bool, 289c67d6573Sopenharmony_ci }, 290c67d6573Sopenharmony_ci /// This state is pushed whenever a new alternation branch is found. If 291c67d6573Sopenharmony_ci /// an alternation branch is found and this state is at the top of the 292c67d6573Sopenharmony_ci /// stack, then this state should be modified to include the new 293c67d6573Sopenharmony_ci /// alternation. 294c67d6573Sopenharmony_ci Alternation(ast::Alternation), 295c67d6573Sopenharmony_ci} 296c67d6573Sopenharmony_ci 297c67d6573Sopenharmony_ci/// ClassState represents a single stack frame while parsing character classes. 
298c67d6573Sopenharmony_ci/// Each frame records the state up to an intersection, difference, symmetric 299c67d6573Sopenharmony_ci/// difference or nested class. 300c67d6573Sopenharmony_ci/// 301c67d6573Sopenharmony_ci/// Note that a parser's character class stack is only non-empty when parsing 302c67d6573Sopenharmony_ci/// a character class. In all other cases, it is empty. 303c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 304c67d6573Sopenharmony_cienum ClassState { 305c67d6573Sopenharmony_ci /// This state is pushed whenever an opening bracket is found. 306c67d6573Sopenharmony_ci Open { 307c67d6573Sopenharmony_ci /// The union of class items immediately preceding this class. 308c67d6573Sopenharmony_ci union: ast::ClassSetUnion, 309c67d6573Sopenharmony_ci /// The class that has been opened. Typically this just corresponds 310c67d6573Sopenharmony_ci /// to the `[`, but it can also include `[^` since `^` indicates 311c67d6573Sopenharmony_ci /// negation of the class. 312c67d6573Sopenharmony_ci set: ast::ClassBracketed, 313c67d6573Sopenharmony_ci }, 314c67d6573Sopenharmony_ci /// This state is pushed when a operator is seen. When popped, the stored 315c67d6573Sopenharmony_ci /// set becomes the left hand side of the operator. 316c67d6573Sopenharmony_ci Op { 317c67d6573Sopenharmony_ci /// The type of the operation, i.e., &&, -- or ~~. 318c67d6573Sopenharmony_ci kind: ast::ClassSetBinaryOpKind, 319c67d6573Sopenharmony_ci /// The left-hand side of the operator. 320c67d6573Sopenharmony_ci lhs: ast::ClassSet, 321c67d6573Sopenharmony_ci }, 322c67d6573Sopenharmony_ci} 323c67d6573Sopenharmony_ci 324c67d6573Sopenharmony_ciimpl Parser { 325c67d6573Sopenharmony_ci /// Create a new parser with a default configuration. 326c67d6573Sopenharmony_ci /// 327c67d6573Sopenharmony_ci /// The parser can be run with either the `parse` or `parse_with_comments` 328c67d6573Sopenharmony_ci /// methods. The parse methods return an abstract syntax tree. 
329c67d6573Sopenharmony_ci /// 330c67d6573Sopenharmony_ci /// To set configuration options on the parser, use 331c67d6573Sopenharmony_ci /// [`ParserBuilder`](struct.ParserBuilder.html). 332c67d6573Sopenharmony_ci pub fn new() -> Parser { 333c67d6573Sopenharmony_ci ParserBuilder::new().build() 334c67d6573Sopenharmony_ci } 335c67d6573Sopenharmony_ci 336c67d6573Sopenharmony_ci /// Parse the regular expression into an abstract syntax tree. 337c67d6573Sopenharmony_ci pub fn parse(&mut self, pattern: &str) -> Result<Ast> { 338c67d6573Sopenharmony_ci ParserI::new(self, pattern).parse() 339c67d6573Sopenharmony_ci } 340c67d6573Sopenharmony_ci 341c67d6573Sopenharmony_ci /// Parse the regular expression and return an abstract syntax tree with 342c67d6573Sopenharmony_ci /// all of the comments found in the pattern. 343c67d6573Sopenharmony_ci pub fn parse_with_comments( 344c67d6573Sopenharmony_ci &mut self, 345c67d6573Sopenharmony_ci pattern: &str, 346c67d6573Sopenharmony_ci ) -> Result<ast::WithComments> { 347c67d6573Sopenharmony_ci ParserI::new(self, pattern).parse_with_comments() 348c67d6573Sopenharmony_ci } 349c67d6573Sopenharmony_ci 350c67d6573Sopenharmony_ci /// Reset the internal state of a parser. 351c67d6573Sopenharmony_ci /// 352c67d6573Sopenharmony_ci /// This is called at the beginning of every parse. This prevents the 353c67d6573Sopenharmony_ci /// parser from running with inconsistent state (say, if a previous 354c67d6573Sopenharmony_ci /// invocation returned an error and the parser is reused). 355c67d6573Sopenharmony_ci fn reset(&self) { 356c67d6573Sopenharmony_ci // These settings should be in line with the construction 357c67d6573Sopenharmony_ci // in `ParserBuilder::build`. 
358c67d6573Sopenharmony_ci self.pos.set(Position { offset: 0, line: 1, column: 1 }); 359c67d6573Sopenharmony_ci self.ignore_whitespace.set(self.initial_ignore_whitespace); 360c67d6573Sopenharmony_ci self.comments.borrow_mut().clear(); 361c67d6573Sopenharmony_ci self.stack_group.borrow_mut().clear(); 362c67d6573Sopenharmony_ci self.stack_class.borrow_mut().clear(); 363c67d6573Sopenharmony_ci } 364c67d6573Sopenharmony_ci} 365c67d6573Sopenharmony_ci 366c67d6573Sopenharmony_ciimpl<'s, P: Borrow<Parser>> ParserI<'s, P> { 367c67d6573Sopenharmony_ci /// Build an internal parser from a parser configuration and a pattern. 368c67d6573Sopenharmony_ci fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> { 369c67d6573Sopenharmony_ci ParserI { parser, pattern } 370c67d6573Sopenharmony_ci } 371c67d6573Sopenharmony_ci 372c67d6573Sopenharmony_ci /// Return a reference to the parser state. 373c67d6573Sopenharmony_ci fn parser(&self) -> &Parser { 374c67d6573Sopenharmony_ci self.parser.borrow() 375c67d6573Sopenharmony_ci } 376c67d6573Sopenharmony_ci 377c67d6573Sopenharmony_ci /// Return a reference to the pattern being parsed. 378c67d6573Sopenharmony_ci fn pattern(&self) -> &str { 379c67d6573Sopenharmony_ci self.pattern.borrow() 380c67d6573Sopenharmony_ci } 381c67d6573Sopenharmony_ci 382c67d6573Sopenharmony_ci /// Create a new error with the given span and error type. 383c67d6573Sopenharmony_ci fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error { 384c67d6573Sopenharmony_ci ast::Error { kind, pattern: self.pattern().to_string(), span } 385c67d6573Sopenharmony_ci } 386c67d6573Sopenharmony_ci 387c67d6573Sopenharmony_ci /// Return the current offset of the parser. 388c67d6573Sopenharmony_ci /// 389c67d6573Sopenharmony_ci /// The offset starts at `0` from the beginning of the regular expression 390c67d6573Sopenharmony_ci /// pattern string. 
391c67d6573Sopenharmony_ci fn offset(&self) -> usize { 392c67d6573Sopenharmony_ci self.parser().pos.get().offset 393c67d6573Sopenharmony_ci } 394c67d6573Sopenharmony_ci 395c67d6573Sopenharmony_ci /// Return the current line number of the parser. 396c67d6573Sopenharmony_ci /// 397c67d6573Sopenharmony_ci /// The line number starts at `1`. 398c67d6573Sopenharmony_ci fn line(&self) -> usize { 399c67d6573Sopenharmony_ci self.parser().pos.get().line 400c67d6573Sopenharmony_ci } 401c67d6573Sopenharmony_ci 402c67d6573Sopenharmony_ci /// Return the current column of the parser. 403c67d6573Sopenharmony_ci /// 404c67d6573Sopenharmony_ci /// The column number starts at `1` and is reset whenever a `\n` is seen. 405c67d6573Sopenharmony_ci fn column(&self) -> usize { 406c67d6573Sopenharmony_ci self.parser().pos.get().column 407c67d6573Sopenharmony_ci } 408c67d6573Sopenharmony_ci 409c67d6573Sopenharmony_ci /// Return the next capturing index. Each subsequent call increments the 410c67d6573Sopenharmony_ci /// internal index. 411c67d6573Sopenharmony_ci /// 412c67d6573Sopenharmony_ci /// The span given should correspond to the location of the opening 413c67d6573Sopenharmony_ci /// parenthesis. 414c67d6573Sopenharmony_ci /// 415c67d6573Sopenharmony_ci /// If the capture limit is exceeded, then an error is returned. 416c67d6573Sopenharmony_ci fn next_capture_index(&self, span: Span) -> Result<u32> { 417c67d6573Sopenharmony_ci let current = self.parser().capture_index.get(); 418c67d6573Sopenharmony_ci let i = current.checked_add(1).ok_or_else(|| { 419c67d6573Sopenharmony_ci self.error(span, ast::ErrorKind::CaptureLimitExceeded) 420c67d6573Sopenharmony_ci })?; 421c67d6573Sopenharmony_ci self.parser().capture_index.set(i); 422c67d6573Sopenharmony_ci Ok(i) 423c67d6573Sopenharmony_ci } 424c67d6573Sopenharmony_ci 425c67d6573Sopenharmony_ci /// Adds the given capture name to this parser. If this capture name has 426c67d6573Sopenharmony_ci /// already been used, then an error is returned. 
427c67d6573Sopenharmony_ci fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> { 428c67d6573Sopenharmony_ci let mut names = self.parser().capture_names.borrow_mut(); 429c67d6573Sopenharmony_ci match names 430c67d6573Sopenharmony_ci .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) 431c67d6573Sopenharmony_ci { 432c67d6573Sopenharmony_ci Err(i) => { 433c67d6573Sopenharmony_ci names.insert(i, cap.clone()); 434c67d6573Sopenharmony_ci Ok(()) 435c67d6573Sopenharmony_ci } 436c67d6573Sopenharmony_ci Ok(i) => Err(self.error( 437c67d6573Sopenharmony_ci cap.span, 438c67d6573Sopenharmony_ci ast::ErrorKind::GroupNameDuplicate { original: names[i].span }, 439c67d6573Sopenharmony_ci )), 440c67d6573Sopenharmony_ci } 441c67d6573Sopenharmony_ci } 442c67d6573Sopenharmony_ci 443c67d6573Sopenharmony_ci /// Return whether the parser should ignore whitespace or not. 444c67d6573Sopenharmony_ci fn ignore_whitespace(&self) -> bool { 445c67d6573Sopenharmony_ci self.parser().ignore_whitespace.get() 446c67d6573Sopenharmony_ci } 447c67d6573Sopenharmony_ci 448c67d6573Sopenharmony_ci /// Return the character at the current position of the parser. 449c67d6573Sopenharmony_ci /// 450c67d6573Sopenharmony_ci /// This panics if the current position does not point to a valid char. 451c67d6573Sopenharmony_ci fn char(&self) -> char { 452c67d6573Sopenharmony_ci self.char_at(self.offset()) 453c67d6573Sopenharmony_ci } 454c67d6573Sopenharmony_ci 455c67d6573Sopenharmony_ci /// Return the character at the given position. 456c67d6573Sopenharmony_ci /// 457c67d6573Sopenharmony_ci /// This panics if the given position does not point to a valid char. 458c67d6573Sopenharmony_ci fn char_at(&self, i: usize) -> char { 459c67d6573Sopenharmony_ci self.pattern()[i..] 
460c67d6573Sopenharmony_ci .chars() 461c67d6573Sopenharmony_ci .next() 462c67d6573Sopenharmony_ci .unwrap_or_else(|| panic!("expected char at offset {}", i)) 463c67d6573Sopenharmony_ci } 464c67d6573Sopenharmony_ci 465c67d6573Sopenharmony_ci /// Bump the parser to the next Unicode scalar value. 466c67d6573Sopenharmony_ci /// 467c67d6573Sopenharmony_ci /// If the end of the input has been reached, then `false` is returned. 468c67d6573Sopenharmony_ci fn bump(&self) -> bool { 469c67d6573Sopenharmony_ci if self.is_eof() { 470c67d6573Sopenharmony_ci return false; 471c67d6573Sopenharmony_ci } 472c67d6573Sopenharmony_ci let Position { mut offset, mut line, mut column } = self.pos(); 473c67d6573Sopenharmony_ci if self.char() == '\n' { 474c67d6573Sopenharmony_ci line = line.checked_add(1).unwrap(); 475c67d6573Sopenharmony_ci column = 1; 476c67d6573Sopenharmony_ci } else { 477c67d6573Sopenharmony_ci column = column.checked_add(1).unwrap(); 478c67d6573Sopenharmony_ci } 479c67d6573Sopenharmony_ci offset += self.char().len_utf8(); 480c67d6573Sopenharmony_ci self.parser().pos.set(Position { offset, line, column }); 481c67d6573Sopenharmony_ci self.pattern()[self.offset()..].chars().next().is_some() 482c67d6573Sopenharmony_ci } 483c67d6573Sopenharmony_ci 484c67d6573Sopenharmony_ci /// If the substring starting at the current position of the parser has 485c67d6573Sopenharmony_ci /// the given prefix, then bump the parser to the character immediately 486c67d6573Sopenharmony_ci /// following the prefix and return true. Otherwise, don't bump the parser 487c67d6573Sopenharmony_ci /// and return false. 
488c67d6573Sopenharmony_ci fn bump_if(&self, prefix: &str) -> bool { 489c67d6573Sopenharmony_ci if self.pattern()[self.offset()..].starts_with(prefix) { 490c67d6573Sopenharmony_ci for _ in 0..prefix.chars().count() { 491c67d6573Sopenharmony_ci self.bump(); 492c67d6573Sopenharmony_ci } 493c67d6573Sopenharmony_ci true 494c67d6573Sopenharmony_ci } else { 495c67d6573Sopenharmony_ci false 496c67d6573Sopenharmony_ci } 497c67d6573Sopenharmony_ci } 498c67d6573Sopenharmony_ci 499c67d6573Sopenharmony_ci /// Returns true if and only if the parser is positioned at a look-around 500c67d6573Sopenharmony_ci /// prefix. The conditions under which this returns true must always 501c67d6573Sopenharmony_ci /// correspond to a regular expression that would otherwise be consider 502c67d6573Sopenharmony_ci /// invalid. 503c67d6573Sopenharmony_ci /// 504c67d6573Sopenharmony_ci /// This should only be called immediately after parsing the opening of 505c67d6573Sopenharmony_ci /// a group or a set of flags. 506c67d6573Sopenharmony_ci fn is_lookaround_prefix(&self) -> bool { 507c67d6573Sopenharmony_ci self.bump_if("?=") 508c67d6573Sopenharmony_ci || self.bump_if("?!") 509c67d6573Sopenharmony_ci || self.bump_if("?<=") 510c67d6573Sopenharmony_ci || self.bump_if("?<!") 511c67d6573Sopenharmony_ci } 512c67d6573Sopenharmony_ci 513c67d6573Sopenharmony_ci /// Bump the parser, and if the `x` flag is enabled, bump through any 514c67d6573Sopenharmony_ci /// subsequent spaces. Return true if and only if the parser is not at 515c67d6573Sopenharmony_ci /// EOF. 
516c67d6573Sopenharmony_ci fn bump_and_bump_space(&self) -> bool { 517c67d6573Sopenharmony_ci if !self.bump() { 518c67d6573Sopenharmony_ci return false; 519c67d6573Sopenharmony_ci } 520c67d6573Sopenharmony_ci self.bump_space(); 521c67d6573Sopenharmony_ci !self.is_eof() 522c67d6573Sopenharmony_ci } 523c67d6573Sopenharmony_ci 524c67d6573Sopenharmony_ci /// If the `x` flag is enabled (i.e., whitespace insensitivity with 525c67d6573Sopenharmony_ci /// comments), then this will advance the parser through all whitespace 526c67d6573Sopenharmony_ci /// and comments to the next non-whitespace non-comment byte. 527c67d6573Sopenharmony_ci /// 528c67d6573Sopenharmony_ci /// If the `x` flag is disabled, then this is a no-op. 529c67d6573Sopenharmony_ci /// 530c67d6573Sopenharmony_ci /// This should be used selectively throughout the parser where 531c67d6573Sopenharmony_ci /// arbitrary whitespace is permitted when the `x` flag is enabled. For 532c67d6573Sopenharmony_ci /// example, `{ 5 , 6}` is equivalent to `{5,6}`. 
533c67d6573Sopenharmony_ci fn bump_space(&self) { 534c67d6573Sopenharmony_ci if !self.ignore_whitespace() { 535c67d6573Sopenharmony_ci return; 536c67d6573Sopenharmony_ci } 537c67d6573Sopenharmony_ci while !self.is_eof() { 538c67d6573Sopenharmony_ci if self.char().is_whitespace() { 539c67d6573Sopenharmony_ci self.bump(); 540c67d6573Sopenharmony_ci } else if self.char() == '#' { 541c67d6573Sopenharmony_ci let start = self.pos(); 542c67d6573Sopenharmony_ci let mut comment_text = String::new(); 543c67d6573Sopenharmony_ci self.bump(); 544c67d6573Sopenharmony_ci while !self.is_eof() { 545c67d6573Sopenharmony_ci let c = self.char(); 546c67d6573Sopenharmony_ci self.bump(); 547c67d6573Sopenharmony_ci if c == '\n' { 548c67d6573Sopenharmony_ci break; 549c67d6573Sopenharmony_ci } 550c67d6573Sopenharmony_ci comment_text.push(c); 551c67d6573Sopenharmony_ci } 552c67d6573Sopenharmony_ci let comment = ast::Comment { 553c67d6573Sopenharmony_ci span: Span::new(start, self.pos()), 554c67d6573Sopenharmony_ci comment: comment_text, 555c67d6573Sopenharmony_ci }; 556c67d6573Sopenharmony_ci self.parser().comments.borrow_mut().push(comment); 557c67d6573Sopenharmony_ci } else { 558c67d6573Sopenharmony_ci break; 559c67d6573Sopenharmony_ci } 560c67d6573Sopenharmony_ci } 561c67d6573Sopenharmony_ci } 562c67d6573Sopenharmony_ci 563c67d6573Sopenharmony_ci /// Peek at the next character in the input without advancing the parser. 564c67d6573Sopenharmony_ci /// 565c67d6573Sopenharmony_ci /// If the input has been exhausted, then this returns `None`. 
566c67d6573Sopenharmony_ci fn peek(&self) -> Option<char> { 567c67d6573Sopenharmony_ci if self.is_eof() { 568c67d6573Sopenharmony_ci return None; 569c67d6573Sopenharmony_ci } 570c67d6573Sopenharmony_ci self.pattern()[self.offset() + self.char().len_utf8()..].chars().next() 571c67d6573Sopenharmony_ci } 572c67d6573Sopenharmony_ci 573c67d6573Sopenharmony_ci /// Like peek, but will ignore spaces when the parser is in whitespace 574c67d6573Sopenharmony_ci /// insensitive mode. 575c67d6573Sopenharmony_ci fn peek_space(&self) -> Option<char> { 576c67d6573Sopenharmony_ci if !self.ignore_whitespace() { 577c67d6573Sopenharmony_ci return self.peek(); 578c67d6573Sopenharmony_ci } 579c67d6573Sopenharmony_ci if self.is_eof() { 580c67d6573Sopenharmony_ci return None; 581c67d6573Sopenharmony_ci } 582c67d6573Sopenharmony_ci let mut start = self.offset() + self.char().len_utf8(); 583c67d6573Sopenharmony_ci let mut in_comment = false; 584c67d6573Sopenharmony_ci for (i, c) in self.pattern()[start..].char_indices() { 585c67d6573Sopenharmony_ci if c.is_whitespace() { 586c67d6573Sopenharmony_ci continue; 587c67d6573Sopenharmony_ci } else if !in_comment && c == '#' { 588c67d6573Sopenharmony_ci in_comment = true; 589c67d6573Sopenharmony_ci } else if in_comment && c == '\n' { 590c67d6573Sopenharmony_ci in_comment = false; 591c67d6573Sopenharmony_ci } else { 592c67d6573Sopenharmony_ci start += i; 593c67d6573Sopenharmony_ci break; 594c67d6573Sopenharmony_ci } 595c67d6573Sopenharmony_ci } 596c67d6573Sopenharmony_ci self.pattern()[start..].chars().next() 597c67d6573Sopenharmony_ci } 598c67d6573Sopenharmony_ci 599c67d6573Sopenharmony_ci /// Returns true if the next call to `bump` would return false. 
600c67d6573Sopenharmony_ci fn is_eof(&self) -> bool { 601c67d6573Sopenharmony_ci self.offset() == self.pattern().len() 602c67d6573Sopenharmony_ci } 603c67d6573Sopenharmony_ci 604c67d6573Sopenharmony_ci /// Return the current position of the parser, which includes the offset, 605c67d6573Sopenharmony_ci /// line and column. 606c67d6573Sopenharmony_ci fn pos(&self) -> Position { 607c67d6573Sopenharmony_ci self.parser().pos.get() 608c67d6573Sopenharmony_ci } 609c67d6573Sopenharmony_ci 610c67d6573Sopenharmony_ci /// Create a span at the current position of the parser. Both the start 611c67d6573Sopenharmony_ci /// and end of the span are set. 612c67d6573Sopenharmony_ci fn span(&self) -> Span { 613c67d6573Sopenharmony_ci Span::splat(self.pos()) 614c67d6573Sopenharmony_ci } 615c67d6573Sopenharmony_ci 616c67d6573Sopenharmony_ci /// Create a span that covers the current character. 617c67d6573Sopenharmony_ci fn span_char(&self) -> Span { 618c67d6573Sopenharmony_ci let mut next = Position { 619c67d6573Sopenharmony_ci offset: self.offset().checked_add(self.char().len_utf8()).unwrap(), 620c67d6573Sopenharmony_ci line: self.line(), 621c67d6573Sopenharmony_ci column: self.column().checked_add(1).unwrap(), 622c67d6573Sopenharmony_ci }; 623c67d6573Sopenharmony_ci if self.char() == '\n' { 624c67d6573Sopenharmony_ci next.line += 1; 625c67d6573Sopenharmony_ci next.column = 1; 626c67d6573Sopenharmony_ci } 627c67d6573Sopenharmony_ci Span::new(self.pos(), next) 628c67d6573Sopenharmony_ci } 629c67d6573Sopenharmony_ci 630c67d6573Sopenharmony_ci /// Parse and push a single alternation on to the parser's internal stack. 631c67d6573Sopenharmony_ci /// If the top of the stack already has an alternation, then add to that 632c67d6573Sopenharmony_ci /// instead of pushing a new one. 633c67d6573Sopenharmony_ci /// 634c67d6573Sopenharmony_ci /// The concatenation given corresponds to a single alternation branch. 
635c67d6573Sopenharmony_ci /// The concatenation returned starts the next branch and is empty. 636c67d6573Sopenharmony_ci /// 637c67d6573Sopenharmony_ci /// This assumes the parser is currently positioned at `|` and will advance 638c67d6573Sopenharmony_ci /// the parser to the character following `|`. 639c67d6573Sopenharmony_ci #[inline(never)] 640c67d6573Sopenharmony_ci fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> { 641c67d6573Sopenharmony_ci assert_eq!(self.char(), '|'); 642c67d6573Sopenharmony_ci concat.span.end = self.pos(); 643c67d6573Sopenharmony_ci self.push_or_add_alternation(concat); 644c67d6573Sopenharmony_ci self.bump(); 645c67d6573Sopenharmony_ci Ok(ast::Concat { span: self.span(), asts: vec![] }) 646c67d6573Sopenharmony_ci } 647c67d6573Sopenharmony_ci 648c67d6573Sopenharmony_ci /// Pushes or adds the given branch of an alternation to the parser's 649c67d6573Sopenharmony_ci /// internal stack of state. 650c67d6573Sopenharmony_ci fn push_or_add_alternation(&self, concat: ast::Concat) { 651c67d6573Sopenharmony_ci use self::GroupState::*; 652c67d6573Sopenharmony_ci 653c67d6573Sopenharmony_ci let mut stack = self.parser().stack_group.borrow_mut(); 654c67d6573Sopenharmony_ci if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() { 655c67d6573Sopenharmony_ci alts.asts.push(concat.into_ast()); 656c67d6573Sopenharmony_ci return; 657c67d6573Sopenharmony_ci } 658c67d6573Sopenharmony_ci stack.push(Alternation(ast::Alternation { 659c67d6573Sopenharmony_ci span: Span::new(concat.span.start, self.pos()), 660c67d6573Sopenharmony_ci asts: vec![concat.into_ast()], 661c67d6573Sopenharmony_ci })); 662c67d6573Sopenharmony_ci } 663c67d6573Sopenharmony_ci 664c67d6573Sopenharmony_ci /// Parse and push a group AST (and its parent concatenation) on to the 665c67d6573Sopenharmony_ci /// parser's internal stack. Return a fresh concatenation corresponding 666c67d6573Sopenharmony_ci /// to the group's sub-AST. 
667c67d6573Sopenharmony_ci /// 668c67d6573Sopenharmony_ci /// If a set of flags was found (with no group), then the concatenation 669c67d6573Sopenharmony_ci /// is returned with that set of flags added. 670c67d6573Sopenharmony_ci /// 671c67d6573Sopenharmony_ci /// This assumes that the parser is currently positioned on the opening 672c67d6573Sopenharmony_ci /// parenthesis. It advances the parser to the character at the start 673c67d6573Sopenharmony_ci /// of the sub-expression (or adjoining expression). 674c67d6573Sopenharmony_ci /// 675c67d6573Sopenharmony_ci /// If there was a problem parsing the start of the group, then an error 676c67d6573Sopenharmony_ci /// is returned. 677c67d6573Sopenharmony_ci #[inline(never)] 678c67d6573Sopenharmony_ci fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> { 679c67d6573Sopenharmony_ci assert_eq!(self.char(), '('); 680c67d6573Sopenharmony_ci match self.parse_group()? { 681c67d6573Sopenharmony_ci Either::Left(set) => { 682c67d6573Sopenharmony_ci let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace); 683c67d6573Sopenharmony_ci if let Some(v) = ignore { 684c67d6573Sopenharmony_ci self.parser().ignore_whitespace.set(v); 685c67d6573Sopenharmony_ci } 686c67d6573Sopenharmony_ci 687c67d6573Sopenharmony_ci concat.asts.push(Ast::Flags(set)); 688c67d6573Sopenharmony_ci Ok(concat) 689c67d6573Sopenharmony_ci } 690c67d6573Sopenharmony_ci Either::Right(group) => { 691c67d6573Sopenharmony_ci let old_ignore_whitespace = self.ignore_whitespace(); 692c67d6573Sopenharmony_ci let new_ignore_whitespace = group 693c67d6573Sopenharmony_ci .flags() 694c67d6573Sopenharmony_ci .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace)) 695c67d6573Sopenharmony_ci .unwrap_or(old_ignore_whitespace); 696c67d6573Sopenharmony_ci self.parser().stack_group.borrow_mut().push( 697c67d6573Sopenharmony_ci GroupState::Group { 698c67d6573Sopenharmony_ci concat, 699c67d6573Sopenharmony_ci group, 700c67d6573Sopenharmony_ci ignore_whitespace: 
old_ignore_whitespace, 701c67d6573Sopenharmony_ci }, 702c67d6573Sopenharmony_ci ); 703c67d6573Sopenharmony_ci self.parser().ignore_whitespace.set(new_ignore_whitespace); 704c67d6573Sopenharmony_ci Ok(ast::Concat { span: self.span(), asts: vec![] }) 705c67d6573Sopenharmony_ci } 706c67d6573Sopenharmony_ci } 707c67d6573Sopenharmony_ci } 708c67d6573Sopenharmony_ci 709c67d6573Sopenharmony_ci /// Pop a group AST from the parser's internal stack and set the group's 710c67d6573Sopenharmony_ci /// AST to the given concatenation. Return the concatenation containing 711c67d6573Sopenharmony_ci /// the group. 712c67d6573Sopenharmony_ci /// 713c67d6573Sopenharmony_ci /// This assumes that the parser is currently positioned on the closing 714c67d6573Sopenharmony_ci /// parenthesis and advances the parser to the character following the `)`. 715c67d6573Sopenharmony_ci /// 716c67d6573Sopenharmony_ci /// If no such group could be popped, then an unopened group error is 717c67d6573Sopenharmony_ci /// returned. 
718c67d6573Sopenharmony_ci #[inline(never)] 719c67d6573Sopenharmony_ci fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> { 720c67d6573Sopenharmony_ci use self::GroupState::*; 721c67d6573Sopenharmony_ci 722c67d6573Sopenharmony_ci assert_eq!(self.char(), ')'); 723c67d6573Sopenharmony_ci let mut stack = self.parser().stack_group.borrow_mut(); 724c67d6573Sopenharmony_ci let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack 725c67d6573Sopenharmony_ci .pop() 726c67d6573Sopenharmony_ci { 727c67d6573Sopenharmony_ci Some(Group { concat, group, ignore_whitespace }) => { 728c67d6573Sopenharmony_ci (concat, group, ignore_whitespace, None) 729c67d6573Sopenharmony_ci } 730c67d6573Sopenharmony_ci Some(Alternation(alt)) => match stack.pop() { 731c67d6573Sopenharmony_ci Some(Group { concat, group, ignore_whitespace }) => { 732c67d6573Sopenharmony_ci (concat, group, ignore_whitespace, Some(alt)) 733c67d6573Sopenharmony_ci } 734c67d6573Sopenharmony_ci None | Some(Alternation(_)) => { 735c67d6573Sopenharmony_ci return Err(self.error( 736c67d6573Sopenharmony_ci self.span_char(), 737c67d6573Sopenharmony_ci ast::ErrorKind::GroupUnopened, 738c67d6573Sopenharmony_ci )); 739c67d6573Sopenharmony_ci } 740c67d6573Sopenharmony_ci }, 741c67d6573Sopenharmony_ci None => { 742c67d6573Sopenharmony_ci return Err(self 743c67d6573Sopenharmony_ci .error(self.span_char(), ast::ErrorKind::GroupUnopened)); 744c67d6573Sopenharmony_ci } 745c67d6573Sopenharmony_ci }; 746c67d6573Sopenharmony_ci self.parser().ignore_whitespace.set(ignore_whitespace); 747c67d6573Sopenharmony_ci group_concat.span.end = self.pos(); 748c67d6573Sopenharmony_ci self.bump(); 749c67d6573Sopenharmony_ci group.span.end = self.pos(); 750c67d6573Sopenharmony_ci match alt { 751c67d6573Sopenharmony_ci Some(mut alt) => { 752c67d6573Sopenharmony_ci alt.span.end = group_concat.span.end; 753c67d6573Sopenharmony_ci alt.asts.push(group_concat.into_ast()); 754c67d6573Sopenharmony_ci group.ast = 
Box::new(alt.into_ast()); 755c67d6573Sopenharmony_ci } 756c67d6573Sopenharmony_ci None => { 757c67d6573Sopenharmony_ci group.ast = Box::new(group_concat.into_ast()); 758c67d6573Sopenharmony_ci } 759c67d6573Sopenharmony_ci } 760c67d6573Sopenharmony_ci prior_concat.asts.push(Ast::Group(group)); 761c67d6573Sopenharmony_ci Ok(prior_concat) 762c67d6573Sopenharmony_ci } 763c67d6573Sopenharmony_ci 764c67d6573Sopenharmony_ci /// Pop the last state from the parser's internal stack, if it exists, and 765c67d6573Sopenharmony_ci /// add the given concatenation to it. There either must be no state or a 766c67d6573Sopenharmony_ci /// single alternation item on the stack. Any other scenario produces an 767c67d6573Sopenharmony_ci /// error. 768c67d6573Sopenharmony_ci /// 769c67d6573Sopenharmony_ci /// This assumes that the parser has advanced to the end. 770c67d6573Sopenharmony_ci #[inline(never)] 771c67d6573Sopenharmony_ci fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> { 772c67d6573Sopenharmony_ci concat.span.end = self.pos(); 773c67d6573Sopenharmony_ci let mut stack = self.parser().stack_group.borrow_mut(); 774c67d6573Sopenharmony_ci let ast = match stack.pop() { 775c67d6573Sopenharmony_ci None => Ok(concat.into_ast()), 776c67d6573Sopenharmony_ci Some(GroupState::Alternation(mut alt)) => { 777c67d6573Sopenharmony_ci alt.span.end = self.pos(); 778c67d6573Sopenharmony_ci alt.asts.push(concat.into_ast()); 779c67d6573Sopenharmony_ci Ok(Ast::Alternation(alt)) 780c67d6573Sopenharmony_ci } 781c67d6573Sopenharmony_ci Some(GroupState::Group { group, .. }) => { 782c67d6573Sopenharmony_ci return Err( 783c67d6573Sopenharmony_ci self.error(group.span, ast::ErrorKind::GroupUnclosed) 784c67d6573Sopenharmony_ci ); 785c67d6573Sopenharmony_ci } 786c67d6573Sopenharmony_ci }; 787c67d6573Sopenharmony_ci // If we try to pop again, there should be nothing. 
788c67d6573Sopenharmony_ci match stack.pop() { 789c67d6573Sopenharmony_ci None => ast, 790c67d6573Sopenharmony_ci Some(GroupState::Alternation(_)) => { 791c67d6573Sopenharmony_ci // This unreachable is unfortunate. This case can't happen 792c67d6573Sopenharmony_ci // because the only way we can be here is if there were two 793c67d6573Sopenharmony_ci // `GroupState::Alternation`s adjacent in the parser's stack, 794c67d6573Sopenharmony_ci // which we guarantee to never happen because we never push a 795c67d6573Sopenharmony_ci // `GroupState::Alternation` if one is already at the top of 796c67d6573Sopenharmony_ci // the stack. 797c67d6573Sopenharmony_ci unreachable!() 798c67d6573Sopenharmony_ci } 799c67d6573Sopenharmony_ci Some(GroupState::Group { group, .. }) => { 800c67d6573Sopenharmony_ci Err(self.error(group.span, ast::ErrorKind::GroupUnclosed)) 801c67d6573Sopenharmony_ci } 802c67d6573Sopenharmony_ci } 803c67d6573Sopenharmony_ci } 804c67d6573Sopenharmony_ci 805c67d6573Sopenharmony_ci /// Parse the opening of a character class and push the current class 806c67d6573Sopenharmony_ci /// parsing context onto the parser's stack. This assumes that the parser 807c67d6573Sopenharmony_ci /// is positioned at an opening `[`. The given union should correspond to 808c67d6573Sopenharmony_ci /// the union of set items built up before seeing the `[`. 809c67d6573Sopenharmony_ci /// 810c67d6573Sopenharmony_ci /// If there was a problem parsing the opening of the class, then an error 811c67d6573Sopenharmony_ci /// is returned. Otherwise, a new union of set items for the class is 812c67d6573Sopenharmony_ci /// returned (which may be populated with either a `]` or a `-`). 
813c67d6573Sopenharmony_ci #[inline(never)] 814c67d6573Sopenharmony_ci fn push_class_open( 815c67d6573Sopenharmony_ci &self, 816c67d6573Sopenharmony_ci parent_union: ast::ClassSetUnion, 817c67d6573Sopenharmony_ci ) -> Result<ast::ClassSetUnion> { 818c67d6573Sopenharmony_ci assert_eq!(self.char(), '['); 819c67d6573Sopenharmony_ci 820c67d6573Sopenharmony_ci let (nested_set, nested_union) = self.parse_set_class_open()?; 821c67d6573Sopenharmony_ci self.parser() 822c67d6573Sopenharmony_ci .stack_class 823c67d6573Sopenharmony_ci .borrow_mut() 824c67d6573Sopenharmony_ci .push(ClassState::Open { union: parent_union, set: nested_set }); 825c67d6573Sopenharmony_ci Ok(nested_union) 826c67d6573Sopenharmony_ci } 827c67d6573Sopenharmony_ci 828c67d6573Sopenharmony_ci /// Parse the end of a character class set and pop the character class 829c67d6573Sopenharmony_ci /// parser stack. The union given corresponds to the last union built 830c67d6573Sopenharmony_ci /// before seeing the closing `]`. The union returned corresponds to the 831c67d6573Sopenharmony_ci /// parent character class set with the nested class added to it. 832c67d6573Sopenharmony_ci /// 833c67d6573Sopenharmony_ci /// This assumes that the parser is positioned at a `]` and will advance 834c67d6573Sopenharmony_ci /// the parser to the byte immediately following the `]`. 835c67d6573Sopenharmony_ci /// 836c67d6573Sopenharmony_ci /// If the stack is empty after popping, then this returns the final 837c67d6573Sopenharmony_ci /// "top-level" character class AST (where a "top-level" character class 838c67d6573Sopenharmony_ci /// is one that is not nested inside any other character class). 839c67d6573Sopenharmony_ci /// 840c67d6573Sopenharmony_ci /// If there is no corresponding opening bracket on the parser's stack, 841c67d6573Sopenharmony_ci /// then an error is returned. 
842c67d6573Sopenharmony_ci #[inline(never)] 843c67d6573Sopenharmony_ci fn pop_class( 844c67d6573Sopenharmony_ci &self, 845c67d6573Sopenharmony_ci nested_union: ast::ClassSetUnion, 846c67d6573Sopenharmony_ci ) -> Result<Either<ast::ClassSetUnion, ast::Class>> { 847c67d6573Sopenharmony_ci assert_eq!(self.char(), ']'); 848c67d6573Sopenharmony_ci 849c67d6573Sopenharmony_ci let item = ast::ClassSet::Item(nested_union.into_item()); 850c67d6573Sopenharmony_ci let prevset = self.pop_class_op(item); 851c67d6573Sopenharmony_ci let mut stack = self.parser().stack_class.borrow_mut(); 852c67d6573Sopenharmony_ci match stack.pop() { 853c67d6573Sopenharmony_ci None => { 854c67d6573Sopenharmony_ci // We can never observe an empty stack: 855c67d6573Sopenharmony_ci // 856c67d6573Sopenharmony_ci // 1) We are guaranteed to start with a non-empty stack since 857c67d6573Sopenharmony_ci // the character class parser is only initiated when it sees 858c67d6573Sopenharmony_ci // a `[`. 859c67d6573Sopenharmony_ci // 2) If we ever observe an empty stack while popping after 860c67d6573Sopenharmony_ci // seeing a `]`, then we signal the character class parser 861c67d6573Sopenharmony_ci // to terminate. 862c67d6573Sopenharmony_ci panic!("unexpected empty character class stack") 863c67d6573Sopenharmony_ci } 864c67d6573Sopenharmony_ci Some(ClassState::Op { .. }) => { 865c67d6573Sopenharmony_ci // This panic is unfortunate, but this case is impossible 866c67d6573Sopenharmony_ci // since we already popped the Op state if one exists above. 867c67d6573Sopenharmony_ci // Namely, every push to the class parser stack is guarded by 868c67d6573Sopenharmony_ci // whether an existing Op is already on the top of the stack. 869c67d6573Sopenharmony_ci // If it is, the existing Op is modified. That is, the stack 870c67d6573Sopenharmony_ci // can never have consecutive Op states. 
871c67d6573Sopenharmony_ci panic!("unexpected ClassState::Op") 872c67d6573Sopenharmony_ci } 873c67d6573Sopenharmony_ci Some(ClassState::Open { mut union, mut set }) => { 874c67d6573Sopenharmony_ci self.bump(); 875c67d6573Sopenharmony_ci set.span.end = self.pos(); 876c67d6573Sopenharmony_ci set.kind = prevset; 877c67d6573Sopenharmony_ci if stack.is_empty() { 878c67d6573Sopenharmony_ci Ok(Either::Right(ast::Class::Bracketed(set))) 879c67d6573Sopenharmony_ci } else { 880c67d6573Sopenharmony_ci union.push(ast::ClassSetItem::Bracketed(Box::new(set))); 881c67d6573Sopenharmony_ci Ok(Either::Left(union)) 882c67d6573Sopenharmony_ci } 883c67d6573Sopenharmony_ci } 884c67d6573Sopenharmony_ci } 885c67d6573Sopenharmony_ci } 886c67d6573Sopenharmony_ci 887c67d6573Sopenharmony_ci /// Return an "unclosed class" error whose span points to the most 888c67d6573Sopenharmony_ci /// recently opened class. 889c67d6573Sopenharmony_ci /// 890c67d6573Sopenharmony_ci /// This should only be called while parsing a character class. 891c67d6573Sopenharmony_ci #[inline(never)] 892c67d6573Sopenharmony_ci fn unclosed_class_error(&self) -> ast::Error { 893c67d6573Sopenharmony_ci for state in self.parser().stack_class.borrow().iter().rev() { 894c67d6573Sopenharmony_ci if let ClassState::Open { ref set, .. } = *state { 895c67d6573Sopenharmony_ci return self.error(set.span, ast::ErrorKind::ClassUnclosed); 896c67d6573Sopenharmony_ci } 897c67d6573Sopenharmony_ci } 898c67d6573Sopenharmony_ci // We are guaranteed to have a non-empty stack with at least 899c67d6573Sopenharmony_ci // one open bracket, so we should never get here. 900c67d6573Sopenharmony_ci panic!("no open character class found") 901c67d6573Sopenharmony_ci } 902c67d6573Sopenharmony_ci 903c67d6573Sopenharmony_ci /// Push the current set of class items on to the class parser's stack as 904c67d6573Sopenharmony_ci /// the left hand side of the given operator. 
///
/// A fresh set union is returned, which should be used to build the right
/// hand side of this operator.
#[inline(never)]
fn push_class_op(
    &self,
    next_kind: ast::ClassSetBinaryOpKind,
    next_union: ast::ClassSetUnion,
) -> ast::ClassSetUnion {
    // Combine the items collected so far with any operator already
    // pending; the result becomes the left operand of the new operator.
    let collected = ast::ClassSet::Item(next_union.into_item());
    let new_lhs = self.pop_class_op(collected);
    let pending = ClassState::Op { kind: next_kind, lhs: new_lhs };
    self.parser().stack_class.borrow_mut().push(pending);
    ast::ClassSetUnion { span: self.span(), items: Vec::new() }
}

/// Resolve a pending binary set operation, if any. When the top of the
/// class stack is an operator, it is removed and combined with `rhs` as
/// its right operand, and the resulting binary operation is returned.
/// When the top of the stack is an open class, the stack is left as it
/// was and `rhs` is returned unchanged.
#[inline(never)]
fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
    let mut classes = self.parser().stack_class.borrow_mut();
    match classes.pop() {
        Some(ClassState::Op { kind, lhs }) => {
            let span = Span::new(lhs.span().start, rhs.span().end);
            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
                span,
                kind,
                lhs: Box::new(lhs),
                rhs: Box::new(rhs),
            })
        }
        Some(open @ ClassState::Open { .. }) => {
            // No operator pending: restore the entry just removed.
            classes.push(open);
            rhs
        }
        None => unreachable!(),
    }
}
}

impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
    /// Parse the regular expression into an abstract syntax tree,
    /// discarding any comments collected along the way.
    fn parse(&self) -> Result<Ast> {
        let with_comments = self.parse_with_comments()?;
        Ok(with_comments.ast)
    }

    /// Parse the regular expression and return an abstract syntax tree with
    /// all of the comments found in the pattern.
957c67d6573Sopenharmony_ci fn parse_with_comments(&self) -> Result<ast::WithComments> { 958c67d6573Sopenharmony_ci assert_eq!(self.offset(), 0, "parser can only be used once"); 959c67d6573Sopenharmony_ci self.parser().reset(); 960c67d6573Sopenharmony_ci let mut concat = ast::Concat { span: self.span(), asts: vec![] }; 961c67d6573Sopenharmony_ci loop { 962c67d6573Sopenharmony_ci self.bump_space(); 963c67d6573Sopenharmony_ci if self.is_eof() { 964c67d6573Sopenharmony_ci break; 965c67d6573Sopenharmony_ci } 966c67d6573Sopenharmony_ci match self.char() { 967c67d6573Sopenharmony_ci '(' => concat = self.push_group(concat)?, 968c67d6573Sopenharmony_ci ')' => concat = self.pop_group(concat)?, 969c67d6573Sopenharmony_ci '|' => concat = self.push_alternate(concat)?, 970c67d6573Sopenharmony_ci '[' => { 971c67d6573Sopenharmony_ci let class = self.parse_set_class()?; 972c67d6573Sopenharmony_ci concat.asts.push(Ast::Class(class)); 973c67d6573Sopenharmony_ci } 974c67d6573Sopenharmony_ci '?' => { 975c67d6573Sopenharmony_ci concat = self.parse_uncounted_repetition( 976c67d6573Sopenharmony_ci concat, 977c67d6573Sopenharmony_ci ast::RepetitionKind::ZeroOrOne, 978c67d6573Sopenharmony_ci )?; 979c67d6573Sopenharmony_ci } 980c67d6573Sopenharmony_ci '*' => { 981c67d6573Sopenharmony_ci concat = self.parse_uncounted_repetition( 982c67d6573Sopenharmony_ci concat, 983c67d6573Sopenharmony_ci ast::RepetitionKind::ZeroOrMore, 984c67d6573Sopenharmony_ci )?; 985c67d6573Sopenharmony_ci } 986c67d6573Sopenharmony_ci '+' => { 987c67d6573Sopenharmony_ci concat = self.parse_uncounted_repetition( 988c67d6573Sopenharmony_ci concat, 989c67d6573Sopenharmony_ci ast::RepetitionKind::OneOrMore, 990c67d6573Sopenharmony_ci )?; 991c67d6573Sopenharmony_ci } 992c67d6573Sopenharmony_ci '{' => { 993c67d6573Sopenharmony_ci concat = self.parse_counted_repetition(concat)?; 994c67d6573Sopenharmony_ci } 995c67d6573Sopenharmony_ci _ => concat.asts.push(self.parse_primitive()?.into_ast()), 996c67d6573Sopenharmony_ci } 
997c67d6573Sopenharmony_ci } 998c67d6573Sopenharmony_ci let ast = self.pop_group_end(concat)?; 999c67d6573Sopenharmony_ci NestLimiter::new(self).check(&ast)?; 1000c67d6573Sopenharmony_ci Ok(ast::WithComments { 1001c67d6573Sopenharmony_ci ast, 1002c67d6573Sopenharmony_ci comments: mem::replace( 1003c67d6573Sopenharmony_ci &mut *self.parser().comments.borrow_mut(), 1004c67d6573Sopenharmony_ci vec![], 1005c67d6573Sopenharmony_ci ), 1006c67d6573Sopenharmony_ci }) 1007c67d6573Sopenharmony_ci } 1008c67d6573Sopenharmony_ci 1009c67d6573Sopenharmony_ci /// Parses an uncounted repetition operation. An uncounted repetition 1010c67d6573Sopenharmony_ci /// operator includes ?, * and +, but does not include the {m,n} syntax. 1011c67d6573Sopenharmony_ci /// The given `kind` should correspond to the operator observed by the 1012c67d6573Sopenharmony_ci /// caller. 1013c67d6573Sopenharmony_ci /// 1014c67d6573Sopenharmony_ci /// This assumes that the parser is currently positioned at the repetition 1015c67d6573Sopenharmony_ci /// operator and advances the parser to the first character after the 1016c67d6573Sopenharmony_ci /// operator. (Note that the operator may include a single additional `?`, 1017c67d6573Sopenharmony_ci /// which makes the operator ungreedy.) 1018c67d6573Sopenharmony_ci /// 1019c67d6573Sopenharmony_ci /// The caller should include the concatenation that is being built. The 1020c67d6573Sopenharmony_ci /// concatenation returned includes the repetition operator applied to the 1021c67d6573Sopenharmony_ci /// last expression in the given concatenation. 1022c67d6573Sopenharmony_ci #[inline(never)] 1023c67d6573Sopenharmony_ci fn parse_uncounted_repetition( 1024c67d6573Sopenharmony_ci &self, 1025c67d6573Sopenharmony_ci mut concat: ast::Concat, 1026c67d6573Sopenharmony_ci kind: ast::RepetitionKind, 1027c67d6573Sopenharmony_ci ) -> Result<ast::Concat> { 1028c67d6573Sopenharmony_ci assert!( 1029c67d6573Sopenharmony_ci self.char() == '?' 
|| self.char() == '*' || self.char() == '+' 1030c67d6573Sopenharmony_ci ); 1031c67d6573Sopenharmony_ci let op_start = self.pos(); 1032c67d6573Sopenharmony_ci let ast = match concat.asts.pop() { 1033c67d6573Sopenharmony_ci Some(ast) => ast, 1034c67d6573Sopenharmony_ci None => { 1035c67d6573Sopenharmony_ci return Err( 1036c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::RepetitionMissing) 1037c67d6573Sopenharmony_ci ) 1038c67d6573Sopenharmony_ci } 1039c67d6573Sopenharmony_ci }; 1040c67d6573Sopenharmony_ci match ast { 1041c67d6573Sopenharmony_ci Ast::Empty(_) | Ast::Flags(_) => { 1042c67d6573Sopenharmony_ci return Err( 1043c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::RepetitionMissing) 1044c67d6573Sopenharmony_ci ) 1045c67d6573Sopenharmony_ci } 1046c67d6573Sopenharmony_ci _ => {} 1047c67d6573Sopenharmony_ci } 1048c67d6573Sopenharmony_ci let mut greedy = true; 1049c67d6573Sopenharmony_ci if self.bump() && self.char() == '?' { 1050c67d6573Sopenharmony_ci greedy = false; 1051c67d6573Sopenharmony_ci self.bump(); 1052c67d6573Sopenharmony_ci } 1053c67d6573Sopenharmony_ci concat.asts.push(Ast::Repetition(ast::Repetition { 1054c67d6573Sopenharmony_ci span: ast.span().with_end(self.pos()), 1055c67d6573Sopenharmony_ci op: ast::RepetitionOp { 1056c67d6573Sopenharmony_ci span: Span::new(op_start, self.pos()), 1057c67d6573Sopenharmony_ci kind, 1058c67d6573Sopenharmony_ci }, 1059c67d6573Sopenharmony_ci greedy, 1060c67d6573Sopenharmony_ci ast: Box::new(ast), 1061c67d6573Sopenharmony_ci })); 1062c67d6573Sopenharmony_ci Ok(concat) 1063c67d6573Sopenharmony_ci } 1064c67d6573Sopenharmony_ci 1065c67d6573Sopenharmony_ci /// Parses a counted repetition operation. A counted repetition operator 1066c67d6573Sopenharmony_ci /// corresponds to the {m,n} syntax, and does not include the ?, * or + 1067c67d6573Sopenharmony_ci /// operators. 
1068c67d6573Sopenharmony_ci /// 1069c67d6573Sopenharmony_ci /// This assumes that the parser is currently positioned at the opening `{` 1070c67d6573Sopenharmony_ci /// and advances the parser to the first character after the operator. 1071c67d6573Sopenharmony_ci /// (Note that the operator may include a single additional `?`, which 1072c67d6573Sopenharmony_ci /// makes the operator ungreedy.) 1073c67d6573Sopenharmony_ci /// 1074c67d6573Sopenharmony_ci /// The caller should include the concatenation that is being built. The 1075c67d6573Sopenharmony_ci /// concatenation returned includes the repetition operator applied to the 1076c67d6573Sopenharmony_ci /// last expression in the given concatenation. 1077c67d6573Sopenharmony_ci #[inline(never)] 1078c67d6573Sopenharmony_ci fn parse_counted_repetition( 1079c67d6573Sopenharmony_ci &self, 1080c67d6573Sopenharmony_ci mut concat: ast::Concat, 1081c67d6573Sopenharmony_ci ) -> Result<ast::Concat> { 1082c67d6573Sopenharmony_ci assert!(self.char() == '{'); 1083c67d6573Sopenharmony_ci let start = self.pos(); 1084c67d6573Sopenharmony_ci let ast = match concat.asts.pop() { 1085c67d6573Sopenharmony_ci Some(ast) => ast, 1086c67d6573Sopenharmony_ci None => { 1087c67d6573Sopenharmony_ci return Err( 1088c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::RepetitionMissing) 1089c67d6573Sopenharmony_ci ) 1090c67d6573Sopenharmony_ci } 1091c67d6573Sopenharmony_ci }; 1092c67d6573Sopenharmony_ci match ast { 1093c67d6573Sopenharmony_ci Ast::Empty(_) | Ast::Flags(_) => { 1094c67d6573Sopenharmony_ci return Err( 1095c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::RepetitionMissing) 1096c67d6573Sopenharmony_ci ) 1097c67d6573Sopenharmony_ci } 1098c67d6573Sopenharmony_ci _ => {} 1099c67d6573Sopenharmony_ci } 1100c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1101c67d6573Sopenharmony_ci return Err(self.error( 1102c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1103c67d6573Sopenharmony_ci 
ast::ErrorKind::RepetitionCountUnclosed, 1104c67d6573Sopenharmony_ci )); 1105c67d6573Sopenharmony_ci } 1106c67d6573Sopenharmony_ci let count_start = specialize_err( 1107c67d6573Sopenharmony_ci self.parse_decimal(), 1108c67d6573Sopenharmony_ci ast::ErrorKind::DecimalEmpty, 1109c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionCountDecimalEmpty, 1110c67d6573Sopenharmony_ci )?; 1111c67d6573Sopenharmony_ci let mut range = ast::RepetitionRange::Exactly(count_start); 1112c67d6573Sopenharmony_ci if self.is_eof() { 1113c67d6573Sopenharmony_ci return Err(self.error( 1114c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1115c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionCountUnclosed, 1116c67d6573Sopenharmony_ci )); 1117c67d6573Sopenharmony_ci } 1118c67d6573Sopenharmony_ci if self.char() == ',' { 1119c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1120c67d6573Sopenharmony_ci return Err(self.error( 1121c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1122c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionCountUnclosed, 1123c67d6573Sopenharmony_ci )); 1124c67d6573Sopenharmony_ci } 1125c67d6573Sopenharmony_ci if self.char() != '}' { 1126c67d6573Sopenharmony_ci let count_end = specialize_err( 1127c67d6573Sopenharmony_ci self.parse_decimal(), 1128c67d6573Sopenharmony_ci ast::ErrorKind::DecimalEmpty, 1129c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionCountDecimalEmpty, 1130c67d6573Sopenharmony_ci )?; 1131c67d6573Sopenharmony_ci range = ast::RepetitionRange::Bounded(count_start, count_end); 1132c67d6573Sopenharmony_ci } else { 1133c67d6573Sopenharmony_ci range = ast::RepetitionRange::AtLeast(count_start); 1134c67d6573Sopenharmony_ci } 1135c67d6573Sopenharmony_ci } 1136c67d6573Sopenharmony_ci if self.is_eof() || self.char() != '}' { 1137c67d6573Sopenharmony_ci return Err(self.error( 1138c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1139c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionCountUnclosed, 1140c67d6573Sopenharmony_ci )); 
1141c67d6573Sopenharmony_ci } 1142c67d6573Sopenharmony_ci 1143c67d6573Sopenharmony_ci let mut greedy = true; 1144c67d6573Sopenharmony_ci if self.bump_and_bump_space() && self.char() == '?' { 1145c67d6573Sopenharmony_ci greedy = false; 1146c67d6573Sopenharmony_ci self.bump(); 1147c67d6573Sopenharmony_ci } 1148c67d6573Sopenharmony_ci 1149c67d6573Sopenharmony_ci let op_span = Span::new(start, self.pos()); 1150c67d6573Sopenharmony_ci if !range.is_valid() { 1151c67d6573Sopenharmony_ci return Err( 1152c67d6573Sopenharmony_ci self.error(op_span, ast::ErrorKind::RepetitionCountInvalid) 1153c67d6573Sopenharmony_ci ); 1154c67d6573Sopenharmony_ci } 1155c67d6573Sopenharmony_ci concat.asts.push(Ast::Repetition(ast::Repetition { 1156c67d6573Sopenharmony_ci span: ast.span().with_end(self.pos()), 1157c67d6573Sopenharmony_ci op: ast::RepetitionOp { 1158c67d6573Sopenharmony_ci span: op_span, 1159c67d6573Sopenharmony_ci kind: ast::RepetitionKind::Range(range), 1160c67d6573Sopenharmony_ci }, 1161c67d6573Sopenharmony_ci greedy, 1162c67d6573Sopenharmony_ci ast: Box::new(ast), 1163c67d6573Sopenharmony_ci })); 1164c67d6573Sopenharmony_ci Ok(concat) 1165c67d6573Sopenharmony_ci } 1166c67d6573Sopenharmony_ci 1167c67d6573Sopenharmony_ci /// Parse a group (which contains a sub-expression) or a set of flags. 1168c67d6573Sopenharmony_ci /// 1169c67d6573Sopenharmony_ci /// If a group was found, then it is returned with an empty AST. If a set 1170c67d6573Sopenharmony_ci /// of flags is found, then that set is returned. 1171c67d6573Sopenharmony_ci /// 1172c67d6573Sopenharmony_ci /// The parser should be positioned at the opening parenthesis. 1173c67d6573Sopenharmony_ci /// 1174c67d6573Sopenharmony_ci /// This advances the parser to the character before the start of the 1175c67d6573Sopenharmony_ci /// sub-expression (in the case of a group) or to the closing parenthesis 1176c67d6573Sopenharmony_ci /// immediately following the set of flags. 
1177c67d6573Sopenharmony_ci /// 1178c67d6573Sopenharmony_ci /// # Errors 1179c67d6573Sopenharmony_ci /// 1180c67d6573Sopenharmony_ci /// If flags are given and incorrectly specified, then a corresponding 1181c67d6573Sopenharmony_ci /// error is returned. 1182c67d6573Sopenharmony_ci /// 1183c67d6573Sopenharmony_ci /// If a capture name is given and it is incorrectly specified, then a 1184c67d6573Sopenharmony_ci /// corresponding error is returned. 1185c67d6573Sopenharmony_ci #[inline(never)] 1186c67d6573Sopenharmony_ci fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> { 1187c67d6573Sopenharmony_ci assert_eq!(self.char(), '('); 1188c67d6573Sopenharmony_ci let open_span = self.span_char(); 1189c67d6573Sopenharmony_ci self.bump(); 1190c67d6573Sopenharmony_ci self.bump_space(); 1191c67d6573Sopenharmony_ci if self.is_lookaround_prefix() { 1192c67d6573Sopenharmony_ci return Err(self.error( 1193c67d6573Sopenharmony_ci Span::new(open_span.start, self.span().end), 1194c67d6573Sopenharmony_ci ast::ErrorKind::UnsupportedLookAround, 1195c67d6573Sopenharmony_ci )); 1196c67d6573Sopenharmony_ci } 1197c67d6573Sopenharmony_ci let inner_span = self.span(); 1198c67d6573Sopenharmony_ci if self.bump_if("?P<") { 1199c67d6573Sopenharmony_ci let capture_index = self.next_capture_index(open_span)?; 1200c67d6573Sopenharmony_ci let cap = self.parse_capture_name(capture_index)?; 1201c67d6573Sopenharmony_ci Ok(Either::Right(ast::Group { 1202c67d6573Sopenharmony_ci span: open_span, 1203c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(cap), 1204c67d6573Sopenharmony_ci ast: Box::new(Ast::Empty(self.span())), 1205c67d6573Sopenharmony_ci })) 1206c67d6573Sopenharmony_ci } else if self.bump_if("?") { 1207c67d6573Sopenharmony_ci if self.is_eof() { 1208c67d6573Sopenharmony_ci return Err( 1209c67d6573Sopenharmony_ci self.error(open_span, ast::ErrorKind::GroupUnclosed) 1210c67d6573Sopenharmony_ci ); 1211c67d6573Sopenharmony_ci } 1212c67d6573Sopenharmony_ci let flags = 
self.parse_flags()?; 1213c67d6573Sopenharmony_ci let char_end = self.char(); 1214c67d6573Sopenharmony_ci self.bump(); 1215c67d6573Sopenharmony_ci if char_end == ')' { 1216c67d6573Sopenharmony_ci // We don't allow empty flags, e.g., `(?)`. We instead 1217c67d6573Sopenharmony_ci // interpret it as a repetition operator missing its argument. 1218c67d6573Sopenharmony_ci if flags.items.is_empty() { 1219c67d6573Sopenharmony_ci return Err(self.error( 1220c67d6573Sopenharmony_ci inner_span, 1221c67d6573Sopenharmony_ci ast::ErrorKind::RepetitionMissing, 1222c67d6573Sopenharmony_ci )); 1223c67d6573Sopenharmony_ci } 1224c67d6573Sopenharmony_ci Ok(Either::Left(ast::SetFlags { 1225c67d6573Sopenharmony_ci span: Span { end: self.pos(), ..open_span }, 1226c67d6573Sopenharmony_ci flags, 1227c67d6573Sopenharmony_ci })) 1228c67d6573Sopenharmony_ci } else { 1229c67d6573Sopenharmony_ci assert_eq!(char_end, ':'); 1230c67d6573Sopenharmony_ci Ok(Either::Right(ast::Group { 1231c67d6573Sopenharmony_ci span: open_span, 1232c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(flags), 1233c67d6573Sopenharmony_ci ast: Box::new(Ast::Empty(self.span())), 1234c67d6573Sopenharmony_ci })) 1235c67d6573Sopenharmony_ci } 1236c67d6573Sopenharmony_ci } else { 1237c67d6573Sopenharmony_ci let capture_index = self.next_capture_index(open_span)?; 1238c67d6573Sopenharmony_ci Ok(Either::Right(ast::Group { 1239c67d6573Sopenharmony_ci span: open_span, 1240c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(capture_index), 1241c67d6573Sopenharmony_ci ast: Box::new(Ast::Empty(self.span())), 1242c67d6573Sopenharmony_ci })) 1243c67d6573Sopenharmony_ci } 1244c67d6573Sopenharmony_ci } 1245c67d6573Sopenharmony_ci 1246c67d6573Sopenharmony_ci /// Parses a capture group name. Assumes that the parser is positioned at 1247c67d6573Sopenharmony_ci /// the first character in the name following the opening `<` (and may 1248c67d6573Sopenharmony_ci /// possibly be EOF). 
This advances the parser to the first character 1249c67d6573Sopenharmony_ci /// following the closing `>`. 1250c67d6573Sopenharmony_ci /// 1251c67d6573Sopenharmony_ci /// The caller must provide the capture index of the group for this name. 1252c67d6573Sopenharmony_ci #[inline(never)] 1253c67d6573Sopenharmony_ci fn parse_capture_name( 1254c67d6573Sopenharmony_ci &self, 1255c67d6573Sopenharmony_ci capture_index: u32, 1256c67d6573Sopenharmony_ci ) -> Result<ast::CaptureName> { 1257c67d6573Sopenharmony_ci if self.is_eof() { 1258c67d6573Sopenharmony_ci return Err(self 1259c67d6573Sopenharmony_ci .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof)); 1260c67d6573Sopenharmony_ci } 1261c67d6573Sopenharmony_ci let start = self.pos(); 1262c67d6573Sopenharmony_ci loop { 1263c67d6573Sopenharmony_ci if self.char() == '>' { 1264c67d6573Sopenharmony_ci break; 1265c67d6573Sopenharmony_ci } 1266c67d6573Sopenharmony_ci if !is_capture_char(self.char(), self.pos() == start) { 1267c67d6573Sopenharmony_ci return Err(self.error( 1268c67d6573Sopenharmony_ci self.span_char(), 1269c67d6573Sopenharmony_ci ast::ErrorKind::GroupNameInvalid, 1270c67d6573Sopenharmony_ci )); 1271c67d6573Sopenharmony_ci } 1272c67d6573Sopenharmony_ci if !self.bump() { 1273c67d6573Sopenharmony_ci break; 1274c67d6573Sopenharmony_ci } 1275c67d6573Sopenharmony_ci } 1276c67d6573Sopenharmony_ci let end = self.pos(); 1277c67d6573Sopenharmony_ci if self.is_eof() { 1278c67d6573Sopenharmony_ci return Err(self 1279c67d6573Sopenharmony_ci .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof)); 1280c67d6573Sopenharmony_ci } 1281c67d6573Sopenharmony_ci assert_eq!(self.char(), '>'); 1282c67d6573Sopenharmony_ci self.bump(); 1283c67d6573Sopenharmony_ci let name = &self.pattern()[start.offset..end.offset]; 1284c67d6573Sopenharmony_ci if name.is_empty() { 1285c67d6573Sopenharmony_ci return Err(self.error( 1286c67d6573Sopenharmony_ci Span::new(start, start), 1287c67d6573Sopenharmony_ci ast::ErrorKind::GroupNameEmpty, 
1288c67d6573Sopenharmony_ci )); 1289c67d6573Sopenharmony_ci } 1290c67d6573Sopenharmony_ci let capname = ast::CaptureName { 1291c67d6573Sopenharmony_ci span: Span::new(start, end), 1292c67d6573Sopenharmony_ci name: name.to_string(), 1293c67d6573Sopenharmony_ci index: capture_index, 1294c67d6573Sopenharmony_ci }; 1295c67d6573Sopenharmony_ci self.add_capture_name(&capname)?; 1296c67d6573Sopenharmony_ci Ok(capname) 1297c67d6573Sopenharmony_ci } 1298c67d6573Sopenharmony_ci 1299c67d6573Sopenharmony_ci /// Parse a sequence of flags starting at the current character. 1300c67d6573Sopenharmony_ci /// 1301c67d6573Sopenharmony_ci /// This advances the parser to the character immediately following the 1302c67d6573Sopenharmony_ci /// flags, which is guaranteed to be either `:` or `)`. 1303c67d6573Sopenharmony_ci /// 1304c67d6573Sopenharmony_ci /// # Errors 1305c67d6573Sopenharmony_ci /// 1306c67d6573Sopenharmony_ci /// If any flags are duplicated, then an error is returned. 1307c67d6573Sopenharmony_ci /// 1308c67d6573Sopenharmony_ci /// If the negation operator is used more than once, then an error is 1309c67d6573Sopenharmony_ci /// returned. 1310c67d6573Sopenharmony_ci /// 1311c67d6573Sopenharmony_ci /// If no flags could be found or if the negation operation is not followed 1312c67d6573Sopenharmony_ci /// by any flags, then an error is returned. 
1313c67d6573Sopenharmony_ci #[inline(never)] 1314c67d6573Sopenharmony_ci fn parse_flags(&self) -> Result<ast::Flags> { 1315c67d6573Sopenharmony_ci let mut flags = ast::Flags { span: self.span(), items: vec![] }; 1316c67d6573Sopenharmony_ci let mut last_was_negation = None; 1317c67d6573Sopenharmony_ci while self.char() != ':' && self.char() != ')' { 1318c67d6573Sopenharmony_ci if self.char() == '-' { 1319c67d6573Sopenharmony_ci last_was_negation = Some(self.span_char()); 1320c67d6573Sopenharmony_ci let item = ast::FlagsItem { 1321c67d6573Sopenharmony_ci span: self.span_char(), 1322c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 1323c67d6573Sopenharmony_ci }; 1324c67d6573Sopenharmony_ci if let Some(i) = flags.add_item(item) { 1325c67d6573Sopenharmony_ci return Err(self.error( 1326c67d6573Sopenharmony_ci self.span_char(), 1327c67d6573Sopenharmony_ci ast::ErrorKind::FlagRepeatedNegation { 1328c67d6573Sopenharmony_ci original: flags.items[i].span, 1329c67d6573Sopenharmony_ci }, 1330c67d6573Sopenharmony_ci )); 1331c67d6573Sopenharmony_ci } 1332c67d6573Sopenharmony_ci } else { 1333c67d6573Sopenharmony_ci last_was_negation = None; 1334c67d6573Sopenharmony_ci let item = ast::FlagsItem { 1335c67d6573Sopenharmony_ci span: self.span_char(), 1336c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(self.parse_flag()?), 1337c67d6573Sopenharmony_ci }; 1338c67d6573Sopenharmony_ci if let Some(i) = flags.add_item(item) { 1339c67d6573Sopenharmony_ci return Err(self.error( 1340c67d6573Sopenharmony_ci self.span_char(), 1341c67d6573Sopenharmony_ci ast::ErrorKind::FlagDuplicate { 1342c67d6573Sopenharmony_ci original: flags.items[i].span, 1343c67d6573Sopenharmony_ci }, 1344c67d6573Sopenharmony_ci )); 1345c67d6573Sopenharmony_ci } 1346c67d6573Sopenharmony_ci } 1347c67d6573Sopenharmony_ci if !self.bump() { 1348c67d6573Sopenharmony_ci return Err( 1349c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof) 1350c67d6573Sopenharmony_ci ); 
1351c67d6573Sopenharmony_ci } 1352c67d6573Sopenharmony_ci } 1353c67d6573Sopenharmony_ci if let Some(span) = last_was_negation { 1354c67d6573Sopenharmony_ci return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation)); 1355c67d6573Sopenharmony_ci } 1356c67d6573Sopenharmony_ci flags.span.end = self.pos(); 1357c67d6573Sopenharmony_ci Ok(flags) 1358c67d6573Sopenharmony_ci } 1359c67d6573Sopenharmony_ci 1360c67d6573Sopenharmony_ci /// Parse the current character as a flag. Do not advance the parser. 1361c67d6573Sopenharmony_ci /// 1362c67d6573Sopenharmony_ci /// # Errors 1363c67d6573Sopenharmony_ci /// 1364c67d6573Sopenharmony_ci /// If the flag is not recognized, then an error is returned. 1365c67d6573Sopenharmony_ci #[inline(never)] 1366c67d6573Sopenharmony_ci fn parse_flag(&self) -> Result<ast::Flag> { 1367c67d6573Sopenharmony_ci match self.char() { 1368c67d6573Sopenharmony_ci 'i' => Ok(ast::Flag::CaseInsensitive), 1369c67d6573Sopenharmony_ci 'm' => Ok(ast::Flag::MultiLine), 1370c67d6573Sopenharmony_ci 's' => Ok(ast::Flag::DotMatchesNewLine), 1371c67d6573Sopenharmony_ci 'U' => Ok(ast::Flag::SwapGreed), 1372c67d6573Sopenharmony_ci 'u' => Ok(ast::Flag::Unicode), 1373c67d6573Sopenharmony_ci 'x' => Ok(ast::Flag::IgnoreWhitespace), 1374c67d6573Sopenharmony_ci _ => { 1375c67d6573Sopenharmony_ci Err(self 1376c67d6573Sopenharmony_ci .error(self.span_char(), ast::ErrorKind::FlagUnrecognized)) 1377c67d6573Sopenharmony_ci } 1378c67d6573Sopenharmony_ci } 1379c67d6573Sopenharmony_ci } 1380c67d6573Sopenharmony_ci 1381c67d6573Sopenharmony_ci /// Parse a primitive AST. e.g., A literal, non-set character class or 1382c67d6573Sopenharmony_ci /// assertion. 1383c67d6573Sopenharmony_ci /// 1384c67d6573Sopenharmony_ci /// This assumes that the parser expects a primitive at the current 1385c67d6573Sopenharmony_ci /// location. i.e., All other non-primitive cases have been handled. 
1386c67d6573Sopenharmony_ci /// For example, if the parser's position is at `|`, then `|` will be 1387c67d6573Sopenharmony_ci /// treated as a literal (e.g., inside a character class). 1388c67d6573Sopenharmony_ci /// 1389c67d6573Sopenharmony_ci /// This advances the parser to the first character immediately following 1390c67d6573Sopenharmony_ci /// the primitive. 1391c67d6573Sopenharmony_ci fn parse_primitive(&self) -> Result<Primitive> { 1392c67d6573Sopenharmony_ci match self.char() { 1393c67d6573Sopenharmony_ci '\\' => self.parse_escape(), 1394c67d6573Sopenharmony_ci '.' => { 1395c67d6573Sopenharmony_ci let ast = Primitive::Dot(self.span_char()); 1396c67d6573Sopenharmony_ci self.bump(); 1397c67d6573Sopenharmony_ci Ok(ast) 1398c67d6573Sopenharmony_ci } 1399c67d6573Sopenharmony_ci '^' => { 1400c67d6573Sopenharmony_ci let ast = Primitive::Assertion(ast::Assertion { 1401c67d6573Sopenharmony_ci span: self.span_char(), 1402c67d6573Sopenharmony_ci kind: ast::AssertionKind::StartLine, 1403c67d6573Sopenharmony_ci }); 1404c67d6573Sopenharmony_ci self.bump(); 1405c67d6573Sopenharmony_ci Ok(ast) 1406c67d6573Sopenharmony_ci } 1407c67d6573Sopenharmony_ci '$' => { 1408c67d6573Sopenharmony_ci let ast = Primitive::Assertion(ast::Assertion { 1409c67d6573Sopenharmony_ci span: self.span_char(), 1410c67d6573Sopenharmony_ci kind: ast::AssertionKind::EndLine, 1411c67d6573Sopenharmony_ci }); 1412c67d6573Sopenharmony_ci self.bump(); 1413c67d6573Sopenharmony_ci Ok(ast) 1414c67d6573Sopenharmony_ci } 1415c67d6573Sopenharmony_ci c => { 1416c67d6573Sopenharmony_ci let ast = Primitive::Literal(ast::Literal { 1417c67d6573Sopenharmony_ci span: self.span_char(), 1418c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 1419c67d6573Sopenharmony_ci c, 1420c67d6573Sopenharmony_ci }); 1421c67d6573Sopenharmony_ci self.bump(); 1422c67d6573Sopenharmony_ci Ok(ast) 1423c67d6573Sopenharmony_ci } 1424c67d6573Sopenharmony_ci } 1425c67d6573Sopenharmony_ci } 1426c67d6573Sopenharmony_ci 
1427c67d6573Sopenharmony_ci /// Parse an escape sequence as a primitive AST. 1428c67d6573Sopenharmony_ci /// 1429c67d6573Sopenharmony_ci /// This assumes the parser is positioned at the start of the escape 1430c67d6573Sopenharmony_ci /// sequence, i.e., `\`. It advances the parser to the first position 1431c67d6573Sopenharmony_ci /// immediately following the escape sequence. 1432c67d6573Sopenharmony_ci #[inline(never)] 1433c67d6573Sopenharmony_ci fn parse_escape(&self) -> Result<Primitive> { 1434c67d6573Sopenharmony_ci assert_eq!(self.char(), '\\'); 1435c67d6573Sopenharmony_ci let start = self.pos(); 1436c67d6573Sopenharmony_ci if !self.bump() { 1437c67d6573Sopenharmony_ci return Err(self.error( 1438c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1439c67d6573Sopenharmony_ci ast::ErrorKind::EscapeUnexpectedEof, 1440c67d6573Sopenharmony_ci )); 1441c67d6573Sopenharmony_ci } 1442c67d6573Sopenharmony_ci let c = self.char(); 1443c67d6573Sopenharmony_ci // Put some of the more complicated routines into helpers. 
1444c67d6573Sopenharmony_ci match c { 1445c67d6573Sopenharmony_ci '0'..='7' => { 1446c67d6573Sopenharmony_ci if !self.parser().octal { 1447c67d6573Sopenharmony_ci return Err(self.error( 1448c67d6573Sopenharmony_ci Span::new(start, self.span_char().end), 1449c67d6573Sopenharmony_ci ast::ErrorKind::UnsupportedBackreference, 1450c67d6573Sopenharmony_ci )); 1451c67d6573Sopenharmony_ci } 1452c67d6573Sopenharmony_ci let mut lit = self.parse_octal(); 1453c67d6573Sopenharmony_ci lit.span.start = start; 1454c67d6573Sopenharmony_ci return Ok(Primitive::Literal(lit)); 1455c67d6573Sopenharmony_ci } 1456c67d6573Sopenharmony_ci '8'..='9' if !self.parser().octal => { 1457c67d6573Sopenharmony_ci return Err(self.error( 1458c67d6573Sopenharmony_ci Span::new(start, self.span_char().end), 1459c67d6573Sopenharmony_ci ast::ErrorKind::UnsupportedBackreference, 1460c67d6573Sopenharmony_ci )); 1461c67d6573Sopenharmony_ci } 1462c67d6573Sopenharmony_ci 'x' | 'u' | 'U' => { 1463c67d6573Sopenharmony_ci let mut lit = self.parse_hex()?; 1464c67d6573Sopenharmony_ci lit.span.start = start; 1465c67d6573Sopenharmony_ci return Ok(Primitive::Literal(lit)); 1466c67d6573Sopenharmony_ci } 1467c67d6573Sopenharmony_ci 'p' | 'P' => { 1468c67d6573Sopenharmony_ci let mut cls = self.parse_unicode_class()?; 1469c67d6573Sopenharmony_ci cls.span.start = start; 1470c67d6573Sopenharmony_ci return Ok(Primitive::Unicode(cls)); 1471c67d6573Sopenharmony_ci } 1472c67d6573Sopenharmony_ci 'd' | 's' | 'w' | 'D' | 'S' | 'W' => { 1473c67d6573Sopenharmony_ci let mut cls = self.parse_perl_class(); 1474c67d6573Sopenharmony_ci cls.span.start = start; 1475c67d6573Sopenharmony_ci return Ok(Primitive::Perl(cls)); 1476c67d6573Sopenharmony_ci } 1477c67d6573Sopenharmony_ci _ => {} 1478c67d6573Sopenharmony_ci } 1479c67d6573Sopenharmony_ci 1480c67d6573Sopenharmony_ci // Handle all of the one letter sequences inline. 
1481c67d6573Sopenharmony_ci self.bump(); 1482c67d6573Sopenharmony_ci let span = Span::new(start, self.pos()); 1483c67d6573Sopenharmony_ci if is_meta_character(c) { 1484c67d6573Sopenharmony_ci return Ok(Primitive::Literal(ast::Literal { 1485c67d6573Sopenharmony_ci span, 1486c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 1487c67d6573Sopenharmony_ci c, 1488c67d6573Sopenharmony_ci })); 1489c67d6573Sopenharmony_ci } 1490c67d6573Sopenharmony_ci let special = |kind, c| { 1491c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 1492c67d6573Sopenharmony_ci span, 1493c67d6573Sopenharmony_ci kind: ast::LiteralKind::Special(kind), 1494c67d6573Sopenharmony_ci c, 1495c67d6573Sopenharmony_ci })) 1496c67d6573Sopenharmony_ci }; 1497c67d6573Sopenharmony_ci match c { 1498c67d6573Sopenharmony_ci 'a' => special(ast::SpecialLiteralKind::Bell, '\x07'), 1499c67d6573Sopenharmony_ci 'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'), 1500c67d6573Sopenharmony_ci 't' => special(ast::SpecialLiteralKind::Tab, '\t'), 1501c67d6573Sopenharmony_ci 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'), 1502c67d6573Sopenharmony_ci 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'), 1503c67d6573Sopenharmony_ci 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'), 1504c67d6573Sopenharmony_ci ' ' if self.ignore_whitespace() => { 1505c67d6573Sopenharmony_ci special(ast::SpecialLiteralKind::Space, ' ') 1506c67d6573Sopenharmony_ci } 1507c67d6573Sopenharmony_ci 'A' => Ok(Primitive::Assertion(ast::Assertion { 1508c67d6573Sopenharmony_ci span, 1509c67d6573Sopenharmony_ci kind: ast::AssertionKind::StartText, 1510c67d6573Sopenharmony_ci })), 1511c67d6573Sopenharmony_ci 'z' => Ok(Primitive::Assertion(ast::Assertion { 1512c67d6573Sopenharmony_ci span, 1513c67d6573Sopenharmony_ci kind: ast::AssertionKind::EndText, 1514c67d6573Sopenharmony_ci })), 1515c67d6573Sopenharmony_ci 'b' => Ok(Primitive::Assertion(ast::Assertion { 1516c67d6573Sopenharmony_ci span, 
1517c67d6573Sopenharmony_ci kind: ast::AssertionKind::WordBoundary, 1518c67d6573Sopenharmony_ci })), 1519c67d6573Sopenharmony_ci 'B' => Ok(Primitive::Assertion(ast::Assertion { 1520c67d6573Sopenharmony_ci span, 1521c67d6573Sopenharmony_ci kind: ast::AssertionKind::NotWordBoundary, 1522c67d6573Sopenharmony_ci })), 1523c67d6573Sopenharmony_ci _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), 1524c67d6573Sopenharmony_ci } 1525c67d6573Sopenharmony_ci } 1526c67d6573Sopenharmony_ci 1527c67d6573Sopenharmony_ci /// Parse an octal representation of a Unicode codepoint up to 3 digits 1528c67d6573Sopenharmony_ci /// long. This expects the parser to be positioned at the first octal 1529c67d6573Sopenharmony_ci /// digit and advances the parser to the first character immediately 1530c67d6573Sopenharmony_ci /// following the octal number. This also assumes that parsing octal 1531c67d6573Sopenharmony_ci /// escapes is enabled. 1532c67d6573Sopenharmony_ci /// 1533c67d6573Sopenharmony_ci /// Assuming the preconditions are met, this routine can never fail. 1534c67d6573Sopenharmony_ci #[inline(never)] 1535c67d6573Sopenharmony_ci fn parse_octal(&self) -> ast::Literal { 1536c67d6573Sopenharmony_ci use std::char; 1537c67d6573Sopenharmony_ci use std::u32; 1538c67d6573Sopenharmony_ci 1539c67d6573Sopenharmony_ci assert!(self.parser().octal); 1540c67d6573Sopenharmony_ci assert!('0' <= self.char() && self.char() <= '7'); 1541c67d6573Sopenharmony_ci let start = self.pos(); 1542c67d6573Sopenharmony_ci // Parse up to two more digits. 
1543c67d6573Sopenharmony_ci while self.bump() 1544c67d6573Sopenharmony_ci && '0' <= self.char() 1545c67d6573Sopenharmony_ci && self.char() <= '7' 1546c67d6573Sopenharmony_ci && self.pos().offset - start.offset <= 2 1547c67d6573Sopenharmony_ci {} 1548c67d6573Sopenharmony_ci let end = self.pos(); 1549c67d6573Sopenharmony_ci let octal = &self.pattern()[start.offset..end.offset]; 1550c67d6573Sopenharmony_ci // Parsing the octal should never fail since the above guarantees a 1551c67d6573Sopenharmony_ci // valid number. 1552c67d6573Sopenharmony_ci let codepoint = 1553c67d6573Sopenharmony_ci u32::from_str_radix(octal, 8).expect("valid octal number"); 1554c67d6573Sopenharmony_ci // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no 1555c67d6573Sopenharmony_ci // invalid Unicode scalar values. 1556c67d6573Sopenharmony_ci let c = char::from_u32(codepoint).expect("Unicode scalar value"); 1557c67d6573Sopenharmony_ci ast::Literal { 1558c67d6573Sopenharmony_ci span: Span::new(start, end), 1559c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 1560c67d6573Sopenharmony_ci c, 1561c67d6573Sopenharmony_ci } 1562c67d6573Sopenharmony_ci } 1563c67d6573Sopenharmony_ci 1564c67d6573Sopenharmony_ci /// Parse a hex representation of a Unicode codepoint. This handles both 1565c67d6573Sopenharmony_ci /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to 1566c67d6573Sopenharmony_ci /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to 1567c67d6573Sopenharmony_ci /// the first character immediately following the hexadecimal literal. 
1568c67d6573Sopenharmony_ci #[inline(never)] 1569c67d6573Sopenharmony_ci fn parse_hex(&self) -> Result<ast::Literal> { 1570c67d6573Sopenharmony_ci assert!( 1571c67d6573Sopenharmony_ci self.char() == 'x' || self.char() == 'u' || self.char() == 'U' 1572c67d6573Sopenharmony_ci ); 1573c67d6573Sopenharmony_ci 1574c67d6573Sopenharmony_ci let hex_kind = match self.char() { 1575c67d6573Sopenharmony_ci 'x' => ast::HexLiteralKind::X, 1576c67d6573Sopenharmony_ci 'u' => ast::HexLiteralKind::UnicodeShort, 1577c67d6573Sopenharmony_ci _ => ast::HexLiteralKind::UnicodeLong, 1578c67d6573Sopenharmony_ci }; 1579c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1580c67d6573Sopenharmony_ci return Err( 1581c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof) 1582c67d6573Sopenharmony_ci ); 1583c67d6573Sopenharmony_ci } 1584c67d6573Sopenharmony_ci if self.char() == '{' { 1585c67d6573Sopenharmony_ci self.parse_hex_brace(hex_kind) 1586c67d6573Sopenharmony_ci } else { 1587c67d6573Sopenharmony_ci self.parse_hex_digits(hex_kind) 1588c67d6573Sopenharmony_ci } 1589c67d6573Sopenharmony_ci } 1590c67d6573Sopenharmony_ci 1591c67d6573Sopenharmony_ci /// Parse an N-digit hex representation of a Unicode codepoint. This 1592c67d6573Sopenharmony_ci /// expects the parser to be positioned at the first digit and will advance 1593c67d6573Sopenharmony_ci /// the parser to the first character immediately following the escape 1594c67d6573Sopenharmony_ci /// sequence. 1595c67d6573Sopenharmony_ci /// 1596c67d6573Sopenharmony_ci /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`) 1597c67d6573Sopenharmony_ci /// or 8 (for `\UNNNNNNNN`). 
1598c67d6573Sopenharmony_ci #[inline(never)] 1599c67d6573Sopenharmony_ci fn parse_hex_digits( 1600c67d6573Sopenharmony_ci &self, 1601c67d6573Sopenharmony_ci kind: ast::HexLiteralKind, 1602c67d6573Sopenharmony_ci ) -> Result<ast::Literal> { 1603c67d6573Sopenharmony_ci use std::char; 1604c67d6573Sopenharmony_ci use std::u32; 1605c67d6573Sopenharmony_ci 1606c67d6573Sopenharmony_ci let mut scratch = self.parser().scratch.borrow_mut(); 1607c67d6573Sopenharmony_ci scratch.clear(); 1608c67d6573Sopenharmony_ci 1609c67d6573Sopenharmony_ci let start = self.pos(); 1610c67d6573Sopenharmony_ci for i in 0..kind.digits() { 1611c67d6573Sopenharmony_ci if i > 0 && !self.bump_and_bump_space() { 1612c67d6573Sopenharmony_ci return Err(self 1613c67d6573Sopenharmony_ci .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)); 1614c67d6573Sopenharmony_ci } 1615c67d6573Sopenharmony_ci if !is_hex(self.char()) { 1616c67d6573Sopenharmony_ci return Err(self.error( 1617c67d6573Sopenharmony_ci self.span_char(), 1618c67d6573Sopenharmony_ci ast::ErrorKind::EscapeHexInvalidDigit, 1619c67d6573Sopenharmony_ci )); 1620c67d6573Sopenharmony_ci } 1621c67d6573Sopenharmony_ci scratch.push(self.char()); 1622c67d6573Sopenharmony_ci } 1623c67d6573Sopenharmony_ci // The final bump just moves the parser past the literal, which may 1624c67d6573Sopenharmony_ci // be EOF. 
1625c67d6573Sopenharmony_ci self.bump_and_bump_space(); 1626c67d6573Sopenharmony_ci let end = self.pos(); 1627c67d6573Sopenharmony_ci let hex = scratch.as_str(); 1628c67d6573Sopenharmony_ci match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) { 1629c67d6573Sopenharmony_ci None => Err(self.error( 1630c67d6573Sopenharmony_ci Span::new(start, end), 1631c67d6573Sopenharmony_ci ast::ErrorKind::EscapeHexInvalid, 1632c67d6573Sopenharmony_ci )), 1633c67d6573Sopenharmony_ci Some(c) => Ok(ast::Literal { 1634c67d6573Sopenharmony_ci span: Span::new(start, end), 1635c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexFixed(kind), 1636c67d6573Sopenharmony_ci c, 1637c67d6573Sopenharmony_ci }), 1638c67d6573Sopenharmony_ci } 1639c67d6573Sopenharmony_ci } 1640c67d6573Sopenharmony_ci 1641c67d6573Sopenharmony_ci /// Parse a hex representation of any Unicode scalar value. This expects 1642c67d6573Sopenharmony_ci /// the parser to be positioned at the opening brace `{` and will advance 1643c67d6573Sopenharmony_ci /// the parser to the first character following the closing brace `}`. 
1644c67d6573Sopenharmony_ci #[inline(never)] 1645c67d6573Sopenharmony_ci fn parse_hex_brace( 1646c67d6573Sopenharmony_ci &self, 1647c67d6573Sopenharmony_ci kind: ast::HexLiteralKind, 1648c67d6573Sopenharmony_ci ) -> Result<ast::Literal> { 1649c67d6573Sopenharmony_ci use std::char; 1650c67d6573Sopenharmony_ci use std::u32; 1651c67d6573Sopenharmony_ci 1652c67d6573Sopenharmony_ci let mut scratch = self.parser().scratch.borrow_mut(); 1653c67d6573Sopenharmony_ci scratch.clear(); 1654c67d6573Sopenharmony_ci 1655c67d6573Sopenharmony_ci let brace_pos = self.pos(); 1656c67d6573Sopenharmony_ci let start = self.span_char().end; 1657c67d6573Sopenharmony_ci while self.bump_and_bump_space() && self.char() != '}' { 1658c67d6573Sopenharmony_ci if !is_hex(self.char()) { 1659c67d6573Sopenharmony_ci return Err(self.error( 1660c67d6573Sopenharmony_ci self.span_char(), 1661c67d6573Sopenharmony_ci ast::ErrorKind::EscapeHexInvalidDigit, 1662c67d6573Sopenharmony_ci )); 1663c67d6573Sopenharmony_ci } 1664c67d6573Sopenharmony_ci scratch.push(self.char()); 1665c67d6573Sopenharmony_ci } 1666c67d6573Sopenharmony_ci if self.is_eof() { 1667c67d6573Sopenharmony_ci return Err(self.error( 1668c67d6573Sopenharmony_ci Span::new(brace_pos, self.pos()), 1669c67d6573Sopenharmony_ci ast::ErrorKind::EscapeUnexpectedEof, 1670c67d6573Sopenharmony_ci )); 1671c67d6573Sopenharmony_ci } 1672c67d6573Sopenharmony_ci let end = self.pos(); 1673c67d6573Sopenharmony_ci let hex = scratch.as_str(); 1674c67d6573Sopenharmony_ci assert_eq!(self.char(), '}'); 1675c67d6573Sopenharmony_ci self.bump_and_bump_space(); 1676c67d6573Sopenharmony_ci 1677c67d6573Sopenharmony_ci if hex.is_empty() { 1678c67d6573Sopenharmony_ci return Err(self.error( 1679c67d6573Sopenharmony_ci Span::new(brace_pos, self.pos()), 1680c67d6573Sopenharmony_ci ast::ErrorKind::EscapeHexEmpty, 1681c67d6573Sopenharmony_ci )); 1682c67d6573Sopenharmony_ci } 1683c67d6573Sopenharmony_ci match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) { 
1684c67d6573Sopenharmony_ci None => Err(self.error( 1685c67d6573Sopenharmony_ci Span::new(start, end), 1686c67d6573Sopenharmony_ci ast::ErrorKind::EscapeHexInvalid, 1687c67d6573Sopenharmony_ci )), 1688c67d6573Sopenharmony_ci Some(c) => Ok(ast::Literal { 1689c67d6573Sopenharmony_ci span: Span::new(start, self.pos()), 1690c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace(kind), 1691c67d6573Sopenharmony_ci c, 1692c67d6573Sopenharmony_ci }), 1693c67d6573Sopenharmony_ci } 1694c67d6573Sopenharmony_ci } 1695c67d6573Sopenharmony_ci 1696c67d6573Sopenharmony_ci /// Parse a decimal number into a u32 while trimming leading and trailing 1697c67d6573Sopenharmony_ci /// whitespace. 1698c67d6573Sopenharmony_ci /// 1699c67d6573Sopenharmony_ci /// This expects the parser to be positioned at the first position where 1700c67d6573Sopenharmony_ci /// a decimal digit could occur. This will advance the parser to the byte 1701c67d6573Sopenharmony_ci /// immediately following the last contiguous decimal digit. 1702c67d6573Sopenharmony_ci /// 1703c67d6573Sopenharmony_ci /// If no decimal digit could be found or if there was a problem parsing 1704c67d6573Sopenharmony_ci /// the complete set of digits into a u32, then an error is returned. 
1705c67d6573Sopenharmony_ci fn parse_decimal(&self) -> Result<u32> { 1706c67d6573Sopenharmony_ci let mut scratch = self.parser().scratch.borrow_mut(); 1707c67d6573Sopenharmony_ci scratch.clear(); 1708c67d6573Sopenharmony_ci 1709c67d6573Sopenharmony_ci while !self.is_eof() && self.char().is_whitespace() { 1710c67d6573Sopenharmony_ci self.bump(); 1711c67d6573Sopenharmony_ci } 1712c67d6573Sopenharmony_ci let start = self.pos(); 1713c67d6573Sopenharmony_ci while !self.is_eof() && '0' <= self.char() && self.char() <= '9' { 1714c67d6573Sopenharmony_ci scratch.push(self.char()); 1715c67d6573Sopenharmony_ci self.bump_and_bump_space(); 1716c67d6573Sopenharmony_ci } 1717c67d6573Sopenharmony_ci let span = Span::new(start, self.pos()); 1718c67d6573Sopenharmony_ci while !self.is_eof() && self.char().is_whitespace() { 1719c67d6573Sopenharmony_ci self.bump_and_bump_space(); 1720c67d6573Sopenharmony_ci } 1721c67d6573Sopenharmony_ci let digits = scratch.as_str(); 1722c67d6573Sopenharmony_ci if digits.is_empty() { 1723c67d6573Sopenharmony_ci return Err(self.error(span, ast::ErrorKind::DecimalEmpty)); 1724c67d6573Sopenharmony_ci } 1725c67d6573Sopenharmony_ci match u32::from_str_radix(digits, 10).ok() { 1726c67d6573Sopenharmony_ci Some(n) => Ok(n), 1727c67d6573Sopenharmony_ci None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)), 1728c67d6573Sopenharmony_ci } 1729c67d6573Sopenharmony_ci } 1730c67d6573Sopenharmony_ci 1731c67d6573Sopenharmony_ci /// Parse a standard character class consisting primarily of characters or 1732c67d6573Sopenharmony_ci /// character ranges, but can also contain nested character classes of 1733c67d6573Sopenharmony_ci /// any type (sans `.`). 1734c67d6573Sopenharmony_ci /// 1735c67d6573Sopenharmony_ci /// This assumes the parser is positioned at the opening `[`. If parsing 1736c67d6573Sopenharmony_ci /// is successful, then the parser is advanced to the position immediately 1737c67d6573Sopenharmony_ci /// following the closing `]`. 
1738c67d6573Sopenharmony_ci #[inline(never)] 1739c67d6573Sopenharmony_ci fn parse_set_class(&self) -> Result<ast::Class> { 1740c67d6573Sopenharmony_ci assert_eq!(self.char(), '['); 1741c67d6573Sopenharmony_ci 1742c67d6573Sopenharmony_ci let mut union = 1743c67d6573Sopenharmony_ci ast::ClassSetUnion { span: self.span(), items: vec![] }; 1744c67d6573Sopenharmony_ci loop { 1745c67d6573Sopenharmony_ci self.bump_space(); 1746c67d6573Sopenharmony_ci if self.is_eof() { 1747c67d6573Sopenharmony_ci return Err(self.unclosed_class_error()); 1748c67d6573Sopenharmony_ci } 1749c67d6573Sopenharmony_ci match self.char() { 1750c67d6573Sopenharmony_ci '[' => { 1751c67d6573Sopenharmony_ci // If we've already parsed the opening bracket, then 1752c67d6573Sopenharmony_ci // attempt to treat this as the beginning of an ASCII 1753c67d6573Sopenharmony_ci // class. If ASCII class parsing fails, then the parser 1754c67d6573Sopenharmony_ci // backs up to `[`. 1755c67d6573Sopenharmony_ci if !self.parser().stack_class.borrow().is_empty() { 1756c67d6573Sopenharmony_ci if let Some(cls) = self.maybe_parse_ascii_class() { 1757c67d6573Sopenharmony_ci union.push(ast::ClassSetItem::Ascii(cls)); 1758c67d6573Sopenharmony_ci continue; 1759c67d6573Sopenharmony_ci } 1760c67d6573Sopenharmony_ci } 1761c67d6573Sopenharmony_ci union = self.push_class_open(union)?; 1762c67d6573Sopenharmony_ci } 1763c67d6573Sopenharmony_ci ']' => match self.pop_class(union)? 
{ 1764c67d6573Sopenharmony_ci Either::Left(nested_union) => { 1765c67d6573Sopenharmony_ci union = nested_union; 1766c67d6573Sopenharmony_ci } 1767c67d6573Sopenharmony_ci Either::Right(class) => return Ok(class), 1768c67d6573Sopenharmony_ci }, 1769c67d6573Sopenharmony_ci '&' if self.peek() == Some('&') => { 1770c67d6573Sopenharmony_ci assert!(self.bump_if("&&")); 1771c67d6573Sopenharmony_ci union = self.push_class_op( 1772c67d6573Sopenharmony_ci ast::ClassSetBinaryOpKind::Intersection, 1773c67d6573Sopenharmony_ci union, 1774c67d6573Sopenharmony_ci ); 1775c67d6573Sopenharmony_ci } 1776c67d6573Sopenharmony_ci '-' if self.peek() == Some('-') => { 1777c67d6573Sopenharmony_ci assert!(self.bump_if("--")); 1778c67d6573Sopenharmony_ci union = self.push_class_op( 1779c67d6573Sopenharmony_ci ast::ClassSetBinaryOpKind::Difference, 1780c67d6573Sopenharmony_ci union, 1781c67d6573Sopenharmony_ci ); 1782c67d6573Sopenharmony_ci } 1783c67d6573Sopenharmony_ci '~' if self.peek() == Some('~') => { 1784c67d6573Sopenharmony_ci assert!(self.bump_if("~~")); 1785c67d6573Sopenharmony_ci union = self.push_class_op( 1786c67d6573Sopenharmony_ci ast::ClassSetBinaryOpKind::SymmetricDifference, 1787c67d6573Sopenharmony_ci union, 1788c67d6573Sopenharmony_ci ); 1789c67d6573Sopenharmony_ci } 1790c67d6573Sopenharmony_ci _ => { 1791c67d6573Sopenharmony_ci union.push(self.parse_set_class_range()?); 1792c67d6573Sopenharmony_ci } 1793c67d6573Sopenharmony_ci } 1794c67d6573Sopenharmony_ci } 1795c67d6573Sopenharmony_ci } 1796c67d6573Sopenharmony_ci 1797c67d6573Sopenharmony_ci /// Parse a single primitive item in a character class set. The item to 1798c67d6573Sopenharmony_ci /// be parsed can either be one of a simple literal character, a range 1799c67d6573Sopenharmony_ci /// between two simple literal characters or a "primitive" character 1800c67d6573Sopenharmony_ci /// class like \w or \p{Greek}. 
1801c67d6573Sopenharmony_ci /// 1802c67d6573Sopenharmony_ci /// If an invalid escape is found, or if a character class is found where 1803c67d6573Sopenharmony_ci /// a simple literal is expected (e.g., in a range), then an error is 1804c67d6573Sopenharmony_ci /// returned. 1805c67d6573Sopenharmony_ci #[inline(never)] 1806c67d6573Sopenharmony_ci fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> { 1807c67d6573Sopenharmony_ci let prim1 = self.parse_set_class_item()?; 1808c67d6573Sopenharmony_ci self.bump_space(); 1809c67d6573Sopenharmony_ci if self.is_eof() { 1810c67d6573Sopenharmony_ci return Err(self.unclosed_class_error()); 1811c67d6573Sopenharmony_ci } 1812c67d6573Sopenharmony_ci // If the next char isn't a `-`, then we don't have a range. 1813c67d6573Sopenharmony_ci // There are two exceptions. If the char after a `-` is a `]`, then 1814c67d6573Sopenharmony_ci // `-` is interpreted as a literal `-`. Alternatively, if the char 1815c67d6573Sopenharmony_ci // after a `-` is a `-`, then `--` corresponds to a "difference" 1816c67d6573Sopenharmony_ci // operation. 1817c67d6573Sopenharmony_ci if self.char() != '-' 1818c67d6573Sopenharmony_ci || self.peek_space() == Some(']') 1819c67d6573Sopenharmony_ci || self.peek_space() == Some('-') 1820c67d6573Sopenharmony_ci { 1821c67d6573Sopenharmony_ci return prim1.into_class_set_item(self); 1822c67d6573Sopenharmony_ci } 1823c67d6573Sopenharmony_ci // OK, now we're parsing a range, so bump past the `-` and parse the 1824c67d6573Sopenharmony_ci // second half of the range. 
1825c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1826c67d6573Sopenharmony_ci return Err(self.unclosed_class_error()); 1827c67d6573Sopenharmony_ci } 1828c67d6573Sopenharmony_ci let prim2 = self.parse_set_class_item()?; 1829c67d6573Sopenharmony_ci let range = ast::ClassSetRange { 1830c67d6573Sopenharmony_ci span: Span::new(prim1.span().start, prim2.span().end), 1831c67d6573Sopenharmony_ci start: prim1.into_class_literal(self)?, 1832c67d6573Sopenharmony_ci end: prim2.into_class_literal(self)?, 1833c67d6573Sopenharmony_ci }; 1834c67d6573Sopenharmony_ci if !range.is_valid() { 1835c67d6573Sopenharmony_ci return Err( 1836c67d6573Sopenharmony_ci self.error(range.span, ast::ErrorKind::ClassRangeInvalid) 1837c67d6573Sopenharmony_ci ); 1838c67d6573Sopenharmony_ci } 1839c67d6573Sopenharmony_ci Ok(ast::ClassSetItem::Range(range)) 1840c67d6573Sopenharmony_ci } 1841c67d6573Sopenharmony_ci 1842c67d6573Sopenharmony_ci /// Parse a single item in a character class as a primitive, where the 1843c67d6573Sopenharmony_ci /// primitive either consists of a verbatim literal or a single escape 1844c67d6573Sopenharmony_ci /// sequence. 1845c67d6573Sopenharmony_ci /// 1846c67d6573Sopenharmony_ci /// This assumes the parser is positioned at the beginning of a primitive, 1847c67d6573Sopenharmony_ci /// and advances the parser to the first position after the primitive if 1848c67d6573Sopenharmony_ci /// successful. 1849c67d6573Sopenharmony_ci /// 1850c67d6573Sopenharmony_ci /// Note that it is the caller's responsibility to report an error if an 1851c67d6573Sopenharmony_ci /// illegal primitive was parsed. 
1852c67d6573Sopenharmony_ci #[inline(never)] 1853c67d6573Sopenharmony_ci fn parse_set_class_item(&self) -> Result<Primitive> { 1854c67d6573Sopenharmony_ci if self.char() == '\\' { 1855c67d6573Sopenharmony_ci self.parse_escape() 1856c67d6573Sopenharmony_ci } else { 1857c67d6573Sopenharmony_ci let x = Primitive::Literal(ast::Literal { 1858c67d6573Sopenharmony_ci span: self.span_char(), 1859c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 1860c67d6573Sopenharmony_ci c: self.char(), 1861c67d6573Sopenharmony_ci }); 1862c67d6573Sopenharmony_ci self.bump(); 1863c67d6573Sopenharmony_ci Ok(x) 1864c67d6573Sopenharmony_ci } 1865c67d6573Sopenharmony_ci } 1866c67d6573Sopenharmony_ci 1867c67d6573Sopenharmony_ci /// Parses the opening of a character class set. This includes the opening 1868c67d6573Sopenharmony_ci /// bracket along with `^` if present to indicate negation. This also 1869c67d6573Sopenharmony_ci /// starts parsing the opening set of unioned items if applicable, since 1870c67d6573Sopenharmony_ci /// there are special rules applied to certain characters in the opening 1871c67d6573Sopenharmony_ci /// of a character class. For example, `[^]]` is the class of all 1872c67d6573Sopenharmony_ci /// characters not equal to `]`. (`]` would need to be escaped in any other 1873c67d6573Sopenharmony_ci /// position.) Similarly for `-`. 1874c67d6573Sopenharmony_ci /// 1875c67d6573Sopenharmony_ci /// In all cases, the op inside the returned `ast::ClassBracketed` is an 1876c67d6573Sopenharmony_ci /// empty union. This empty union should be replaced with the actual item 1877c67d6573Sopenharmony_ci /// when it is popped from the parser's stack. 1878c67d6573Sopenharmony_ci /// 1879c67d6573Sopenharmony_ci /// This assumes the parser is positioned at the opening `[` and advances 1880c67d6573Sopenharmony_ci /// the parser to the first non-special byte of the character class. 1881c67d6573Sopenharmony_ci /// 1882c67d6573Sopenharmony_ci /// An error is returned if EOF is found. 
1883c67d6573Sopenharmony_ci #[inline(never)] 1884c67d6573Sopenharmony_ci fn parse_set_class_open( 1885c67d6573Sopenharmony_ci &self, 1886c67d6573Sopenharmony_ci ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> { 1887c67d6573Sopenharmony_ci assert_eq!(self.char(), '['); 1888c67d6573Sopenharmony_ci let start = self.pos(); 1889c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1890c67d6573Sopenharmony_ci return Err(self.error( 1891c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1892c67d6573Sopenharmony_ci ast::ErrorKind::ClassUnclosed, 1893c67d6573Sopenharmony_ci )); 1894c67d6573Sopenharmony_ci } 1895c67d6573Sopenharmony_ci 1896c67d6573Sopenharmony_ci let negated = if self.char() != '^' { 1897c67d6573Sopenharmony_ci false 1898c67d6573Sopenharmony_ci } else { 1899c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1900c67d6573Sopenharmony_ci return Err(self.error( 1901c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1902c67d6573Sopenharmony_ci ast::ErrorKind::ClassUnclosed, 1903c67d6573Sopenharmony_ci )); 1904c67d6573Sopenharmony_ci } 1905c67d6573Sopenharmony_ci true 1906c67d6573Sopenharmony_ci }; 1907c67d6573Sopenharmony_ci // Accept any number of `-` as literal `-`. 
1908c67d6573Sopenharmony_ci let mut union = 1909c67d6573Sopenharmony_ci ast::ClassSetUnion { span: self.span(), items: vec![] }; 1910c67d6573Sopenharmony_ci while self.char() == '-' { 1911c67d6573Sopenharmony_ci union.push(ast::ClassSetItem::Literal(ast::Literal { 1912c67d6573Sopenharmony_ci span: self.span_char(), 1913c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 1914c67d6573Sopenharmony_ci c: '-', 1915c67d6573Sopenharmony_ci })); 1916c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1917c67d6573Sopenharmony_ci return Err(self.error( 1918c67d6573Sopenharmony_ci Span::new(start, start), 1919c67d6573Sopenharmony_ci ast::ErrorKind::ClassUnclosed, 1920c67d6573Sopenharmony_ci )); 1921c67d6573Sopenharmony_ci } 1922c67d6573Sopenharmony_ci } 1923c67d6573Sopenharmony_ci // If `]` is the *first* char in a set, then interpret it as a literal 1924c67d6573Sopenharmony_ci // `]`. That is, an empty class is impossible to write. 1925c67d6573Sopenharmony_ci if union.items.is_empty() && self.char() == ']' { 1926c67d6573Sopenharmony_ci union.push(ast::ClassSetItem::Literal(ast::Literal { 1927c67d6573Sopenharmony_ci span: self.span_char(), 1928c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 1929c67d6573Sopenharmony_ci c: ']', 1930c67d6573Sopenharmony_ci })); 1931c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 1932c67d6573Sopenharmony_ci return Err(self.error( 1933c67d6573Sopenharmony_ci Span::new(start, self.pos()), 1934c67d6573Sopenharmony_ci ast::ErrorKind::ClassUnclosed, 1935c67d6573Sopenharmony_ci )); 1936c67d6573Sopenharmony_ci } 1937c67d6573Sopenharmony_ci } 1938c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 1939c67d6573Sopenharmony_ci span: Span::new(start, self.pos()), 1940c67d6573Sopenharmony_ci negated, 1941c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 1942c67d6573Sopenharmony_ci span: Span::new(union.span.start, union.span.start), 1943c67d6573Sopenharmony_ci items: vec![], 1944c67d6573Sopenharmony_ci 
}), 1945c67d6573Sopenharmony_ci }; 1946c67d6573Sopenharmony_ci Ok((set, union)) 1947c67d6573Sopenharmony_ci } 1948c67d6573Sopenharmony_ci 1949c67d6573Sopenharmony_ci /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`. 1950c67d6573Sopenharmony_ci /// 1951c67d6573Sopenharmony_ci /// This assumes the parser is positioned at the opening `[`. 1952c67d6573Sopenharmony_ci /// 1953c67d6573Sopenharmony_ci /// If no valid ASCII character class could be found, then this does not 1954c67d6573Sopenharmony_ci /// advance the parser and `None` is returned. Otherwise, the parser is 1955c67d6573Sopenharmony_ci /// advanced to the first byte following the closing `]` and the 1956c67d6573Sopenharmony_ci /// corresponding ASCII class is returned. 1957c67d6573Sopenharmony_ci #[inline(never)] 1958c67d6573Sopenharmony_ci fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> { 1959c67d6573Sopenharmony_ci // ASCII character classes are interesting from a parsing perspective 1960c67d6573Sopenharmony_ci // because parsing cannot fail with any interesting error. For example, 1961c67d6573Sopenharmony_ci // in order to use an ASCII character class, it must be enclosed in 1962c67d6573Sopenharmony_ci // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think 1963c67d6573Sopenharmony_ci // of it as "ASCII character characters have the syntax `[:NAME:]` 1964c67d6573Sopenharmony_ci // which can only appear within character brackets." This means that 1965c67d6573Sopenharmony_ci // things like `[[:lower:]A]` are legal constructs. 1966c67d6573Sopenharmony_ci // 1967c67d6573Sopenharmony_ci // However, if one types an incorrect ASCII character class, e.g., 1968c67d6573Sopenharmony_ci // `[[:loower:]]`, then we treat that as a normal nested character 1969c67d6573Sopenharmony_ci // class containing the characters `:elorw`. 
One might argue that we 1970c67d6573Sopenharmony_ci // should return an error instead since the repeated colons give away 1971c67d6573Sopenharmony_ci // the intent to write an ASCII class. But what if the user typed 1972c67d6573Sopenharmony_ci // `[[:lower]]` instead? How can we tell that was intended to be an 1973c67d6573Sopenharmony_ci // ASCII class and not just a normal nested class? 1974c67d6573Sopenharmony_ci // 1975c67d6573Sopenharmony_ci // Reasonable people can probably disagree over this, but for better 1976c67d6573Sopenharmony_ci // or worse, we implement semantics that never fails at the expense 1977c67d6573Sopenharmony_ci // of better failure modes. 1978c67d6573Sopenharmony_ci assert_eq!(self.char(), '['); 1979c67d6573Sopenharmony_ci // If parsing fails, then we back up the parser to this starting point. 1980c67d6573Sopenharmony_ci let start = self.pos(); 1981c67d6573Sopenharmony_ci let mut negated = false; 1982c67d6573Sopenharmony_ci if !self.bump() || self.char() != ':' { 1983c67d6573Sopenharmony_ci self.parser().pos.set(start); 1984c67d6573Sopenharmony_ci return None; 1985c67d6573Sopenharmony_ci } 1986c67d6573Sopenharmony_ci if !self.bump() { 1987c67d6573Sopenharmony_ci self.parser().pos.set(start); 1988c67d6573Sopenharmony_ci return None; 1989c67d6573Sopenharmony_ci } 1990c67d6573Sopenharmony_ci if self.char() == '^' { 1991c67d6573Sopenharmony_ci negated = true; 1992c67d6573Sopenharmony_ci if !self.bump() { 1993c67d6573Sopenharmony_ci self.parser().pos.set(start); 1994c67d6573Sopenharmony_ci return None; 1995c67d6573Sopenharmony_ci } 1996c67d6573Sopenharmony_ci } 1997c67d6573Sopenharmony_ci let name_start = self.offset(); 1998c67d6573Sopenharmony_ci while self.char() != ':' && self.bump() {} 1999c67d6573Sopenharmony_ci if self.is_eof() { 2000c67d6573Sopenharmony_ci self.parser().pos.set(start); 2001c67d6573Sopenharmony_ci return None; 2002c67d6573Sopenharmony_ci } 2003c67d6573Sopenharmony_ci let name = &self.pattern()[name_start..self.offset()]; 
2004c67d6573Sopenharmony_ci if !self.bump_if(":]") { 2005c67d6573Sopenharmony_ci self.parser().pos.set(start); 2006c67d6573Sopenharmony_ci return None; 2007c67d6573Sopenharmony_ci } 2008c67d6573Sopenharmony_ci let kind = match ast::ClassAsciiKind::from_name(name) { 2009c67d6573Sopenharmony_ci Some(kind) => kind, 2010c67d6573Sopenharmony_ci None => { 2011c67d6573Sopenharmony_ci self.parser().pos.set(start); 2012c67d6573Sopenharmony_ci return None; 2013c67d6573Sopenharmony_ci } 2014c67d6573Sopenharmony_ci }; 2015c67d6573Sopenharmony_ci Some(ast::ClassAscii { 2016c67d6573Sopenharmony_ci span: Span::new(start, self.pos()), 2017c67d6573Sopenharmony_ci kind, 2018c67d6573Sopenharmony_ci negated, 2019c67d6573Sopenharmony_ci }) 2020c67d6573Sopenharmony_ci } 2021c67d6573Sopenharmony_ci 2022c67d6573Sopenharmony_ci /// Parse a Unicode class in either the single character notation, `\pN` 2023c67d6573Sopenharmony_ci /// or the multi-character bracketed notation, `\p{Greek}`. This assumes 2024c67d6573Sopenharmony_ci /// the parser is positioned at the `p` (or `P` for negation) and will 2025c67d6573Sopenharmony_ci /// advance the parser to the character immediately following the class. 2026c67d6573Sopenharmony_ci /// 2027c67d6573Sopenharmony_ci /// Note that this does not check whether the class name is valid or not. 
2028c67d6573Sopenharmony_ci #[inline(never)] 2029c67d6573Sopenharmony_ci fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> { 2030c67d6573Sopenharmony_ci assert!(self.char() == 'p' || self.char() == 'P'); 2031c67d6573Sopenharmony_ci 2032c67d6573Sopenharmony_ci let mut scratch = self.parser().scratch.borrow_mut(); 2033c67d6573Sopenharmony_ci scratch.clear(); 2034c67d6573Sopenharmony_ci 2035c67d6573Sopenharmony_ci let negated = self.char() == 'P'; 2036c67d6573Sopenharmony_ci if !self.bump_and_bump_space() { 2037c67d6573Sopenharmony_ci return Err( 2038c67d6573Sopenharmony_ci self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof) 2039c67d6573Sopenharmony_ci ); 2040c67d6573Sopenharmony_ci } 2041c67d6573Sopenharmony_ci let (start, kind) = if self.char() == '{' { 2042c67d6573Sopenharmony_ci let start = self.span_char().end; 2043c67d6573Sopenharmony_ci while self.bump_and_bump_space() && self.char() != '}' { 2044c67d6573Sopenharmony_ci scratch.push(self.char()); 2045c67d6573Sopenharmony_ci } 2046c67d6573Sopenharmony_ci if self.is_eof() { 2047c67d6573Sopenharmony_ci return Err(self 2048c67d6573Sopenharmony_ci .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)); 2049c67d6573Sopenharmony_ci } 2050c67d6573Sopenharmony_ci assert_eq!(self.char(), '}'); 2051c67d6573Sopenharmony_ci self.bump(); 2052c67d6573Sopenharmony_ci 2053c67d6573Sopenharmony_ci let name = scratch.as_str(); 2054c67d6573Sopenharmony_ci if let Some(i) = name.find("!=") { 2055c67d6573Sopenharmony_ci ( 2056c67d6573Sopenharmony_ci start, 2057c67d6573Sopenharmony_ci ast::ClassUnicodeKind::NamedValue { 2058c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::NotEqual, 2059c67d6573Sopenharmony_ci name: name[..i].to_string(), 2060c67d6573Sopenharmony_ci value: name[i + 2..].to_string(), 2061c67d6573Sopenharmony_ci }, 2062c67d6573Sopenharmony_ci ) 2063c67d6573Sopenharmony_ci } else if let Some(i) = name.find(':') { 2064c67d6573Sopenharmony_ci ( 2065c67d6573Sopenharmony_ci start, 
2066c67d6573Sopenharmony_ci ast::ClassUnicodeKind::NamedValue { 2067c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Colon, 2068c67d6573Sopenharmony_ci name: name[..i].to_string(), 2069c67d6573Sopenharmony_ci value: name[i + 1..].to_string(), 2070c67d6573Sopenharmony_ci }, 2071c67d6573Sopenharmony_ci ) 2072c67d6573Sopenharmony_ci } else if let Some(i) = name.find('=') { 2073c67d6573Sopenharmony_ci ( 2074c67d6573Sopenharmony_ci start, 2075c67d6573Sopenharmony_ci ast::ClassUnicodeKind::NamedValue { 2076c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Equal, 2077c67d6573Sopenharmony_ci name: name[..i].to_string(), 2078c67d6573Sopenharmony_ci value: name[i + 1..].to_string(), 2079c67d6573Sopenharmony_ci }, 2080c67d6573Sopenharmony_ci ) 2081c67d6573Sopenharmony_ci } else { 2082c67d6573Sopenharmony_ci (start, ast::ClassUnicodeKind::Named(name.to_string())) 2083c67d6573Sopenharmony_ci } 2084c67d6573Sopenharmony_ci } else { 2085c67d6573Sopenharmony_ci let start = self.pos(); 2086c67d6573Sopenharmony_ci let c = self.char(); 2087c67d6573Sopenharmony_ci if c == '\\' { 2088c67d6573Sopenharmony_ci return Err(self.error( 2089c67d6573Sopenharmony_ci self.span_char(), 2090c67d6573Sopenharmony_ci ast::ErrorKind::UnicodeClassInvalid, 2091c67d6573Sopenharmony_ci )); 2092c67d6573Sopenharmony_ci } 2093c67d6573Sopenharmony_ci self.bump_and_bump_space(); 2094c67d6573Sopenharmony_ci let kind = ast::ClassUnicodeKind::OneLetter(c); 2095c67d6573Sopenharmony_ci (start, kind) 2096c67d6573Sopenharmony_ci }; 2097c67d6573Sopenharmony_ci Ok(ast::ClassUnicode { 2098c67d6573Sopenharmony_ci span: Span::new(start, self.pos()), 2099c67d6573Sopenharmony_ci negated, 2100c67d6573Sopenharmony_ci kind, 2101c67d6573Sopenharmony_ci }) 2102c67d6573Sopenharmony_ci } 2103c67d6573Sopenharmony_ci 2104c67d6573Sopenharmony_ci /// Parse a Perl character class, e.g., `\d` or `\W`. 
/// This assumes the
/// parser is currently at a valid character class name and will be
/// advanced to the character immediately following the class.
#[inline(never)]
fn parse_perl_class(&self) -> ast::ClassPerl {
    let letter = self.char();
    let span = self.span_char();
    self.bump();
    // The lower-case letter picks the class; the upper-case spelling of
    // the same letter is its negation.
    let kind = match letter.to_ascii_lowercase() {
        'd' => ast::ClassPerlKind::Digit,
        's' => ast::ClassPerlKind::Space,
        'w' => ast::ClassPerlKind::Word,
        _ => panic!("expected valid Perl class but got '{}'", letter),
    };
    ast::ClassPerl { span, kind, negated: letter.is_ascii_uppercase() }
}
} // closes the enclosing `impl` block, which is opened before this chunk

/// Walks a fully parsed Ast and verifies that its nesting depth stays
/// within the parser's configured nest limit, reporting an error when the
/// limit is exceeded.
#[derive(Debug)]
struct NestLimiter<'p, 's, P> {
    /// The parser on whose behalf the nest limit is checked.
    p: &'p ParserI<'s, P>,
    /// How deep the walk currently is inside the Ast.
    depth: u32,
}

impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
    /// Create a limiter for the given parser, starting at depth zero.
    fn new(p: &'p ParserI<'s, P>) -> Self {
        NestLimiter { p, depth: 0 }
    }

    /// Visit the whole Ast with this limiter, returning an error from the
    /// first node that pushes the depth over the limit.
    #[inline(never)]
    fn check(self, ast: &Ast) -> Result<()> {
        ast::visit(ast, self)
    }

    /// Go one level deeper. Fails with `NestLimitExceeded`, located at
    /// `span`, if the new depth overflows a `u32` or is greater than the
    /// parser's nest limit. The depth is only updated on success.
    fn increment_depth(&mut self, span: &Span) -> Result<()> {
        let deeper = match self.depth.checked_add(1) {
            Some(deeper) => deeper,
            None => {
                return Err(self.p.error(
                    span.clone(),
                    ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
                ));
            }
        };
        let limit = self.p.parser().nest_limit;
        if deeper > limit {
            return Err(self.p.error(
                span.clone(),
                ast::ErrorKind::NestLimitExceeded(limit),
            ));
        }
        self.depth = deeper;
        Ok(())
    }

    /// Go one level back up.
    fn decrement_depth(&mut self) {
        // Assuming the correctness of the visitor, this should never drop
        // below 0.
        self.depth = self.depth.checked_sub(1).unwrap();
    }
}

impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
    type Output = ();
    type Err = ast::Error;

    fn finish(self) -> Result<()> {
        Ok(())
    }

    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
        let span = match ast {
            // Nodes that contain other nodes count towards the depth.
            Ast::Class(ast::Class::Bracketed(x)) => &x.span,
            Ast::Repetition(x) => &x.span,
            Ast::Group(x) => &x.span,
            Ast::Alternation(x) => &x.span,
            Ast::Concat(x) => &x.span,
            // These are all base cases, so the depth is left alone.
            Ast::Empty(_)
            | Ast::Flags(_)
            | Ast::Literal(_)
            | Ast::Dot(_)
            | Ast::Assertion(_)
            | Ast::Class(ast::Class::Unicode(_))
            | Ast::Class(ast::Class::Perl(_)) => return Ok(()),
        };
        self.increment_depth(span)
    }

    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
        match ast {
            // Mirror of `visit_pre`: leaving a nesting node.
            Ast::Class(ast::Class::Bracketed(_))
            | Ast::Repetition(_)
            | Ast::Group(_)
            | Ast::Alternation(_)
            | Ast::Concat(_) => self.decrement_depth(),
            // These are all base cases, so the depth is left alone.
            Ast::Empty(_)
            | Ast::Flags(_)
            | Ast::Literal(_)
            | Ast::Dot(_)
            | Ast::Assertion(_)
            | Ast::Class(ast::Class::Unicode(_))
            | Ast::Class(ast::Class::Perl(_)) => {}
        }
        Ok(())
    }

    fn visit_class_set_item_pre(
        &mut self,
        ast: &ast::ClassSetItem,
    ) -> Result<()> {
        let span = match ast {
            // Only nested classes and unions count towards the depth.
            ast::ClassSetItem::Bracketed(x) => &x.span,
            ast::ClassSetItem::Union(x) => &x.span,
            // These are all base cases, so the depth is left alone.
            ast::ClassSetItem::Empty(_)
            | ast::ClassSetItem::Literal(_)
            | ast::ClassSetItem::Range(_)
            | ast::ClassSetItem::Ascii(_)
            | ast::ClassSetItem::Unicode(_)
            | ast::ClassSetItem::Perl(_) => return Ok(()),
        };
        self.increment_depth(span)
    }

    fn visit_class_set_item_post(
        &mut self,
        ast: &ast::ClassSetItem,
    ) -> Result<()> {
        match ast {
            // Mirror of `visit_class_set_item_pre`.
            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
                self.decrement_depth()
            }
            // These are all base cases, so the depth is left alone.
            ast::ClassSetItem::Empty(_)
            | ast::ClassSetItem::Literal(_)
            | ast::ClassSetItem::Range(_)
            | ast::ClassSetItem::Ascii(_)
            | ast::ClassSetItem::Unicode(_)
            | ast::ClassSetItem::Perl(_) => {}
        }
        Ok(())
    }

    fn visit_class_set_binary_op_pre(
        &mut self,
        ast: &ast::ClassSetBinaryOp,
    ) -> Result<()> {
        // Every binary set operation adds one level of nesting.
        self.increment_depth(&ast.span)
    }

    fn visit_class_set_binary_op_post(
        &mut self,
        _ast: &ast::ClassSetBinaryOp,
    ) -> Result<()> {
        self.decrement_depth();
        Ok(())
    }
}

/// When the result is an error, transforms the ast::ErrorKind from the source
/// Result into another one. This function is used to return clearer error
/// messages when possible.
fn specialize_err<T>(
    result: Result<T>,
    from: ast::ErrorKind,
    to: ast::ErrorKind,
) -> Result<T> {
    // Successful results pass through untouched. An error whose kind is
    // `from` is rebuilt with kind `to`, keeping its pattern and span; any
    // other error is returned as-is.
    result.map_err(|e| {
        if e.kind == from {
            ast::Error { kind: to, pattern: e.pattern, span: e.span }
        } else {
            e
        }
    })
}

#[cfg(test)]
mod tests {
    use std::ops::Range;

    use super::{Parser, ParserBuilder, ParserI, Primitive};
    use crate::ast::{self, Ast, Position, Span};

    // Our own assert_eq, which has slightly better formatting (but honestly
    // still kind of crappy).
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            match (&$left, &$right) {
                (left_val, right_val) => {
                    if !(*left_val == *right_val) {
                        panic!(
                            "assertion failed: `(left == right)`\n\n\
                             left: `{:?}`\nright: `{:?}`\n\n",
                            left_val, right_val
                        )
                    }
                }
            }
        }};
    }

    // We create these errors to compare with real ast::Errors in the tests.
2324c67d6573Sopenharmony_ci // We define equality between TestError and ast::Error to disregard the 2325c67d6573Sopenharmony_ci // pattern string in ast::Error, which is annoying to provide in tests. 2326c67d6573Sopenharmony_ci #[derive(Clone, Debug)] 2327c67d6573Sopenharmony_ci struct TestError { 2328c67d6573Sopenharmony_ci span: Span, 2329c67d6573Sopenharmony_ci kind: ast::ErrorKind, 2330c67d6573Sopenharmony_ci } 2331c67d6573Sopenharmony_ci 2332c67d6573Sopenharmony_ci impl PartialEq<ast::Error> for TestError { 2333c67d6573Sopenharmony_ci fn eq(&self, other: &ast::Error) -> bool { 2334c67d6573Sopenharmony_ci self.span == other.span && self.kind == other.kind 2335c67d6573Sopenharmony_ci } 2336c67d6573Sopenharmony_ci } 2337c67d6573Sopenharmony_ci 2338c67d6573Sopenharmony_ci impl PartialEq<TestError> for ast::Error { 2339c67d6573Sopenharmony_ci fn eq(&self, other: &TestError) -> bool { 2340c67d6573Sopenharmony_ci self.span == other.span && self.kind == other.kind 2341c67d6573Sopenharmony_ci } 2342c67d6573Sopenharmony_ci } 2343c67d6573Sopenharmony_ci 2344c67d6573Sopenharmony_ci fn s(str: &str) -> String { 2345c67d6573Sopenharmony_ci str.to_string() 2346c67d6573Sopenharmony_ci } 2347c67d6573Sopenharmony_ci 2348c67d6573Sopenharmony_ci fn parser(pattern: &str) -> ParserI<'_, Parser> { 2349c67d6573Sopenharmony_ci ParserI::new(Parser::new(), pattern) 2350c67d6573Sopenharmony_ci } 2351c67d6573Sopenharmony_ci 2352c67d6573Sopenharmony_ci fn parser_octal(pattern: &str) -> ParserI<'_, Parser> { 2353c67d6573Sopenharmony_ci let parser = ParserBuilder::new().octal(true).build(); 2354c67d6573Sopenharmony_ci ParserI::new(parser, pattern) 2355c67d6573Sopenharmony_ci } 2356c67d6573Sopenharmony_ci 2357c67d6573Sopenharmony_ci fn parser_nest_limit( 2358c67d6573Sopenharmony_ci pattern: &str, 2359c67d6573Sopenharmony_ci nest_limit: u32, 2360c67d6573Sopenharmony_ci ) -> ParserI<'_, Parser> { 2361c67d6573Sopenharmony_ci let p = ParserBuilder::new().nest_limit(nest_limit).build(); 
2362c67d6573Sopenharmony_ci ParserI::new(p, pattern) 2363c67d6573Sopenharmony_ci } 2364c67d6573Sopenharmony_ci 2365c67d6573Sopenharmony_ci fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> { 2366c67d6573Sopenharmony_ci let p = ParserBuilder::new().ignore_whitespace(true).build(); 2367c67d6573Sopenharmony_ci ParserI::new(p, pattern) 2368c67d6573Sopenharmony_ci } 2369c67d6573Sopenharmony_ci 2370c67d6573Sopenharmony_ci /// Short alias for creating a new span. 2371c67d6573Sopenharmony_ci fn nspan(start: Position, end: Position) -> Span { 2372c67d6573Sopenharmony_ci Span::new(start, end) 2373c67d6573Sopenharmony_ci } 2374c67d6573Sopenharmony_ci 2375c67d6573Sopenharmony_ci /// Short alias for creating a new position. 2376c67d6573Sopenharmony_ci fn npos(offset: usize, line: usize, column: usize) -> Position { 2377c67d6573Sopenharmony_ci Position::new(offset, line, column) 2378c67d6573Sopenharmony_ci } 2379c67d6573Sopenharmony_ci 2380c67d6573Sopenharmony_ci /// Create a new span from the given offset range. This assumes a single 2381c67d6573Sopenharmony_ci /// line and sets the columns based on the offsets. i.e., This only works 2382c67d6573Sopenharmony_ci /// out of the box for ASCII, which is fine for most tests. 2383c67d6573Sopenharmony_ci fn span(range: Range<usize>) -> Span { 2384c67d6573Sopenharmony_ci let start = Position::new(range.start, 1, range.start + 1); 2385c67d6573Sopenharmony_ci let end = Position::new(range.end, 1, range.end + 1); 2386c67d6573Sopenharmony_ci Span::new(start, end) 2387c67d6573Sopenharmony_ci } 2388c67d6573Sopenharmony_ci 2389c67d6573Sopenharmony_ci /// Create a new span for the corresponding byte range in the given string. 
2390c67d6573Sopenharmony_ci fn span_range(subject: &str, range: Range<usize>) -> Span { 2391c67d6573Sopenharmony_ci let start = Position { 2392c67d6573Sopenharmony_ci offset: range.start, 2393c67d6573Sopenharmony_ci line: 1 + subject[..range.start].matches('\n').count(), 2394c67d6573Sopenharmony_ci column: 1 + subject[..range.start] 2395c67d6573Sopenharmony_ci .chars() 2396c67d6573Sopenharmony_ci .rev() 2397c67d6573Sopenharmony_ci .position(|c| c == '\n') 2398c67d6573Sopenharmony_ci .unwrap_or(subject[..range.start].chars().count()), 2399c67d6573Sopenharmony_ci }; 2400c67d6573Sopenharmony_ci let end = Position { 2401c67d6573Sopenharmony_ci offset: range.end, 2402c67d6573Sopenharmony_ci line: 1 + subject[..range.end].matches('\n').count(), 2403c67d6573Sopenharmony_ci column: 1 + subject[..range.end] 2404c67d6573Sopenharmony_ci .chars() 2405c67d6573Sopenharmony_ci .rev() 2406c67d6573Sopenharmony_ci .position(|c| c == '\n') 2407c67d6573Sopenharmony_ci .unwrap_or(subject[..range.end].chars().count()), 2408c67d6573Sopenharmony_ci }; 2409c67d6573Sopenharmony_ci Span::new(start, end) 2410c67d6573Sopenharmony_ci } 2411c67d6573Sopenharmony_ci 2412c67d6573Sopenharmony_ci /// Create a verbatim literal starting at the given position. 2413c67d6573Sopenharmony_ci fn lit(c: char, start: usize) -> Ast { 2414c67d6573Sopenharmony_ci lit_with(c, span(start..start + c.len_utf8())) 2415c67d6573Sopenharmony_ci } 2416c67d6573Sopenharmony_ci 2417c67d6573Sopenharmony_ci /// Create a punctuation literal starting at the given position. 2418c67d6573Sopenharmony_ci fn punct_lit(c: char, span: Span) -> Ast { 2419c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 2420c67d6573Sopenharmony_ci span, 2421c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 2422c67d6573Sopenharmony_ci c, 2423c67d6573Sopenharmony_ci }) 2424c67d6573Sopenharmony_ci } 2425c67d6573Sopenharmony_ci 2426c67d6573Sopenharmony_ci /// Create a verbatim literal with the given span. 
2427c67d6573Sopenharmony_ci fn lit_with(c: char, span: Span) -> Ast { 2428c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 2429c67d6573Sopenharmony_ci span, 2430c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 2431c67d6573Sopenharmony_ci c, 2432c67d6573Sopenharmony_ci }) 2433c67d6573Sopenharmony_ci } 2434c67d6573Sopenharmony_ci 2435c67d6573Sopenharmony_ci /// Create a concatenation with the given range. 2436c67d6573Sopenharmony_ci fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast { 2437c67d6573Sopenharmony_ci concat_with(span(range), asts) 2438c67d6573Sopenharmony_ci } 2439c67d6573Sopenharmony_ci 2440c67d6573Sopenharmony_ci /// Create a concatenation with the given span. 2441c67d6573Sopenharmony_ci fn concat_with(span: Span, asts: Vec<Ast>) -> Ast { 2442c67d6573Sopenharmony_ci Ast::Concat(ast::Concat { span, asts }) 2443c67d6573Sopenharmony_ci } 2444c67d6573Sopenharmony_ci 2445c67d6573Sopenharmony_ci /// Create an alternation with the given span. 2446c67d6573Sopenharmony_ci fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast { 2447c67d6573Sopenharmony_ci Ast::Alternation(ast::Alternation { span: span(range), asts }) 2448c67d6573Sopenharmony_ci } 2449c67d6573Sopenharmony_ci 2450c67d6573Sopenharmony_ci /// Create a capturing group with the given span. 2451c67d6573Sopenharmony_ci fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast { 2452c67d6573Sopenharmony_ci Ast::Group(ast::Group { 2453c67d6573Sopenharmony_ci span: span(range), 2454c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(index), 2455c67d6573Sopenharmony_ci ast: Box::new(ast), 2456c67d6573Sopenharmony_ci }) 2457c67d6573Sopenharmony_ci } 2458c67d6573Sopenharmony_ci 2459c67d6573Sopenharmony_ci /// Create an ast::SetFlags. 2460c67d6573Sopenharmony_ci /// 2461c67d6573Sopenharmony_ci /// The given pattern should be the full pattern string. The range given 2462c67d6573Sopenharmony_ci /// should correspond to the byte offsets where the flag set occurs. 
2463c67d6573Sopenharmony_ci /// 2464c67d6573Sopenharmony_ci /// If negated is true, then the set is interpreted as beginning with a 2465c67d6573Sopenharmony_ci /// negation. 2466c67d6573Sopenharmony_ci fn flag_set( 2467c67d6573Sopenharmony_ci pat: &str, 2468c67d6573Sopenharmony_ci range: Range<usize>, 2469c67d6573Sopenharmony_ci flag: ast::Flag, 2470c67d6573Sopenharmony_ci negated: bool, 2471c67d6573Sopenharmony_ci ) -> Ast { 2472c67d6573Sopenharmony_ci let mut items = vec![ast::FlagsItem { 2473c67d6573Sopenharmony_ci span: span_range(pat, (range.end - 2)..(range.end - 1)), 2474c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(flag), 2475c67d6573Sopenharmony_ci }]; 2476c67d6573Sopenharmony_ci if negated { 2477c67d6573Sopenharmony_ci items.insert( 2478c67d6573Sopenharmony_ci 0, 2479c67d6573Sopenharmony_ci ast::FlagsItem { 2480c67d6573Sopenharmony_ci span: span_range(pat, (range.start + 2)..(range.end - 2)), 2481c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 2482c67d6573Sopenharmony_ci }, 2483c67d6573Sopenharmony_ci ); 2484c67d6573Sopenharmony_ci } 2485c67d6573Sopenharmony_ci Ast::Flags(ast::SetFlags { 2486c67d6573Sopenharmony_ci span: span_range(pat, range.clone()), 2487c67d6573Sopenharmony_ci flags: ast::Flags { 2488c67d6573Sopenharmony_ci span: span_range(pat, (range.start + 2)..(range.end - 1)), 2489c67d6573Sopenharmony_ci items, 2490c67d6573Sopenharmony_ci }, 2491c67d6573Sopenharmony_ci }) 2492c67d6573Sopenharmony_ci } 2493c67d6573Sopenharmony_ci 2494c67d6573Sopenharmony_ci #[test] 2495c67d6573Sopenharmony_ci fn parse_nest_limit() { 2496c67d6573Sopenharmony_ci // A nest limit of 0 still allows some types of regexes. 
2497c67d6573Sopenharmony_ci assert_eq!( 2498c67d6573Sopenharmony_ci parser_nest_limit("", 0).parse(), 2499c67d6573Sopenharmony_ci Ok(Ast::Empty(span(0..0))) 2500c67d6573Sopenharmony_ci ); 2501c67d6573Sopenharmony_ci assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0))); 2502c67d6573Sopenharmony_ci 2503c67d6573Sopenharmony_ci // Test repetition operations, which require one level of nesting. 2504c67d6573Sopenharmony_ci assert_eq!( 2505c67d6573Sopenharmony_ci parser_nest_limit("a+", 0).parse().unwrap_err(), 2506c67d6573Sopenharmony_ci TestError { 2507c67d6573Sopenharmony_ci span: span(0..2), 2508c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(0), 2509c67d6573Sopenharmony_ci } 2510c67d6573Sopenharmony_ci ); 2511c67d6573Sopenharmony_ci assert_eq!( 2512c67d6573Sopenharmony_ci parser_nest_limit("a+", 1).parse(), 2513c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2514c67d6573Sopenharmony_ci span: span(0..2), 2515c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2516c67d6573Sopenharmony_ci span: span(1..2), 2517c67d6573Sopenharmony_ci kind: ast::RepetitionKind::OneOrMore, 2518c67d6573Sopenharmony_ci }, 2519c67d6573Sopenharmony_ci greedy: true, 2520c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2521c67d6573Sopenharmony_ci })) 2522c67d6573Sopenharmony_ci ); 2523c67d6573Sopenharmony_ci assert_eq!( 2524c67d6573Sopenharmony_ci parser_nest_limit("(a)+", 1).parse().unwrap_err(), 2525c67d6573Sopenharmony_ci TestError { 2526c67d6573Sopenharmony_ci span: span(0..3), 2527c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(1), 2528c67d6573Sopenharmony_ci } 2529c67d6573Sopenharmony_ci ); 2530c67d6573Sopenharmony_ci assert_eq!( 2531c67d6573Sopenharmony_ci parser_nest_limit("a+*", 1).parse().unwrap_err(), 2532c67d6573Sopenharmony_ci TestError { 2533c67d6573Sopenharmony_ci span: span(0..2), 2534c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(1), 2535c67d6573Sopenharmony_ci } 2536c67d6573Sopenharmony_ci ); 
2537c67d6573Sopenharmony_ci assert_eq!( 2538c67d6573Sopenharmony_ci parser_nest_limit("a+*", 2).parse(), 2539c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2540c67d6573Sopenharmony_ci span: span(0..3), 2541c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2542c67d6573Sopenharmony_ci span: span(2..3), 2543c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrMore, 2544c67d6573Sopenharmony_ci }, 2545c67d6573Sopenharmony_ci greedy: true, 2546c67d6573Sopenharmony_ci ast: Box::new(Ast::Repetition(ast::Repetition { 2547c67d6573Sopenharmony_ci span: span(0..2), 2548c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2549c67d6573Sopenharmony_ci span: span(1..2), 2550c67d6573Sopenharmony_ci kind: ast::RepetitionKind::OneOrMore, 2551c67d6573Sopenharmony_ci }, 2552c67d6573Sopenharmony_ci greedy: true, 2553c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2554c67d6573Sopenharmony_ci })), 2555c67d6573Sopenharmony_ci })) 2556c67d6573Sopenharmony_ci ); 2557c67d6573Sopenharmony_ci 2558c67d6573Sopenharmony_ci // Test concatenations. A concatenation requires one level of nesting. 2559c67d6573Sopenharmony_ci assert_eq!( 2560c67d6573Sopenharmony_ci parser_nest_limit("ab", 0).parse().unwrap_err(), 2561c67d6573Sopenharmony_ci TestError { 2562c67d6573Sopenharmony_ci span: span(0..2), 2563c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(0), 2564c67d6573Sopenharmony_ci } 2565c67d6573Sopenharmony_ci ); 2566c67d6573Sopenharmony_ci assert_eq!( 2567c67d6573Sopenharmony_ci parser_nest_limit("ab", 1).parse(), 2568c67d6573Sopenharmony_ci Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])) 2569c67d6573Sopenharmony_ci ); 2570c67d6573Sopenharmony_ci assert_eq!( 2571c67d6573Sopenharmony_ci parser_nest_limit("abc", 1).parse(), 2572c67d6573Sopenharmony_ci Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])) 2573c67d6573Sopenharmony_ci ); 2574c67d6573Sopenharmony_ci 2575c67d6573Sopenharmony_ci // Test alternations. An alternation requires one level of nesting. 
2576c67d6573Sopenharmony_ci assert_eq!( 2577c67d6573Sopenharmony_ci parser_nest_limit("a|b", 0).parse().unwrap_err(), 2578c67d6573Sopenharmony_ci TestError { 2579c67d6573Sopenharmony_ci span: span(0..3), 2580c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(0), 2581c67d6573Sopenharmony_ci } 2582c67d6573Sopenharmony_ci ); 2583c67d6573Sopenharmony_ci assert_eq!( 2584c67d6573Sopenharmony_ci parser_nest_limit("a|b", 1).parse(), 2585c67d6573Sopenharmony_ci Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])) 2586c67d6573Sopenharmony_ci ); 2587c67d6573Sopenharmony_ci assert_eq!( 2588c67d6573Sopenharmony_ci parser_nest_limit("a|b|c", 1).parse(), 2589c67d6573Sopenharmony_ci Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])) 2590c67d6573Sopenharmony_ci ); 2591c67d6573Sopenharmony_ci 2592c67d6573Sopenharmony_ci // Test character classes. Classes form their own mini-recursive 2593c67d6573Sopenharmony_ci // syntax! 2594c67d6573Sopenharmony_ci assert_eq!( 2595c67d6573Sopenharmony_ci parser_nest_limit("[a]", 0).parse().unwrap_err(), 2596c67d6573Sopenharmony_ci TestError { 2597c67d6573Sopenharmony_ci span: span(0..3), 2598c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(0), 2599c67d6573Sopenharmony_ci } 2600c67d6573Sopenharmony_ci ); 2601c67d6573Sopenharmony_ci assert_eq!( 2602c67d6573Sopenharmony_ci parser_nest_limit("[a]", 1).parse(), 2603c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 2604c67d6573Sopenharmony_ci span: span(0..3), 2605c67d6573Sopenharmony_ci negated: false, 2606c67d6573Sopenharmony_ci kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( 2607c67d6573Sopenharmony_ci ast::Literal { 2608c67d6573Sopenharmony_ci span: span(1..2), 2609c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 2610c67d6573Sopenharmony_ci c: 'a', 2611c67d6573Sopenharmony_ci } 2612c67d6573Sopenharmony_ci )), 2613c67d6573Sopenharmony_ci }))) 2614c67d6573Sopenharmony_ci ); 2615c67d6573Sopenharmony_ci assert_eq!( 
2616c67d6573Sopenharmony_ci parser_nest_limit("[ab]", 1).parse().unwrap_err(), 2617c67d6573Sopenharmony_ci TestError { 2618c67d6573Sopenharmony_ci span: span(1..3), 2619c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(1), 2620c67d6573Sopenharmony_ci } 2621c67d6573Sopenharmony_ci ); 2622c67d6573Sopenharmony_ci assert_eq!( 2623c67d6573Sopenharmony_ci parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(), 2624c67d6573Sopenharmony_ci TestError { 2625c67d6573Sopenharmony_ci span: span(3..7), 2626c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(2), 2627c67d6573Sopenharmony_ci } 2628c67d6573Sopenharmony_ci ); 2629c67d6573Sopenharmony_ci assert_eq!( 2630c67d6573Sopenharmony_ci parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(), 2631c67d6573Sopenharmony_ci TestError { 2632c67d6573Sopenharmony_ci span: span(4..6), 2633c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(3), 2634c67d6573Sopenharmony_ci } 2635c67d6573Sopenharmony_ci ); 2636c67d6573Sopenharmony_ci assert_eq!( 2637c67d6573Sopenharmony_ci parser_nest_limit("[a--b]", 1).parse().unwrap_err(), 2638c67d6573Sopenharmony_ci TestError { 2639c67d6573Sopenharmony_ci span: span(1..5), 2640c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(1), 2641c67d6573Sopenharmony_ci } 2642c67d6573Sopenharmony_ci ); 2643c67d6573Sopenharmony_ci assert_eq!( 2644c67d6573Sopenharmony_ci parser_nest_limit("[a--bc]", 2).parse().unwrap_err(), 2645c67d6573Sopenharmony_ci TestError { 2646c67d6573Sopenharmony_ci span: span(4..6), 2647c67d6573Sopenharmony_ci kind: ast::ErrorKind::NestLimitExceeded(2), 2648c67d6573Sopenharmony_ci } 2649c67d6573Sopenharmony_ci ); 2650c67d6573Sopenharmony_ci } 2651c67d6573Sopenharmony_ci 2652c67d6573Sopenharmony_ci #[test] 2653c67d6573Sopenharmony_ci fn parse_comments() { 2654c67d6573Sopenharmony_ci let pat = "(?x) 2655c67d6573Sopenharmony_ci# This is comment 1. 2656c67d6573Sopenharmony_cifoo # This is comment 2. 2657c67d6573Sopenharmony_ci # This is comment 3. 
2658c67d6573Sopenharmony_cibar 2659c67d6573Sopenharmony_ci# This is comment 4."; 2660c67d6573Sopenharmony_ci let astc = parser(pat).parse_with_comments().unwrap(); 2661c67d6573Sopenharmony_ci assert_eq!( 2662c67d6573Sopenharmony_ci astc.ast, 2663c67d6573Sopenharmony_ci concat_with( 2664c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2665c67d6573Sopenharmony_ci vec![ 2666c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2667c67d6573Sopenharmony_ci lit_with('f', span_range(pat, 26..27)), 2668c67d6573Sopenharmony_ci lit_with('o', span_range(pat, 27..28)), 2669c67d6573Sopenharmony_ci lit_with('o', span_range(pat, 28..29)), 2670c67d6573Sopenharmony_ci lit_with('b', span_range(pat, 74..75)), 2671c67d6573Sopenharmony_ci lit_with('a', span_range(pat, 75..76)), 2672c67d6573Sopenharmony_ci lit_with('r', span_range(pat, 76..77)), 2673c67d6573Sopenharmony_ci ] 2674c67d6573Sopenharmony_ci ) 2675c67d6573Sopenharmony_ci ); 2676c67d6573Sopenharmony_ci assert_eq!( 2677c67d6573Sopenharmony_ci astc.comments, 2678c67d6573Sopenharmony_ci vec![ 2679c67d6573Sopenharmony_ci ast::Comment { 2680c67d6573Sopenharmony_ci span: span_range(pat, 5..26), 2681c67d6573Sopenharmony_ci comment: s(" This is comment 1."), 2682c67d6573Sopenharmony_ci }, 2683c67d6573Sopenharmony_ci ast::Comment { 2684c67d6573Sopenharmony_ci span: span_range(pat, 30..51), 2685c67d6573Sopenharmony_ci comment: s(" This is comment 2."), 2686c67d6573Sopenharmony_ci }, 2687c67d6573Sopenharmony_ci ast::Comment { 2688c67d6573Sopenharmony_ci span: span_range(pat, 53..74), 2689c67d6573Sopenharmony_ci comment: s(" This is comment 3."), 2690c67d6573Sopenharmony_ci }, 2691c67d6573Sopenharmony_ci ast::Comment { 2692c67d6573Sopenharmony_ci span: span_range(pat, 78..98), 2693c67d6573Sopenharmony_ci comment: s(" This is comment 4."), 2694c67d6573Sopenharmony_ci }, 2695c67d6573Sopenharmony_ci ] 2696c67d6573Sopenharmony_ci ); 2697c67d6573Sopenharmony_ci } 2698c67d6573Sopenharmony_ci 
2699c67d6573Sopenharmony_ci #[test] 2700c67d6573Sopenharmony_ci fn parse_holistic() { 2701c67d6573Sopenharmony_ci assert_eq!(parser("]").parse(), Ok(lit(']', 0))); 2702c67d6573Sopenharmony_ci assert_eq!( 2703c67d6573Sopenharmony_ci parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(), 2704c67d6573Sopenharmony_ci Ok(concat( 2705c67d6573Sopenharmony_ci 0..36, 2706c67d6573Sopenharmony_ci vec![ 2707c67d6573Sopenharmony_ci punct_lit('\\', span(0..2)), 2708c67d6573Sopenharmony_ci punct_lit('.', span(2..4)), 2709c67d6573Sopenharmony_ci punct_lit('+', span(4..6)), 2710c67d6573Sopenharmony_ci punct_lit('*', span(6..8)), 2711c67d6573Sopenharmony_ci punct_lit('?', span(8..10)), 2712c67d6573Sopenharmony_ci punct_lit('(', span(10..12)), 2713c67d6573Sopenharmony_ci punct_lit(')', span(12..14)), 2714c67d6573Sopenharmony_ci punct_lit('|', span(14..16)), 2715c67d6573Sopenharmony_ci punct_lit('[', span(16..18)), 2716c67d6573Sopenharmony_ci punct_lit(']', span(18..20)), 2717c67d6573Sopenharmony_ci punct_lit('{', span(20..22)), 2718c67d6573Sopenharmony_ci punct_lit('}', span(22..24)), 2719c67d6573Sopenharmony_ci punct_lit('^', span(24..26)), 2720c67d6573Sopenharmony_ci punct_lit('$', span(26..28)), 2721c67d6573Sopenharmony_ci punct_lit('#', span(28..30)), 2722c67d6573Sopenharmony_ci punct_lit('&', span(30..32)), 2723c67d6573Sopenharmony_ci punct_lit('-', span(32..34)), 2724c67d6573Sopenharmony_ci punct_lit('~', span(34..36)), 2725c67d6573Sopenharmony_ci ] 2726c67d6573Sopenharmony_ci )) 2727c67d6573Sopenharmony_ci ); 2728c67d6573Sopenharmony_ci } 2729c67d6573Sopenharmony_ci 2730c67d6573Sopenharmony_ci #[test] 2731c67d6573Sopenharmony_ci fn parse_ignore_whitespace() { 2732c67d6573Sopenharmony_ci // Test that basic whitespace insensitivity works. 
2733c67d6573Sopenharmony_ci let pat = "(?x)a b"; 2734c67d6573Sopenharmony_ci assert_eq!( 2735c67d6573Sopenharmony_ci parser(pat).parse(), 2736c67d6573Sopenharmony_ci Ok(concat_with( 2737c67d6573Sopenharmony_ci nspan(npos(0, 1, 1), npos(7, 1, 8)), 2738c67d6573Sopenharmony_ci vec![ 2739c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2740c67d6573Sopenharmony_ci lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), 2741c67d6573Sopenharmony_ci lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))), 2742c67d6573Sopenharmony_ci ] 2743c67d6573Sopenharmony_ci )) 2744c67d6573Sopenharmony_ci ); 2745c67d6573Sopenharmony_ci 2746c67d6573Sopenharmony_ci // Test that we can toggle whitespace insensitivity. 2747c67d6573Sopenharmony_ci let pat = "(?x)a b(?-x)a b"; 2748c67d6573Sopenharmony_ci assert_eq!( 2749c67d6573Sopenharmony_ci parser(pat).parse(), 2750c67d6573Sopenharmony_ci Ok(concat_with( 2751c67d6573Sopenharmony_ci nspan(npos(0, 1, 1), npos(15, 1, 16)), 2752c67d6573Sopenharmony_ci vec![ 2753c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2754c67d6573Sopenharmony_ci lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), 2755c67d6573Sopenharmony_ci lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))), 2756c67d6573Sopenharmony_ci flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true), 2757c67d6573Sopenharmony_ci lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))), 2758c67d6573Sopenharmony_ci lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))), 2759c67d6573Sopenharmony_ci lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))), 2760c67d6573Sopenharmony_ci ] 2761c67d6573Sopenharmony_ci )) 2762c67d6573Sopenharmony_ci ); 2763c67d6573Sopenharmony_ci 2764c67d6573Sopenharmony_ci // Test that nesting whitespace insensitive flags works. 
2765c67d6573Sopenharmony_ci let pat = "a (?x:a )a "; 2766c67d6573Sopenharmony_ci assert_eq!( 2767c67d6573Sopenharmony_ci parser(pat).parse(), 2768c67d6573Sopenharmony_ci Ok(concat_with( 2769c67d6573Sopenharmony_ci span_range(pat, 0..11), 2770c67d6573Sopenharmony_ci vec![ 2771c67d6573Sopenharmony_ci lit_with('a', span_range(pat, 0..1)), 2772c67d6573Sopenharmony_ci lit_with(' ', span_range(pat, 1..2)), 2773c67d6573Sopenharmony_ci Ast::Group(ast::Group { 2774c67d6573Sopenharmony_ci span: span_range(pat, 2..9), 2775c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(ast::Flags { 2776c67d6573Sopenharmony_ci span: span_range(pat, 4..5), 2777c67d6573Sopenharmony_ci items: vec![ast::FlagsItem { 2778c67d6573Sopenharmony_ci span: span_range(pat, 4..5), 2779c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 2780c67d6573Sopenharmony_ci ast::Flag::IgnoreWhitespace 2781c67d6573Sopenharmony_ci ), 2782c67d6573Sopenharmony_ci },], 2783c67d6573Sopenharmony_ci }), 2784c67d6573Sopenharmony_ci ast: Box::new(lit_with('a', span_range(pat, 6..7))), 2785c67d6573Sopenharmony_ci }), 2786c67d6573Sopenharmony_ci lit_with('a', span_range(pat, 9..10)), 2787c67d6573Sopenharmony_ci lit_with(' ', span_range(pat, 10..11)), 2788c67d6573Sopenharmony_ci ] 2789c67d6573Sopenharmony_ci )) 2790c67d6573Sopenharmony_ci ); 2791c67d6573Sopenharmony_ci 2792c67d6573Sopenharmony_ci // Test that whitespace after an opening paren is insignificant. 
2793c67d6573Sopenharmony_ci let pat = "(?x)( ?P<foo> a )"; 2794c67d6573Sopenharmony_ci assert_eq!( 2795c67d6573Sopenharmony_ci parser(pat).parse(), 2796c67d6573Sopenharmony_ci Ok(concat_with( 2797c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2798c67d6573Sopenharmony_ci vec![ 2799c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2800c67d6573Sopenharmony_ci Ast::Group(ast::Group { 2801c67d6573Sopenharmony_ci span: span_range(pat, 4..pat.len()), 2802c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 2803c67d6573Sopenharmony_ci span: span_range(pat, 9..12), 2804c67d6573Sopenharmony_ci name: s("foo"), 2805c67d6573Sopenharmony_ci index: 1, 2806c67d6573Sopenharmony_ci }), 2807c67d6573Sopenharmony_ci ast: Box::new(lit_with('a', span_range(pat, 14..15))), 2808c67d6573Sopenharmony_ci }), 2809c67d6573Sopenharmony_ci ] 2810c67d6573Sopenharmony_ci )) 2811c67d6573Sopenharmony_ci ); 2812c67d6573Sopenharmony_ci let pat = "(?x)( a )"; 2813c67d6573Sopenharmony_ci assert_eq!( 2814c67d6573Sopenharmony_ci parser(pat).parse(), 2815c67d6573Sopenharmony_ci Ok(concat_with( 2816c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2817c67d6573Sopenharmony_ci vec![ 2818c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2819c67d6573Sopenharmony_ci Ast::Group(ast::Group { 2820c67d6573Sopenharmony_ci span: span_range(pat, 4..pat.len()), 2821c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(1), 2822c67d6573Sopenharmony_ci ast: Box::new(lit_with('a', span_range(pat, 7..8))), 2823c67d6573Sopenharmony_ci }), 2824c67d6573Sopenharmony_ci ] 2825c67d6573Sopenharmony_ci )) 2826c67d6573Sopenharmony_ci ); 2827c67d6573Sopenharmony_ci let pat = "(?x)( ?: a )"; 2828c67d6573Sopenharmony_ci assert_eq!( 2829c67d6573Sopenharmony_ci parser(pat).parse(), 2830c67d6573Sopenharmony_ci Ok(concat_with( 2831c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2832c67d6573Sopenharmony_ci vec![ 
2833c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2834c67d6573Sopenharmony_ci Ast::Group(ast::Group { 2835c67d6573Sopenharmony_ci span: span_range(pat, 4..pat.len()), 2836c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(ast::Flags { 2837c67d6573Sopenharmony_ci span: span_range(pat, 8..8), 2838c67d6573Sopenharmony_ci items: vec![], 2839c67d6573Sopenharmony_ci }), 2840c67d6573Sopenharmony_ci ast: Box::new(lit_with('a', span_range(pat, 11..12))), 2841c67d6573Sopenharmony_ci }), 2842c67d6573Sopenharmony_ci ] 2843c67d6573Sopenharmony_ci )) 2844c67d6573Sopenharmony_ci ); 2845c67d6573Sopenharmony_ci let pat = r"(?x)\x { 53 }"; 2846c67d6573Sopenharmony_ci assert_eq!( 2847c67d6573Sopenharmony_ci parser(pat).parse(), 2848c67d6573Sopenharmony_ci Ok(concat_with( 2849c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2850c67d6573Sopenharmony_ci vec![ 2851c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2852c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 2853c67d6573Sopenharmony_ci span: span(4..13), 2854c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace( 2855c67d6573Sopenharmony_ci ast::HexLiteralKind::X 2856c67d6573Sopenharmony_ci ), 2857c67d6573Sopenharmony_ci c: 'S', 2858c67d6573Sopenharmony_ci }), 2859c67d6573Sopenharmony_ci ] 2860c67d6573Sopenharmony_ci )) 2861c67d6573Sopenharmony_ci ); 2862c67d6573Sopenharmony_ci 2863c67d6573Sopenharmony_ci // Test that whitespace after an escape is OK. 
2864c67d6573Sopenharmony_ci let pat = r"(?x)\ "; 2865c67d6573Sopenharmony_ci assert_eq!( 2866c67d6573Sopenharmony_ci parser(pat).parse(), 2867c67d6573Sopenharmony_ci Ok(concat_with( 2868c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2869c67d6573Sopenharmony_ci vec![ 2870c67d6573Sopenharmony_ci flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), 2871c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 2872c67d6573Sopenharmony_ci span: span_range(pat, 4..6), 2873c67d6573Sopenharmony_ci kind: ast::LiteralKind::Special( 2874c67d6573Sopenharmony_ci ast::SpecialLiteralKind::Space 2875c67d6573Sopenharmony_ci ), 2876c67d6573Sopenharmony_ci c: ' ', 2877c67d6573Sopenharmony_ci }), 2878c67d6573Sopenharmony_ci ] 2879c67d6573Sopenharmony_ci )) 2880c67d6573Sopenharmony_ci ); 2881c67d6573Sopenharmony_ci // ... but only when `x` mode is enabled. 2882c67d6573Sopenharmony_ci let pat = r"\ "; 2883c67d6573Sopenharmony_ci assert_eq!( 2884c67d6573Sopenharmony_ci parser(pat).parse().unwrap_err(), 2885c67d6573Sopenharmony_ci TestError { 2886c67d6573Sopenharmony_ci span: span_range(pat, 0..2), 2887c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnrecognized, 2888c67d6573Sopenharmony_ci } 2889c67d6573Sopenharmony_ci ); 2890c67d6573Sopenharmony_ci } 2891c67d6573Sopenharmony_ci 2892c67d6573Sopenharmony_ci #[test] 2893c67d6573Sopenharmony_ci fn parse_newlines() { 2894c67d6573Sopenharmony_ci let pat = ".\n."; 2895c67d6573Sopenharmony_ci assert_eq!( 2896c67d6573Sopenharmony_ci parser(pat).parse(), 2897c67d6573Sopenharmony_ci Ok(concat_with( 2898c67d6573Sopenharmony_ci span_range(pat, 0..3), 2899c67d6573Sopenharmony_ci vec![ 2900c67d6573Sopenharmony_ci Ast::Dot(span_range(pat, 0..1)), 2901c67d6573Sopenharmony_ci lit_with('\n', span_range(pat, 1..2)), 2902c67d6573Sopenharmony_ci Ast::Dot(span_range(pat, 2..3)), 2903c67d6573Sopenharmony_ci ] 2904c67d6573Sopenharmony_ci )) 2905c67d6573Sopenharmony_ci ); 2906c67d6573Sopenharmony_ci 2907c67d6573Sopenharmony_ci let pat = 
"foobar\nbaz\nquux\n"; 2908c67d6573Sopenharmony_ci assert_eq!( 2909c67d6573Sopenharmony_ci parser(pat).parse(), 2910c67d6573Sopenharmony_ci Ok(concat_with( 2911c67d6573Sopenharmony_ci span_range(pat, 0..pat.len()), 2912c67d6573Sopenharmony_ci vec![ 2913c67d6573Sopenharmony_ci lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))), 2914c67d6573Sopenharmony_ci lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))), 2915c67d6573Sopenharmony_ci lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))), 2916c67d6573Sopenharmony_ci lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))), 2917c67d6573Sopenharmony_ci lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), 2918c67d6573Sopenharmony_ci lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))), 2919c67d6573Sopenharmony_ci lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))), 2920c67d6573Sopenharmony_ci lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))), 2921c67d6573Sopenharmony_ci lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))), 2922c67d6573Sopenharmony_ci lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))), 2923c67d6573Sopenharmony_ci lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))), 2924c67d6573Sopenharmony_ci lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))), 2925c67d6573Sopenharmony_ci lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))), 2926c67d6573Sopenharmony_ci lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))), 2927c67d6573Sopenharmony_ci lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))), 2928c67d6573Sopenharmony_ci lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))), 2929c67d6573Sopenharmony_ci ] 2930c67d6573Sopenharmony_ci )) 2931c67d6573Sopenharmony_ci ); 2932c67d6573Sopenharmony_ci } 2933c67d6573Sopenharmony_ci 2934c67d6573Sopenharmony_ci #[test] 2935c67d6573Sopenharmony_ci fn parse_uncounted_repetition() { 2936c67d6573Sopenharmony_ci assert_eq!( 2937c67d6573Sopenharmony_ci parser(r"a*").parse(), 2938c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2939c67d6573Sopenharmony_ci span: span(0..2), 
2940c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2941c67d6573Sopenharmony_ci span: span(1..2), 2942c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrMore, 2943c67d6573Sopenharmony_ci }, 2944c67d6573Sopenharmony_ci greedy: true, 2945c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2946c67d6573Sopenharmony_ci })) 2947c67d6573Sopenharmony_ci ); 2948c67d6573Sopenharmony_ci assert_eq!( 2949c67d6573Sopenharmony_ci parser(r"a+").parse(), 2950c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2951c67d6573Sopenharmony_ci span: span(0..2), 2952c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2953c67d6573Sopenharmony_ci span: span(1..2), 2954c67d6573Sopenharmony_ci kind: ast::RepetitionKind::OneOrMore, 2955c67d6573Sopenharmony_ci }, 2956c67d6573Sopenharmony_ci greedy: true, 2957c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2958c67d6573Sopenharmony_ci })) 2959c67d6573Sopenharmony_ci ); 2960c67d6573Sopenharmony_ci 2961c67d6573Sopenharmony_ci assert_eq!( 2962c67d6573Sopenharmony_ci parser(r"a?").parse(), 2963c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2964c67d6573Sopenharmony_ci span: span(0..2), 2965c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2966c67d6573Sopenharmony_ci span: span(1..2), 2967c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 2968c67d6573Sopenharmony_ci }, 2969c67d6573Sopenharmony_ci greedy: true, 2970c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2971c67d6573Sopenharmony_ci })) 2972c67d6573Sopenharmony_ci ); 2973c67d6573Sopenharmony_ci assert_eq!( 2974c67d6573Sopenharmony_ci parser(r"a??").parse(), 2975c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2976c67d6573Sopenharmony_ci span: span(0..3), 2977c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2978c67d6573Sopenharmony_ci span: span(1..3), 2979c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 2980c67d6573Sopenharmony_ci }, 2981c67d6573Sopenharmony_ci greedy: false, 2982c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 
2983c67d6573Sopenharmony_ci })) 2984c67d6573Sopenharmony_ci ); 2985c67d6573Sopenharmony_ci assert_eq!( 2986c67d6573Sopenharmony_ci parser(r"a?").parse(), 2987c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 2988c67d6573Sopenharmony_ci span: span(0..2), 2989c67d6573Sopenharmony_ci op: ast::RepetitionOp { 2990c67d6573Sopenharmony_ci span: span(1..2), 2991c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 2992c67d6573Sopenharmony_ci }, 2993c67d6573Sopenharmony_ci greedy: true, 2994c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 2995c67d6573Sopenharmony_ci })) 2996c67d6573Sopenharmony_ci ); 2997c67d6573Sopenharmony_ci assert_eq!( 2998c67d6573Sopenharmony_ci parser(r"a?b").parse(), 2999c67d6573Sopenharmony_ci Ok(concat( 3000c67d6573Sopenharmony_ci 0..3, 3001c67d6573Sopenharmony_ci vec![ 3002c67d6573Sopenharmony_ci Ast::Repetition(ast::Repetition { 3003c67d6573Sopenharmony_ci span: span(0..2), 3004c67d6573Sopenharmony_ci op: ast::RepetitionOp { 3005c67d6573Sopenharmony_ci span: span(1..2), 3006c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 3007c67d6573Sopenharmony_ci }, 3008c67d6573Sopenharmony_ci greedy: true, 3009c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 3010c67d6573Sopenharmony_ci }), 3011c67d6573Sopenharmony_ci lit('b', 2), 3012c67d6573Sopenharmony_ci ] 3013c67d6573Sopenharmony_ci )) 3014c67d6573Sopenharmony_ci ); 3015c67d6573Sopenharmony_ci assert_eq!( 3016c67d6573Sopenharmony_ci parser(r"a??b").parse(), 3017c67d6573Sopenharmony_ci Ok(concat( 3018c67d6573Sopenharmony_ci 0..4, 3019c67d6573Sopenharmony_ci vec![ 3020c67d6573Sopenharmony_ci Ast::Repetition(ast::Repetition { 3021c67d6573Sopenharmony_ci span: span(0..3), 3022c67d6573Sopenharmony_ci op: ast::RepetitionOp { 3023c67d6573Sopenharmony_ci span: span(1..3), 3024c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 3025c67d6573Sopenharmony_ci }, 3026c67d6573Sopenharmony_ci greedy: false, 3027c67d6573Sopenharmony_ci ast: Box::new(lit('a', 0)), 
3028c67d6573Sopenharmony_ci }), 3029c67d6573Sopenharmony_ci lit('b', 3), 3030c67d6573Sopenharmony_ci ] 3031c67d6573Sopenharmony_ci )) 3032c67d6573Sopenharmony_ci ); 3033c67d6573Sopenharmony_ci assert_eq!( 3034c67d6573Sopenharmony_ci parser(r"ab?").parse(), 3035c67d6573Sopenharmony_ci Ok(concat( 3036c67d6573Sopenharmony_ci 0..3, 3037c67d6573Sopenharmony_ci vec![ 3038c67d6573Sopenharmony_ci lit('a', 0), 3039c67d6573Sopenharmony_ci Ast::Repetition(ast::Repetition { 3040c67d6573Sopenharmony_ci span: span(1..3), 3041c67d6573Sopenharmony_ci op: ast::RepetitionOp { 3042c67d6573Sopenharmony_ci span: span(2..3), 3043c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 3044c67d6573Sopenharmony_ci }, 3045c67d6573Sopenharmony_ci greedy: true, 3046c67d6573Sopenharmony_ci ast: Box::new(lit('b', 1)), 3047c67d6573Sopenharmony_ci }), 3048c67d6573Sopenharmony_ci ] 3049c67d6573Sopenharmony_ci )) 3050c67d6573Sopenharmony_ci ); 3051c67d6573Sopenharmony_ci assert_eq!( 3052c67d6573Sopenharmony_ci parser(r"(ab)?").parse(), 3053c67d6573Sopenharmony_ci Ok(Ast::Repetition(ast::Repetition { 3054c67d6573Sopenharmony_ci span: span(0..5), 3055c67d6573Sopenharmony_ci op: ast::RepetitionOp { 3056c67d6573Sopenharmony_ci span: span(4..5), 3057c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 3058c67d6573Sopenharmony_ci }, 3059c67d6573Sopenharmony_ci greedy: true, 3060c67d6573Sopenharmony_ci ast: Box::new(group( 3061c67d6573Sopenharmony_ci 0..4, 3062c67d6573Sopenharmony_ci 1, 3063c67d6573Sopenharmony_ci concat(1..3, vec![lit('a', 1), lit('b', 2),]) 3064c67d6573Sopenharmony_ci )), 3065c67d6573Sopenharmony_ci })) 3066c67d6573Sopenharmony_ci ); 3067c67d6573Sopenharmony_ci assert_eq!( 3068c67d6573Sopenharmony_ci parser(r"|a?").parse(), 3069c67d6573Sopenharmony_ci Ok(alt( 3070c67d6573Sopenharmony_ci 0..3, 3071c67d6573Sopenharmony_ci vec![ 3072c67d6573Sopenharmony_ci Ast::Empty(span(0..0)), 3073c67d6573Sopenharmony_ci Ast::Repetition(ast::Repetition { 3074c67d6573Sopenharmony_ci span: 
span(1..3), 3075c67d6573Sopenharmony_ci op: ast::RepetitionOp { 3076c67d6573Sopenharmony_ci span: span(2..3), 3077c67d6573Sopenharmony_ci kind: ast::RepetitionKind::ZeroOrOne, 3078c67d6573Sopenharmony_ci }, 3079c67d6573Sopenharmony_ci greedy: true, 3080c67d6573Sopenharmony_ci ast: Box::new(lit('a', 1)), 3081c67d6573Sopenharmony_ci }), 3082c67d6573Sopenharmony_ci ] 3083c67d6573Sopenharmony_ci )) 3084c67d6573Sopenharmony_ci ); 3085c67d6573Sopenharmony_ci 3086c67d6573Sopenharmony_ci assert_eq!( 3087c67d6573Sopenharmony_ci parser(r"*").parse().unwrap_err(), 3088c67d6573Sopenharmony_ci TestError { 3089c67d6573Sopenharmony_ci span: span(0..0), 3090c67d6573Sopenharmony_ci kind: ast::ErrorKind::RepetitionMissing, 3091c67d6573Sopenharmony_ci } 3092c67d6573Sopenharmony_ci ); 3093c67d6573Sopenharmony_ci assert_eq!( 3094c67d6573Sopenharmony_ci parser(r"(?i)*").parse().unwrap_err(), 3095c67d6573Sopenharmony_ci TestError { 3096c67d6573Sopenharmony_ci span: span(4..4), 3097c67d6573Sopenharmony_ci kind: ast::ErrorKind::RepetitionMissing, 3098c67d6573Sopenharmony_ci } 3099c67d6573Sopenharmony_ci ); 3100c67d6573Sopenharmony_ci assert_eq!( 3101c67d6573Sopenharmony_ci parser(r"(*)").parse().unwrap_err(), 3102c67d6573Sopenharmony_ci TestError { 3103c67d6573Sopenharmony_ci span: span(1..1), 3104c67d6573Sopenharmony_ci kind: ast::ErrorKind::RepetitionMissing, 3105c67d6573Sopenharmony_ci } 3106c67d6573Sopenharmony_ci ); 3107c67d6573Sopenharmony_ci assert_eq!( 3108c67d6573Sopenharmony_ci parser(r"(?:?)").parse().unwrap_err(), 3109c67d6573Sopenharmony_ci TestError { 3110c67d6573Sopenharmony_ci span: span(3..3), 3111c67d6573Sopenharmony_ci kind: ast::ErrorKind::RepetitionMissing, 3112c67d6573Sopenharmony_ci } 3113c67d6573Sopenharmony_ci ); 3114c67d6573Sopenharmony_ci assert_eq!( 3115c67d6573Sopenharmony_ci parser(r"+").parse().unwrap_err(), 3116c67d6573Sopenharmony_ci TestError { 3117c67d6573Sopenharmony_ci span: span(0..0), 3118c67d6573Sopenharmony_ci kind: 
ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"?").parse().unwrap_err(),
            TestError {
                span: span(0..0),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"(?)").parse().unwrap_err(),
            TestError {
                span: span(1..1),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"|*").parse().unwrap_err(),
            TestError {
                span: span(1..1),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"|+").parse().unwrap_err(),
            TestError {
                span: span(1..1),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"|?").parse().unwrap_err(),
            TestError {
                span: span(1..1),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
    }

    /// Exercises counted repetition operators (`{n}`, `{n,}`, `{n,m}`).
    ///
    /// Successful parses must produce an `Ast::Repetition` whose outer span,
    /// operator span, range kind and greediness match exactly. Malformed,
    /// unterminated or misplaced counts must fail with the specific
    /// `ast::ErrorKind` and error span asserted below.
    #[test]
    fn parse_counted_repetition() {
        // Well-formed counts applied to a single literal.
        assert_eq!(
            parser(r"a{5}").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..4),
                op: ast::RepetitionOp {
                    span: span(1..4),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Exactly(5)
                    ),
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        assert_eq!(
            parser(r"a{5,}").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..5),
                op: ast::RepetitionOp {
                    span: span(1..5),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::AtLeast(5)
                    ),
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        assert_eq!(
            parser(r"a{5,9}").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..6),
                op: ast::RepetitionOp {
                    span: span(1..6),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Bounded(5, 9)
                    ),
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        // A trailing `?` is folded into the operator span and clears `greedy`.
        assert_eq!(
            parser(r"a{5}?").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..5),
                op: ast::RepetitionOp {
                    span: span(1..5),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Exactly(5)
                    ),
                },
                greedy: false,
                ast: Box::new(lit('a', 0)),
            }))
        );
        // Inside a concatenation the count binds only to the literal directly
        // before it.
        assert_eq!(
            parser(r"ab{5}").parse(),
            Ok(concat(
                0..5,
                vec![
                    lit('a', 0),
                    Ast::Repetition(ast::Repetition {
                        span: span(1..5),
                        op: ast::RepetitionOp {
                            span: span(2..5),
                            kind: ast::RepetitionKind::Range(
                                ast::RepetitionRange::Exactly(5)
                            ),
                        },
                        greedy: true,
                        ast: Box::new(lit('b', 1)),
                    }),
                ]
            ))
        );
        assert_eq!(
            parser(r"ab{5}c").parse(),
            Ok(concat(
                0..6,
                vec![
                    lit('a', 0),
                    Ast::Repetition(ast::Repetition {
                        span: span(1..5),
                        op: ast::RepetitionOp {
                            span: span(2..5),
                            kind: ast::RepetitionKind::Range(
                                ast::RepetitionRange::Exactly(5)
                            ),
                        },
                        greedy: true,
                        ast: Box::new(lit('b', 1)),
                    }),
                    lit('c', 5),
                ]
            ))
        );

        // Spaces inside the braces are accepted, and the operator span covers
        // them.
        assert_eq!(
            parser(r"a{ 5 }").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..6),
                op: ast::RepetitionOp {
                    span: span(1..6),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Exactly(5)
                    ),
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        assert_eq!(
            parser(r"a{ 5 , 9 }").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..10),
                op: ast::RepetitionOp {
                    span: span(1..10),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Bounded(5, 9)
                    ),
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            }))
        );
        // With the `parser_ignore_whitespace` helper, a `?` separated from
        // the closing brace by a space is still expected to make the
        // repetition lazy.
        assert_eq!(
            parser_ignore_whitespace(r"a{5,9} ?").parse(),
            Ok(Ast::Repetition(ast::Repetition {
                span: span(0..8),
                op: ast::RepetitionOp {
                    span: span(1..8),
                    kind: ast::RepetitionKind::Range(
                        ast::RepetitionRange::Bounded(5, 9)
                    ),
                },
                greedy: false,
                ast: Box::new(lit('a', 0)),
            }))
        );

        // A flag directive is not something a count can repeat.
        assert_eq!(
            parser(r"(?i){0}").parse().unwrap_err(),
            TestError {
                span: span(4..4),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"(?m){1,1}").parse().unwrap_err(),
            TestError {
                span: span(4..4),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        // Missing digits, unterminated braces and rejected decimals.
        assert_eq!(
            parser(r"a{]}").parse().unwrap_err(),
            TestError {
                span: span(2..2),
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
            }
        );
        assert_eq!(
            parser(r"a{1,]}").parse().unwrap_err(),
            TestError {
                span: span(4..4),
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
            }
        );
        assert_eq!(
            parser(r"a{").parse().unwrap_err(),
            TestError {
                span: span(1..2),
                kind: ast::ErrorKind::RepetitionCountUnclosed,
            }
        );
        assert_eq!(
            parser(r"a{}").parse().unwrap_err(),
            TestError {
                span: span(2..2),
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
            }
        );
        assert_eq!(
            parser(r"a{a").parse().unwrap_err(),
            TestError {
                span: span(2..2),
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
            }
        );
        assert_eq!(
            parser(r"a{9999999999}").parse().unwrap_err(),
            TestError {
                span: span(2..12),
                kind: ast::ErrorKind::DecimalInvalid,
            }
        );
        assert_eq!(
            parser(r"a{9").parse().unwrap_err(),
            TestError {
                span: span(1..3),
                kind: ast::ErrorKind::RepetitionCountUnclosed,
            }
        );
        assert_eq!(
            parser(r"a{9,a").parse().unwrap_err(),
            TestError {
                span: span(4..4),
                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
            }
        );
        assert_eq!(
            parser(r"a{9,9999999999}").parse().unwrap_err(),
            TestError {
                span: span(4..14),
                kind: ast::ErrorKind::DecimalInvalid,
            }
        );
        assert_eq!(
            parser(r"a{9,").parse().unwrap_err(),
            TestError {
                span: span(1..4),
                kind: ast::ErrorKind::RepetitionCountUnclosed,
            }
        );
        assert_eq!(
            parser(r"a{9,11").parse().unwrap_err(),
            TestError {
                span: span(1..6),
                kind: ast::ErrorKind::RepetitionCountUnclosed,
            }
        );
        // Lower bound greater than upper bound.
        assert_eq!(
            parser(r"a{2,1}").parse().unwrap_err(),
            TestError {
                span: span(1..6),
                kind: ast::ErrorKind::RepetitionCountInvalid,
            }
        );
        // A count with no preceding expression at all.
        assert_eq!(
            parser(r"{5}").parse().unwrap_err(),
            TestError {
                span: span(0..0),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
        assert_eq!(
            parser(r"|{5}").parse().unwrap_err(),
            TestError {
                span: span(1..1),
                kind: ast::ErrorKind::RepetitionMissing,
            }
        );
    }

    /// Exercises the alternation operator `|`.
    ///
    /// Covers flat and parenthesised alternations, nested groups, empty
    /// branches (which must appear as zero-width `Ast::Empty` nodes), and an
    /// alternation next to an unbalanced parenthesis.
    #[test]
    fn parse_alternate() {
        assert_eq!(
            parser(r"a|b").parse(),
            Ok(Ast::Alternation(ast::Alternation {
                span: span(0..3),
                asts: vec![lit('a', 0), lit('b', 2)],
            }))
        );
        assert_eq!(
            parser(r"(a|b)").parse(),
            Ok(group(
                0..5,
                1,
                Ast::Alternation(ast::Alternation {
                    span: span(1..4),
                    asts: vec![lit('a', 1), lit('b', 3)],
                })
            ))
        );

        // Three or more branches stay in one flat alternation.
        assert_eq!(
            parser(r"a|b|c").parse(),
            Ok(Ast::Alternation(ast::Alternation {
                span: span(0..5),
                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
            }))
        );
        assert_eq!(
            parser(r"ax|by|cz").parse(),
            Ok(Ast::Alternation(ast::Alternation {
                span: span(0..8),
                asts: vec![
                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
                ],
            }))
        );
        assert_eq!(
            parser(r"(ax|by|cz)").parse(),
            Ok(group(
                0..10,
                1,
                Ast::Alternation(ast::Alternation {
                    span: span(1..9),
                    asts: vec![
                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                        concat(4..6, vec![lit('b', 4), lit('y', 5)]),
                        concat(7..9, vec![lit('c', 7), lit('z', 8)]),
                    ],
                })
            ))
        );
        // Nested groups: capture indices 1, 2, 3 follow the order of the
        // opening parentheses.
        assert_eq!(
            parser(r"(ax|(by|(cz)))").parse(),
            Ok(group(
                0..14,
                1,
                alt(
                    1..13,
                    vec![
                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                        group(
                            4..13,
                            2,
                            alt(
                                5..12,
                                vec![
                                    concat(
                                        5..7,
                                        vec![lit('b', 5), lit('y', 6)]
                                    ),
                                    group(
                                        8..12,
                                        3,
                                        concat(
                                            9..11,
                                            vec![lit('c', 9), lit('z', 10),]
                                        )
                                    ),
                                ]
                            )
                        ),
                    ]
                )
            ))
        );

        // Empty branches are kept as zero-width `Ast::Empty` nodes.
        assert_eq!(
            parser(r"|").parse(),
            Ok(alt(
                0..1,
                vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
            ))
        );
        assert_eq!(
            parser(r"||").parse(),
            Ok(alt(
                0..2,
                vec![
                    Ast::Empty(span(0..0)),
                    Ast::Empty(span(1..1)),
                    Ast::Empty(span(2..2)),
                ]
            ))
        );
        assert_eq!(
            parser(r"a|").parse(),
            Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
        );
        assert_eq!(
            parser(r"|a").parse(),
            Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
        );

        // The same empty-branch cases inside a capturing group.
        assert_eq!(
            parser(r"(|)").parse(),
            Ok(group(
                0..3,
                1,
                alt(
                    1..2,
                    vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
                )
            ))
        );
        assert_eq!(
            parser(r"(a|)").parse(),
            Ok(group(
                0..4,
                1,
                alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
            ))
        );
        assert_eq!(
            parser(r"(|a)").parse(),
            Ok(group(
                0..4,
                1,
                alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
            ))
        );

        // Unbalanced parentheses: the error span is the offending
        // parenthesis itself.
        assert_eq!(
            parser(r"a|b)").parse().unwrap_err(),
            TestError {
                span: span(3..4),
                kind: ast::ErrorKind::GroupUnopened,
            }
        );
        assert_eq!(
            parser(r"(a|b").parse().unwrap_err(),
            TestError {
                span: span(0..1),
                kind: ast::ErrorKind::GroupUnclosed,
            }
        );
    }

    /// Look-ahead and look-behind openers must be rejected.
    ///
    /// Each of `(?=`, `(?!`, `(?<=` and `(?<!` must fail with
    /// `UnsupportedLookAround`, and the error span must cover exactly the
    /// opener (three or four characters), not the whole group.
    #[test]
    fn parse_unsupported_lookaround() {
        assert_eq!(
            parser(r"(?=a)").parse().unwrap_err(),
            TestError {
                span: span(0..3),
                kind: ast::ErrorKind::UnsupportedLookAround,
            }
        );
        assert_eq!(
            parser(r"(?!a)").parse().unwrap_err(),
            TestError {
                span: span(0..3),
                kind: ast::ErrorKind::UnsupportedLookAround,
            }
        );
        assert_eq!(
            parser(r"(?<=a)").parse().unwrap_err(),
            TestError {
                span: span(0..4),
                kind: ast::ErrorKind::UnsupportedLookAround,
            }
        );
        assert_eq!(
            parser(r"(?<!a)").parse().unwrap_err(),
            TestError {
                span: span(0..4),
                kind: ast::ErrorKind::UnsupportedLookAround,
            }
        );
    }
3606c67d6573Sopenharmony_ci 3607c67d6573Sopenharmony_ci #[test] 3608c67d6573Sopenharmony_ci fn parse_group() { 3609c67d6573Sopenharmony_ci assert_eq!( 3610c67d6573Sopenharmony_ci parser("(?i)").parse(), 3611c67d6573Sopenharmony_ci Ok(Ast::Flags(ast::SetFlags { 3612c67d6573Sopenharmony_ci span: span(0..4), 3613c67d6573Sopenharmony_ci flags: ast::Flags { 3614c67d6573Sopenharmony_ci span: span(2..3), 3615c67d6573Sopenharmony_ci items: vec![ast::FlagsItem { 3616c67d6573Sopenharmony_ci span: span(2..3), 3617c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3618c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3619c67d6573Sopenharmony_ci ), 3620c67d6573Sopenharmony_ci }], 3621c67d6573Sopenharmony_ci }, 3622c67d6573Sopenharmony_ci })) 3623c67d6573Sopenharmony_ci ); 3624c67d6573Sopenharmony_ci assert_eq!( 3625c67d6573Sopenharmony_ci parser("(?iU)").parse(), 3626c67d6573Sopenharmony_ci Ok(Ast::Flags(ast::SetFlags { 3627c67d6573Sopenharmony_ci span: span(0..5), 3628c67d6573Sopenharmony_ci flags: ast::Flags { 3629c67d6573Sopenharmony_ci span: span(2..4), 3630c67d6573Sopenharmony_ci items: vec![ 3631c67d6573Sopenharmony_ci ast::FlagsItem { 3632c67d6573Sopenharmony_ci span: span(2..3), 3633c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3634c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3635c67d6573Sopenharmony_ci ), 3636c67d6573Sopenharmony_ci }, 3637c67d6573Sopenharmony_ci ast::FlagsItem { 3638c67d6573Sopenharmony_ci span: span(3..4), 3639c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3640c67d6573Sopenharmony_ci ast::Flag::SwapGreed 3641c67d6573Sopenharmony_ci ), 3642c67d6573Sopenharmony_ci }, 3643c67d6573Sopenharmony_ci ], 3644c67d6573Sopenharmony_ci }, 3645c67d6573Sopenharmony_ci })) 3646c67d6573Sopenharmony_ci ); 3647c67d6573Sopenharmony_ci assert_eq!( 3648c67d6573Sopenharmony_ci parser("(?i-U)").parse(), 3649c67d6573Sopenharmony_ci Ok(Ast::Flags(ast::SetFlags { 3650c67d6573Sopenharmony_ci span: span(0..6), 3651c67d6573Sopenharmony_ci flags: 
ast::Flags { 3652c67d6573Sopenharmony_ci span: span(2..5), 3653c67d6573Sopenharmony_ci items: vec![ 3654c67d6573Sopenharmony_ci ast::FlagsItem { 3655c67d6573Sopenharmony_ci span: span(2..3), 3656c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3657c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3658c67d6573Sopenharmony_ci ), 3659c67d6573Sopenharmony_ci }, 3660c67d6573Sopenharmony_ci ast::FlagsItem { 3661c67d6573Sopenharmony_ci span: span(3..4), 3662c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 3663c67d6573Sopenharmony_ci }, 3664c67d6573Sopenharmony_ci ast::FlagsItem { 3665c67d6573Sopenharmony_ci span: span(4..5), 3666c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3667c67d6573Sopenharmony_ci ast::Flag::SwapGreed 3668c67d6573Sopenharmony_ci ), 3669c67d6573Sopenharmony_ci }, 3670c67d6573Sopenharmony_ci ], 3671c67d6573Sopenharmony_ci }, 3672c67d6573Sopenharmony_ci })) 3673c67d6573Sopenharmony_ci ); 3674c67d6573Sopenharmony_ci 3675c67d6573Sopenharmony_ci assert_eq!( 3676c67d6573Sopenharmony_ci parser("()").parse(), 3677c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3678c67d6573Sopenharmony_ci span: span(0..2), 3679c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(1), 3680c67d6573Sopenharmony_ci ast: Box::new(Ast::Empty(span(1..1))), 3681c67d6573Sopenharmony_ci })) 3682c67d6573Sopenharmony_ci ); 3683c67d6573Sopenharmony_ci assert_eq!( 3684c67d6573Sopenharmony_ci parser("(a)").parse(), 3685c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3686c67d6573Sopenharmony_ci span: span(0..3), 3687c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(1), 3688c67d6573Sopenharmony_ci ast: Box::new(lit('a', 1)), 3689c67d6573Sopenharmony_ci })) 3690c67d6573Sopenharmony_ci ); 3691c67d6573Sopenharmony_ci assert_eq!( 3692c67d6573Sopenharmony_ci parser("(())").parse(), 3693c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3694c67d6573Sopenharmony_ci span: span(0..4), 3695c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(1), 
3696c67d6573Sopenharmony_ci ast: Box::new(Ast::Group(ast::Group { 3697c67d6573Sopenharmony_ci span: span(1..3), 3698c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureIndex(2), 3699c67d6573Sopenharmony_ci ast: Box::new(Ast::Empty(span(2..2))), 3700c67d6573Sopenharmony_ci })), 3701c67d6573Sopenharmony_ci })) 3702c67d6573Sopenharmony_ci ); 3703c67d6573Sopenharmony_ci 3704c67d6573Sopenharmony_ci assert_eq!( 3705c67d6573Sopenharmony_ci parser("(?:a)").parse(), 3706c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3707c67d6573Sopenharmony_ci span: span(0..5), 3708c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(ast::Flags { 3709c67d6573Sopenharmony_ci span: span(2..2), 3710c67d6573Sopenharmony_ci items: vec![], 3711c67d6573Sopenharmony_ci }), 3712c67d6573Sopenharmony_ci ast: Box::new(lit('a', 3)), 3713c67d6573Sopenharmony_ci })) 3714c67d6573Sopenharmony_ci ); 3715c67d6573Sopenharmony_ci 3716c67d6573Sopenharmony_ci assert_eq!( 3717c67d6573Sopenharmony_ci parser("(?i:a)").parse(), 3718c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3719c67d6573Sopenharmony_ci span: span(0..6), 3720c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(ast::Flags { 3721c67d6573Sopenharmony_ci span: span(2..3), 3722c67d6573Sopenharmony_ci items: vec![ast::FlagsItem { 3723c67d6573Sopenharmony_ci span: span(2..3), 3724c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3725c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3726c67d6573Sopenharmony_ci ), 3727c67d6573Sopenharmony_ci },], 3728c67d6573Sopenharmony_ci }), 3729c67d6573Sopenharmony_ci ast: Box::new(lit('a', 4)), 3730c67d6573Sopenharmony_ci })) 3731c67d6573Sopenharmony_ci ); 3732c67d6573Sopenharmony_ci assert_eq!( 3733c67d6573Sopenharmony_ci parser("(?i-U:a)").parse(), 3734c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3735c67d6573Sopenharmony_ci span: span(0..8), 3736c67d6573Sopenharmony_ci kind: ast::GroupKind::NonCapturing(ast::Flags { 3737c67d6573Sopenharmony_ci span: span(2..5), 
3738c67d6573Sopenharmony_ci items: vec![ 3739c67d6573Sopenharmony_ci ast::FlagsItem { 3740c67d6573Sopenharmony_ci span: span(2..3), 3741c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3742c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3743c67d6573Sopenharmony_ci ), 3744c67d6573Sopenharmony_ci }, 3745c67d6573Sopenharmony_ci ast::FlagsItem { 3746c67d6573Sopenharmony_ci span: span(3..4), 3747c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 3748c67d6573Sopenharmony_ci }, 3749c67d6573Sopenharmony_ci ast::FlagsItem { 3750c67d6573Sopenharmony_ci span: span(4..5), 3751c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3752c67d6573Sopenharmony_ci ast::Flag::SwapGreed 3753c67d6573Sopenharmony_ci ), 3754c67d6573Sopenharmony_ci }, 3755c67d6573Sopenharmony_ci ], 3756c67d6573Sopenharmony_ci }), 3757c67d6573Sopenharmony_ci ast: Box::new(lit('a', 6)), 3758c67d6573Sopenharmony_ci })) 3759c67d6573Sopenharmony_ci ); 3760c67d6573Sopenharmony_ci 3761c67d6573Sopenharmony_ci assert_eq!( 3762c67d6573Sopenharmony_ci parser("(").parse().unwrap_err(), 3763c67d6573Sopenharmony_ci TestError { 3764c67d6573Sopenharmony_ci span: span(0..1), 3765c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnclosed, 3766c67d6573Sopenharmony_ci } 3767c67d6573Sopenharmony_ci ); 3768c67d6573Sopenharmony_ci assert_eq!( 3769c67d6573Sopenharmony_ci parser("(?").parse().unwrap_err(), 3770c67d6573Sopenharmony_ci TestError { 3771c67d6573Sopenharmony_ci span: span(0..1), 3772c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnclosed, 3773c67d6573Sopenharmony_ci } 3774c67d6573Sopenharmony_ci ); 3775c67d6573Sopenharmony_ci assert_eq!( 3776c67d6573Sopenharmony_ci parser("(?P").parse().unwrap_err(), 3777c67d6573Sopenharmony_ci TestError { 3778c67d6573Sopenharmony_ci span: span(2..3), 3779c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagUnrecognized, 3780c67d6573Sopenharmony_ci } 3781c67d6573Sopenharmony_ci ); 3782c67d6573Sopenharmony_ci assert_eq!( 3783c67d6573Sopenharmony_ci 
parser("(?P<").parse().unwrap_err(), 3784c67d6573Sopenharmony_ci TestError { 3785c67d6573Sopenharmony_ci span: span(4..4), 3786c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameUnexpectedEof, 3787c67d6573Sopenharmony_ci } 3788c67d6573Sopenharmony_ci ); 3789c67d6573Sopenharmony_ci assert_eq!( 3790c67d6573Sopenharmony_ci parser("(a").parse().unwrap_err(), 3791c67d6573Sopenharmony_ci TestError { 3792c67d6573Sopenharmony_ci span: span(0..1), 3793c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnclosed, 3794c67d6573Sopenharmony_ci } 3795c67d6573Sopenharmony_ci ); 3796c67d6573Sopenharmony_ci assert_eq!( 3797c67d6573Sopenharmony_ci parser("(()").parse().unwrap_err(), 3798c67d6573Sopenharmony_ci TestError { 3799c67d6573Sopenharmony_ci span: span(0..1), 3800c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnclosed, 3801c67d6573Sopenharmony_ci } 3802c67d6573Sopenharmony_ci ); 3803c67d6573Sopenharmony_ci assert_eq!( 3804c67d6573Sopenharmony_ci parser(")").parse().unwrap_err(), 3805c67d6573Sopenharmony_ci TestError { 3806c67d6573Sopenharmony_ci span: span(0..1), 3807c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnopened, 3808c67d6573Sopenharmony_ci } 3809c67d6573Sopenharmony_ci ); 3810c67d6573Sopenharmony_ci assert_eq!( 3811c67d6573Sopenharmony_ci parser("a)").parse().unwrap_err(), 3812c67d6573Sopenharmony_ci TestError { 3813c67d6573Sopenharmony_ci span: span(1..2), 3814c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupUnopened, 3815c67d6573Sopenharmony_ci } 3816c67d6573Sopenharmony_ci ); 3817c67d6573Sopenharmony_ci } 3818c67d6573Sopenharmony_ci 3819c67d6573Sopenharmony_ci /* Checks named capture groups written as `(?P<name>...)`. For the accepted names `a`, `abc`, `a_1`, `a.1` and `a[1]` the expected AST is a `Group` whose `CaptureName` records the name, the span of the name alone, and capture index 1. For rejected input the exact `ErrorKind` and span are asserted: input ending inside the name (`GroupNameUnexpectedEof`), an empty name (`GroupNameEmpty`), the names `0a`, `~` and `abc~` (`GroupNameInvalid`, span on the offending character), and a name used twice (`GroupNameDuplicate`, carrying the span of the first use). */ #[test] 3820c67d6573Sopenharmony_ci fn parse_capture_name() { 3821c67d6573Sopenharmony_ci assert_eq!( 3822c67d6573Sopenharmony_ci parser("(?P<a>z)").parse(), 3823c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3824c67d6573Sopenharmony_ci span: span(0..8), 3825c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 3826c67d6573Sopenharmony_ci span: span(4..5), 
3827c67d6573Sopenharmony_ci name: s("a"), 3828c67d6573Sopenharmony_ci index: 1, 3829c67d6573Sopenharmony_ci }), 3830c67d6573Sopenharmony_ci ast: Box::new(lit('z', 6)), 3831c67d6573Sopenharmony_ci })) 3832c67d6573Sopenharmony_ci ); 3833c67d6573Sopenharmony_ci assert_eq!( 3834c67d6573Sopenharmony_ci parser("(?P<abc>z)").parse(), 3835c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3836c67d6573Sopenharmony_ci span: span(0..10), 3837c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 3838c67d6573Sopenharmony_ci span: span(4..7), 3839c67d6573Sopenharmony_ci name: s("abc"), 3840c67d6573Sopenharmony_ci index: 1, 3841c67d6573Sopenharmony_ci }), 3842c67d6573Sopenharmony_ci ast: Box::new(lit('z', 8)), 3843c67d6573Sopenharmony_ci })) 3844c67d6573Sopenharmony_ci ); 3845c67d6573Sopenharmony_ci 3846c67d6573Sopenharmony_ci assert_eq!( 3847c67d6573Sopenharmony_ci parser("(?P<a_1>z)").parse(), 3848c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3849c67d6573Sopenharmony_ci span: span(0..10), 3850c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 3851c67d6573Sopenharmony_ci span: span(4..7), 3852c67d6573Sopenharmony_ci name: s("a_1"), 3853c67d6573Sopenharmony_ci index: 1, 3854c67d6573Sopenharmony_ci }), 3855c67d6573Sopenharmony_ci ast: Box::new(lit('z', 8)), 3856c67d6573Sopenharmony_ci })) 3857c67d6573Sopenharmony_ci ); 3858c67d6573Sopenharmony_ci 3859c67d6573Sopenharmony_ci assert_eq!( 3860c67d6573Sopenharmony_ci parser("(?P<a.1>z)").parse(), 3861c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3862c67d6573Sopenharmony_ci span: span(0..10), 3863c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 3864c67d6573Sopenharmony_ci span: span(4..7), 3865c67d6573Sopenharmony_ci name: s("a.1"), 3866c67d6573Sopenharmony_ci index: 1, 3867c67d6573Sopenharmony_ci }), 3868c67d6573Sopenharmony_ci ast: Box::new(lit('z', 8)), 3869c67d6573Sopenharmony_ci })) 3870c67d6573Sopenharmony_ci ); 3871c67d6573Sopenharmony_ci 
3872c67d6573Sopenharmony_ci assert_eq!( 3873c67d6573Sopenharmony_ci parser("(?P<a[1]>z)").parse(), 3874c67d6573Sopenharmony_ci Ok(Ast::Group(ast::Group { 3875c67d6573Sopenharmony_ci span: span(0..11), 3876c67d6573Sopenharmony_ci kind: ast::GroupKind::CaptureName(ast::CaptureName { 3877c67d6573Sopenharmony_ci span: span(4..8), 3878c67d6573Sopenharmony_ci name: s("a[1]"), 3879c67d6573Sopenharmony_ci index: 1, 3880c67d6573Sopenharmony_ci }), 3881c67d6573Sopenharmony_ci ast: Box::new(lit('z', 9)), 3882c67d6573Sopenharmony_ci })) 3883c67d6573Sopenharmony_ci ); 3884c67d6573Sopenharmony_ci 3885c67d6573Sopenharmony_ci assert_eq!( 3886c67d6573Sopenharmony_ci parser("(?P<").parse().unwrap_err(), 3887c67d6573Sopenharmony_ci TestError { 3888c67d6573Sopenharmony_ci span: span(4..4), 3889c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameUnexpectedEof, 3890c67d6573Sopenharmony_ci } 3891c67d6573Sopenharmony_ci ); 3892c67d6573Sopenharmony_ci assert_eq!( 3893c67d6573Sopenharmony_ci parser("(?P<>z)").parse().unwrap_err(), 3894c67d6573Sopenharmony_ci TestError { 3895c67d6573Sopenharmony_ci span: span(4..4), 3896c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameEmpty, 3897c67d6573Sopenharmony_ci } 3898c67d6573Sopenharmony_ci ); 3899c67d6573Sopenharmony_ci assert_eq!( 3900c67d6573Sopenharmony_ci parser("(?P<a").parse().unwrap_err(), 3901c67d6573Sopenharmony_ci TestError { 3902c67d6573Sopenharmony_ci span: span(5..5), 3903c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameUnexpectedEof, 3904c67d6573Sopenharmony_ci } 3905c67d6573Sopenharmony_ci ); 3906c67d6573Sopenharmony_ci assert_eq!( 3907c67d6573Sopenharmony_ci parser("(?P<ab").parse().unwrap_err(), 3908c67d6573Sopenharmony_ci TestError { 3909c67d6573Sopenharmony_ci span: span(6..6), 3910c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameUnexpectedEof, 3911c67d6573Sopenharmony_ci } 3912c67d6573Sopenharmony_ci ); 3913c67d6573Sopenharmony_ci assert_eq!( 3914c67d6573Sopenharmony_ci parser("(?P<0a").parse().unwrap_err(), 
3915c67d6573Sopenharmony_ci TestError { 3916c67d6573Sopenharmony_ci span: span(4..5), 3917c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameInvalid, 3918c67d6573Sopenharmony_ci } 3919c67d6573Sopenharmony_ci ); 3920c67d6573Sopenharmony_ci assert_eq!( 3921c67d6573Sopenharmony_ci parser("(?P<~").parse().unwrap_err(), 3922c67d6573Sopenharmony_ci TestError { 3923c67d6573Sopenharmony_ci span: span(4..5), 3924c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameInvalid, 3925c67d6573Sopenharmony_ci } 3926c67d6573Sopenharmony_ci ); 3927c67d6573Sopenharmony_ci assert_eq!( 3928c67d6573Sopenharmony_ci parser("(?P<abc~").parse().unwrap_err(), 3929c67d6573Sopenharmony_ci TestError { 3930c67d6573Sopenharmony_ci span: span(7..8), 3931c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameInvalid, 3932c67d6573Sopenharmony_ci } 3933c67d6573Sopenharmony_ci ); 3934c67d6573Sopenharmony_ci assert_eq!( 3935c67d6573Sopenharmony_ci parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(), 3936c67d6573Sopenharmony_ci TestError { 3937c67d6573Sopenharmony_ci span: span(12..13), 3938c67d6573Sopenharmony_ci kind: ast::ErrorKind::GroupNameDuplicate { 3939c67d6573Sopenharmony_ci original: span(4..5), 3940c67d6573Sopenharmony_ci }, 3941c67d6573Sopenharmony_ci } 3942c67d6573Sopenharmony_ci ); 3943c67d6573Sopenharmony_ci } 3944c67d6573Sopenharmony_ci 3945c67d6573Sopenharmony_ci /* Calls `parse_flags` directly on flag lists terminated by `:` or `)`. Success cases assert the overall `Flags` span (which excludes the terminator) and one `FlagsItem` per character, with `-` reported as a `Negation` item. Failure cases assert the exact `ErrorKind` and span: no terminator before end of input (`FlagUnexpectedEof`), an unknown flag letter (`FlagUnrecognized`), a repeated flag (`FlagDuplicate`, with the span of the first occurrence), a second `-` (`FlagRepeatedNegation`, with the span of the first `-`), and a `-` immediately followed by `)` (`FlagDanglingNegation`). */ #[test] 3946c67d6573Sopenharmony_ci fn parse_flags() { 3947c67d6573Sopenharmony_ci assert_eq!( 3948c67d6573Sopenharmony_ci parser("i:").parse_flags(), 3949c67d6573Sopenharmony_ci Ok(ast::Flags { 3950c67d6573Sopenharmony_ci span: span(0..1), 3951c67d6573Sopenharmony_ci items: vec![ast::FlagsItem { 3952c67d6573Sopenharmony_ci span: span(0..1), 3953c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive), 3954c67d6573Sopenharmony_ci }], 3955c67d6573Sopenharmony_ci }) 3956c67d6573Sopenharmony_ci ); 3957c67d6573Sopenharmony_ci assert_eq!( 3958c67d6573Sopenharmony_ci parser("i)").parse_flags(), 
3959c67d6573Sopenharmony_ci Ok(ast::Flags { 3960c67d6573Sopenharmony_ci span: span(0..1), 3961c67d6573Sopenharmony_ci items: vec![ast::FlagsItem { 3962c67d6573Sopenharmony_ci span: span(0..1), 3963c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive), 3964c67d6573Sopenharmony_ci }], 3965c67d6573Sopenharmony_ci }) 3966c67d6573Sopenharmony_ci ); 3967c67d6573Sopenharmony_ci 3968c67d6573Sopenharmony_ci assert_eq!( 3969c67d6573Sopenharmony_ci parser("isU:").parse_flags(), 3970c67d6573Sopenharmony_ci Ok(ast::Flags { 3971c67d6573Sopenharmony_ci span: span(0..3), 3972c67d6573Sopenharmony_ci items: vec![ 3973c67d6573Sopenharmony_ci ast::FlagsItem { 3974c67d6573Sopenharmony_ci span: span(0..1), 3975c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3976c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 3977c67d6573Sopenharmony_ci ), 3978c67d6573Sopenharmony_ci }, 3979c67d6573Sopenharmony_ci ast::FlagsItem { 3980c67d6573Sopenharmony_ci span: span(1..2), 3981c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 3982c67d6573Sopenharmony_ci ast::Flag::DotMatchesNewLine 3983c67d6573Sopenharmony_ci ), 3984c67d6573Sopenharmony_ci }, 3985c67d6573Sopenharmony_ci ast::FlagsItem { 3986c67d6573Sopenharmony_ci span: span(2..3), 3987c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), 3988c67d6573Sopenharmony_ci }, 3989c67d6573Sopenharmony_ci ], 3990c67d6573Sopenharmony_ci }) 3991c67d6573Sopenharmony_ci ); 3992c67d6573Sopenharmony_ci 3993c67d6573Sopenharmony_ci assert_eq!( 3994c67d6573Sopenharmony_ci parser("-isU:").parse_flags(), 3995c67d6573Sopenharmony_ci Ok(ast::Flags { 3996c67d6573Sopenharmony_ci span: span(0..4), 3997c67d6573Sopenharmony_ci items: vec![ 3998c67d6573Sopenharmony_ci ast::FlagsItem { 3999c67d6573Sopenharmony_ci span: span(0..1), 4000c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 4001c67d6573Sopenharmony_ci }, 4002c67d6573Sopenharmony_ci ast::FlagsItem { 4003c67d6573Sopenharmony_ci span: 
span(1..2), 4004c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 4005c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 4006c67d6573Sopenharmony_ci ), 4007c67d6573Sopenharmony_ci }, 4008c67d6573Sopenharmony_ci ast::FlagsItem { 4009c67d6573Sopenharmony_ci span: span(2..3), 4010c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 4011c67d6573Sopenharmony_ci ast::Flag::DotMatchesNewLine 4012c67d6573Sopenharmony_ci ), 4013c67d6573Sopenharmony_ci }, 4014c67d6573Sopenharmony_ci ast::FlagsItem { 4015c67d6573Sopenharmony_ci span: span(3..4), 4016c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), 4017c67d6573Sopenharmony_ci }, 4018c67d6573Sopenharmony_ci ], 4019c67d6573Sopenharmony_ci }) 4020c67d6573Sopenharmony_ci ); 4021c67d6573Sopenharmony_ci assert_eq!( 4022c67d6573Sopenharmony_ci parser("i-sU:").parse_flags(), 4023c67d6573Sopenharmony_ci Ok(ast::Flags { 4024c67d6573Sopenharmony_ci span: span(0..4), 4025c67d6573Sopenharmony_ci items: vec![ 4026c67d6573Sopenharmony_ci ast::FlagsItem { 4027c67d6573Sopenharmony_ci span: span(0..1), 4028c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 4029c67d6573Sopenharmony_ci ast::Flag::CaseInsensitive 4030c67d6573Sopenharmony_ci ), 4031c67d6573Sopenharmony_ci }, 4032c67d6573Sopenharmony_ci ast::FlagsItem { 4033c67d6573Sopenharmony_ci span: span(1..2), 4034c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Negation, 4035c67d6573Sopenharmony_ci }, 4036c67d6573Sopenharmony_ci ast::FlagsItem { 4037c67d6573Sopenharmony_ci span: span(2..3), 4038c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag( 4039c67d6573Sopenharmony_ci ast::Flag::DotMatchesNewLine 4040c67d6573Sopenharmony_ci ), 4041c67d6573Sopenharmony_ci }, 4042c67d6573Sopenharmony_ci ast::FlagsItem { 4043c67d6573Sopenharmony_ci span: span(3..4), 4044c67d6573Sopenharmony_ci kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), 4045c67d6573Sopenharmony_ci }, 4046c67d6573Sopenharmony_ci ], 4047c67d6573Sopenharmony_ci }) 
4048c67d6573Sopenharmony_ci ); 4049c67d6573Sopenharmony_ci 4050c67d6573Sopenharmony_ci assert_eq!( 4051c67d6573Sopenharmony_ci parser("isU").parse_flags().unwrap_err(), 4052c67d6573Sopenharmony_ci TestError { 4053c67d6573Sopenharmony_ci span: span(3..3), 4054c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagUnexpectedEof, 4055c67d6573Sopenharmony_ci } 4056c67d6573Sopenharmony_ci ); 4057c67d6573Sopenharmony_ci assert_eq!( 4058c67d6573Sopenharmony_ci parser("isUa:").parse_flags().unwrap_err(), 4059c67d6573Sopenharmony_ci TestError { 4060c67d6573Sopenharmony_ci span: span(3..4), 4061c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagUnrecognized, 4062c67d6573Sopenharmony_ci } 4063c67d6573Sopenharmony_ci ); 4064c67d6573Sopenharmony_ci assert_eq!( 4065c67d6573Sopenharmony_ci parser("isUi:").parse_flags().unwrap_err(), 4066c67d6573Sopenharmony_ci TestError { 4067c67d6573Sopenharmony_ci span: span(3..4), 4068c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) }, 4069c67d6573Sopenharmony_ci } 4070c67d6573Sopenharmony_ci ); 4071c67d6573Sopenharmony_ci assert_eq!( 4072c67d6573Sopenharmony_ci parser("i-sU-i:").parse_flags().unwrap_err(), 4073c67d6573Sopenharmony_ci TestError { 4074c67d6573Sopenharmony_ci span: span(4..5), 4075c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagRepeatedNegation { 4076c67d6573Sopenharmony_ci original: span(1..2), 4077c67d6573Sopenharmony_ci }, 4078c67d6573Sopenharmony_ci } 4079c67d6573Sopenharmony_ci ); 4080c67d6573Sopenharmony_ci assert_eq!( 4081c67d6573Sopenharmony_ci parser("-)").parse_flags().unwrap_err(), 4082c67d6573Sopenharmony_ci TestError { 4083c67d6573Sopenharmony_ci span: span(0..1), 4084c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagDanglingNegation, 4085c67d6573Sopenharmony_ci } 4086c67d6573Sopenharmony_ci ); 4087c67d6573Sopenharmony_ci assert_eq!( 4088c67d6573Sopenharmony_ci parser("i-)").parse_flags().unwrap_err(), 4089c67d6573Sopenharmony_ci TestError { 4090c67d6573Sopenharmony_ci span: span(1..2), 
4091c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagDanglingNegation, 4092c67d6573Sopenharmony_ci } 4093c67d6573Sopenharmony_ci ); 4094c67d6573Sopenharmony_ci assert_eq!( 4095c67d6573Sopenharmony_ci parser("iU-)").parse_flags().unwrap_err(), 4096c67d6573Sopenharmony_ci TestError { 4097c67d6573Sopenharmony_ci span: span(2..3), 4098c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagDanglingNegation, 4099c67d6573Sopenharmony_ci } 4100c67d6573Sopenharmony_ci ); 4101c67d6573Sopenharmony_ci } 4102c67d6573Sopenharmony_ci 4103c67d6573Sopenharmony_ci /* Checks `parse_flag` on a single character. The letters `i`, `m`, `s`, `U`, `u` and `x` must map to `CaseInsensitive`, `MultiLine`, `DotMatchesNewLine`, `SwapGreed`, `Unicode` and `IgnoreWhitespace` respectively. Any other character is `FlagUnrecognized`; this is asserted for `a` (span 0..1) and for the snowman character, whose expected span is built with `span_range` over 0..3 (presumably its three UTF-8 bytes; confirm against the `span_range` helper). */ #[test] 4104c67d6573Sopenharmony_ci fn parse_flag() { 4105c67d6573Sopenharmony_ci assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive)); 4106c67d6573Sopenharmony_ci assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine)); 4107c67d6573Sopenharmony_ci assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine)); 4108c67d6573Sopenharmony_ci assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed)); 4109c67d6573Sopenharmony_ci assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode)); 4110c67d6573Sopenharmony_ci assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace)); 4111c67d6573Sopenharmony_ci 4112c67d6573Sopenharmony_ci assert_eq!( 4113c67d6573Sopenharmony_ci parser("a").parse_flag().unwrap_err(), 4114c67d6573Sopenharmony_ci TestError { 4115c67d6573Sopenharmony_ci span: span(0..1), 4116c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagUnrecognized, 4117c67d6573Sopenharmony_ci } 4118c67d6573Sopenharmony_ci ); 4119c67d6573Sopenharmony_ci assert_eq!( 4120c67d6573Sopenharmony_ci parser("☃").parse_flag().unwrap_err(), 4121c67d6573Sopenharmony_ci TestError { 4122c67d6573Sopenharmony_ci span: span_range("☃", 0..3), 4123c67d6573Sopenharmony_ci kind: ast::ErrorKind::FlagUnrecognized, 4124c67d6573Sopenharmony_ci } 4125c67d6573Sopenharmony_ci ); 4126c67d6573Sopenharmony_ci } 4127c67d6573Sopenharmony_ci 4128c67d6573Sopenharmony_ci /* Checks `parse_primitive` on unescaped single-character input: `.` yields `Primitive::Dot`, `^` and `$` yield `StartLine` and `EndLine` assertions, and `a`, `|` and the snowman character each yield a `Verbatim` literal spanning the whole character. */ #[test] 4129c67d6573Sopenharmony_ci fn 
parse_primitive_non_escape() { 4130c67d6573Sopenharmony_ci assert_eq!( 4131c67d6573Sopenharmony_ci parser(r".").parse_primitive(), 4132c67d6573Sopenharmony_ci Ok(Primitive::Dot(span(0..1))) 4133c67d6573Sopenharmony_ci ); 4134c67d6573Sopenharmony_ci assert_eq!( 4135c67d6573Sopenharmony_ci parser(r"^").parse_primitive(), 4136c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4137c67d6573Sopenharmony_ci span: span(0..1), 4138c67d6573Sopenharmony_ci kind: ast::AssertionKind::StartLine, 4139c67d6573Sopenharmony_ci })) 4140c67d6573Sopenharmony_ci ); 4141c67d6573Sopenharmony_ci assert_eq!( 4142c67d6573Sopenharmony_ci parser(r"$").parse_primitive(), 4143c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4144c67d6573Sopenharmony_ci span: span(0..1), 4145c67d6573Sopenharmony_ci kind: ast::AssertionKind::EndLine, 4146c67d6573Sopenharmony_ci })) 4147c67d6573Sopenharmony_ci ); 4148c67d6573Sopenharmony_ci 4149c67d6573Sopenharmony_ci assert_eq!( 4150c67d6573Sopenharmony_ci parser(r"a").parse_primitive(), 4151c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4152c67d6573Sopenharmony_ci span: span(0..1), 4153c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4154c67d6573Sopenharmony_ci c: 'a', 4155c67d6573Sopenharmony_ci })) 4156c67d6573Sopenharmony_ci ); 4157c67d6573Sopenharmony_ci assert_eq!( 4158c67d6573Sopenharmony_ci parser(r"|").parse_primitive(), 4159c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4160c67d6573Sopenharmony_ci span: span(0..1), 4161c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4162c67d6573Sopenharmony_ci c: '|', 4163c67d6573Sopenharmony_ci })) 4164c67d6573Sopenharmony_ci ); 4165c67d6573Sopenharmony_ci assert_eq!( 4166c67d6573Sopenharmony_ci parser(r"☃").parse_primitive(), 4167c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4168c67d6573Sopenharmony_ci span: span_range("☃", 0..3), 4169c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4170c67d6573Sopenharmony_ci c: '☃', 
4171c67d6573Sopenharmony_ci })) 4172c67d6573Sopenharmony_ci ); 4173c67d6573Sopenharmony_ci } 4174c67d6573Sopenharmony_ci 4175c67d6573Sopenharmony_ci /* Checks backslash escapes. Via `parse_primitive`: `\|` is a `Punctuation` literal; each of `\a`, `\f`, `\t`, `\n`, `\r`, `\v` is a `Special` literal with the code point and kind listed in the `specials` table; `\A`, `\z`, `\b`, `\B` are the `StartText`, `EndText`, `WordBoundary` and `NotWordBoundary` assertions. All of these span the two characters 0..2. Via `parse_escape`: a lone backslash is `EscapeUnexpectedEof` (span 0..1) and `\y` is `EscapeUnrecognized` (span 0..2). */ #[test] 4176c67d6573Sopenharmony_ci fn parse_escape() { 4177c67d6573Sopenharmony_ci assert_eq!( 4178c67d6573Sopenharmony_ci parser(r"\|").parse_primitive(), 4179c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4180c67d6573Sopenharmony_ci span: span(0..2), 4181c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 4182c67d6573Sopenharmony_ci c: '|', 4183c67d6573Sopenharmony_ci })) 4184c67d6573Sopenharmony_ci ); 4185c67d6573Sopenharmony_ci let specials = &[ 4186c67d6573Sopenharmony_ci (r"\a", '\x07', ast::SpecialLiteralKind::Bell), 4187c67d6573Sopenharmony_ci (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed), 4188c67d6573Sopenharmony_ci (r"\t", '\t', ast::SpecialLiteralKind::Tab), 4189c67d6573Sopenharmony_ci (r"\n", '\n', ast::SpecialLiteralKind::LineFeed), 4190c67d6573Sopenharmony_ci (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn), 4191c67d6573Sopenharmony_ci (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab), 4192c67d6573Sopenharmony_ci ]; 4193c67d6573Sopenharmony_ci for &(pat, c, ref kind) in specials { 4194c67d6573Sopenharmony_ci assert_eq!( 4195c67d6573Sopenharmony_ci parser(pat).parse_primitive(), 4196c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4197c67d6573Sopenharmony_ci span: span(0..2), 4198c67d6573Sopenharmony_ci kind: ast::LiteralKind::Special(kind.clone()), 4199c67d6573Sopenharmony_ci c, 4200c67d6573Sopenharmony_ci })) 4201c67d6573Sopenharmony_ci ); 4202c67d6573Sopenharmony_ci } 4203c67d6573Sopenharmony_ci assert_eq!( 4204c67d6573Sopenharmony_ci parser(r"\A").parse_primitive(), 4205c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4206c67d6573Sopenharmony_ci span: span(0..2), 4207c67d6573Sopenharmony_ci kind: ast::AssertionKind::StartText, 4208c67d6573Sopenharmony_ci })) 4209c67d6573Sopenharmony_ci ); 4210c67d6573Sopenharmony_ci assert_eq!( 
4211c67d6573Sopenharmony_ci parser(r"\z").parse_primitive(), 4212c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4213c67d6573Sopenharmony_ci span: span(0..2), 4214c67d6573Sopenharmony_ci kind: ast::AssertionKind::EndText, 4215c67d6573Sopenharmony_ci })) 4216c67d6573Sopenharmony_ci ); 4217c67d6573Sopenharmony_ci assert_eq!( 4218c67d6573Sopenharmony_ci parser(r"\b").parse_primitive(), 4219c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4220c67d6573Sopenharmony_ci span: span(0..2), 4221c67d6573Sopenharmony_ci kind: ast::AssertionKind::WordBoundary, 4222c67d6573Sopenharmony_ci })) 4223c67d6573Sopenharmony_ci ); 4224c67d6573Sopenharmony_ci assert_eq!( 4225c67d6573Sopenharmony_ci parser(r"\B").parse_primitive(), 4226c67d6573Sopenharmony_ci Ok(Primitive::Assertion(ast::Assertion { 4227c67d6573Sopenharmony_ci span: span(0..2), 4228c67d6573Sopenharmony_ci kind: ast::AssertionKind::NotWordBoundary, 4229c67d6573Sopenharmony_ci })) 4230c67d6573Sopenharmony_ci ); 4231c67d6573Sopenharmony_ci 4232c67d6573Sopenharmony_ci assert_eq!( 4233c67d6573Sopenharmony_ci parser(r"\").parse_escape().unwrap_err(), 4234c67d6573Sopenharmony_ci TestError { 4235c67d6573Sopenharmony_ci span: span(0..1), 4236c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4237c67d6573Sopenharmony_ci } 4238c67d6573Sopenharmony_ci ); 4239c67d6573Sopenharmony_ci assert_eq!( 4240c67d6573Sopenharmony_ci parser(r"\y").parse_escape().unwrap_err(), 4241c67d6573Sopenharmony_ci TestError { 4242c67d6573Sopenharmony_ci span: span(0..2), 4243c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnrecognized, 4244c67d6573Sopenharmony_ci } 4245c67d6573Sopenharmony_ci ); 4246c67d6573Sopenharmony_ci } 4247c67d6573Sopenharmony_ci 4248c67d6573Sopenharmony_ci /* Checks that a backslash followed by a decimal digit is rejected when the parser is built with the plain `parser` helper (not `parser_octal`): both `\0` and `\9` must fail in `parse_escape` with `UnsupportedBackreference`, and the error span covers the whole two-character escape (0..2). */ #[test] 4249c67d6573Sopenharmony_ci fn parse_unsupported_backreference() { 4250c67d6573Sopenharmony_ci assert_eq!( 4251c67d6573Sopenharmony_ci parser(r"\0").parse_escape().unwrap_err(), 4252c67d6573Sopenharmony_ci TestError { 
4253c67d6573Sopenharmony_ci span: span(0..2), 4254c67d6573Sopenharmony_ci kind: ast::ErrorKind::UnsupportedBackreference, 4255c67d6573Sopenharmony_ci } 4256c67d6573Sopenharmony_ci ); 4257c67d6573Sopenharmony_ci assert_eq!( 4258c67d6573Sopenharmony_ci parser(r"\9").parse_escape().unwrap_err(), 4259c67d6573Sopenharmony_ci TestError { 4260c67d6573Sopenharmony_ci span: span(0..2), 4261c67d6573Sopenharmony_ci kind: ast::ErrorKind::UnsupportedBackreference, 4262c67d6573Sopenharmony_ci } 4263c67d6573Sopenharmony_ci ); 4264c67d6573Sopenharmony_ci } 4265c67d6573Sopenharmony_ci 4266c67d6573Sopenharmony_ci /* Checks octal escapes using the `parser_octal` helper. Every value in the range 0..511, formatted as a backslash plus its octal digits, must parse as an `Octal` literal for that code point with a span covering the whole pattern. The remaining assertions pin how many digits are consumed: `\778` stops before the non-octal `8` (literal `?`, span 0..3) and `\7777` stops after three digits (U+01FF, span 0..4); a full `parse` of those inputs yields a `Concat` of the octal literal and a `Verbatim` literal for the leftover character. `\8` is `EscapeUnrecognized`. */ #[test] 4267c67d6573Sopenharmony_ci fn parse_octal() { 4268c67d6573Sopenharmony_ci for i in 0..511 { 4269c67d6573Sopenharmony_ci let pat = format!(r"\{:o}", i); 4270c67d6573Sopenharmony_ci assert_eq!( 4271c67d6573Sopenharmony_ci parser_octal(&pat).parse_escape(), 4272c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4273c67d6573Sopenharmony_ci span: span(0..pat.len()), 4274c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 4275c67d6573Sopenharmony_ci c: ::std::char::from_u32(i).unwrap(), 4276c67d6573Sopenharmony_ci })) 4277c67d6573Sopenharmony_ci ); 4278c67d6573Sopenharmony_ci } 4279c67d6573Sopenharmony_ci assert_eq!( 4280c67d6573Sopenharmony_ci parser_octal(r"\778").parse_escape(), 4281c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4282c67d6573Sopenharmony_ci span: span(0..3), 4283c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 4284c67d6573Sopenharmony_ci c: '?', 4285c67d6573Sopenharmony_ci })) 4286c67d6573Sopenharmony_ci ); 4287c67d6573Sopenharmony_ci assert_eq!( 4288c67d6573Sopenharmony_ci parser_octal(r"\7777").parse_escape(), 4289c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4290c67d6573Sopenharmony_ci span: span(0..4), 4291c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 4292c67d6573Sopenharmony_ci c: '\u{01FF}', 4293c67d6573Sopenharmony_ci })) 4294c67d6573Sopenharmony_ci ); 4295c67d6573Sopenharmony_ci 
assert_eq!( 4296c67d6573Sopenharmony_ci parser_octal(r"\778").parse(), 4297c67d6573Sopenharmony_ci Ok(Ast::Concat(ast::Concat { 4298c67d6573Sopenharmony_ci span: span(0..4), 4299c67d6573Sopenharmony_ci asts: vec![ 4300c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 4301c67d6573Sopenharmony_ci span: span(0..3), 4302c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 4303c67d6573Sopenharmony_ci c: '?', 4304c67d6573Sopenharmony_ci }), 4305c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 4306c67d6573Sopenharmony_ci span: span(3..4), 4307c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4308c67d6573Sopenharmony_ci c: '8', 4309c67d6573Sopenharmony_ci }), 4310c67d6573Sopenharmony_ci ], 4311c67d6573Sopenharmony_ci })) 4312c67d6573Sopenharmony_ci ); 4313c67d6573Sopenharmony_ci assert_eq!( 4314c67d6573Sopenharmony_ci parser_octal(r"\7777").parse(), 4315c67d6573Sopenharmony_ci Ok(Ast::Concat(ast::Concat { 4316c67d6573Sopenharmony_ci span: span(0..5), 4317c67d6573Sopenharmony_ci asts: vec![ 4318c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 4319c67d6573Sopenharmony_ci span: span(0..4), 4320c67d6573Sopenharmony_ci kind: ast::LiteralKind::Octal, 4321c67d6573Sopenharmony_ci c: '\u{01FF}', 4322c67d6573Sopenharmony_ci }), 4323c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 4324c67d6573Sopenharmony_ci span: span(4..5), 4325c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4326c67d6573Sopenharmony_ci c: '7', 4327c67d6573Sopenharmony_ci }), 4328c67d6573Sopenharmony_ci ], 4329c67d6573Sopenharmony_ci })) 4330c67d6573Sopenharmony_ci ); 4331c67d6573Sopenharmony_ci 4332c67d6573Sopenharmony_ci assert_eq!( 4333c67d6573Sopenharmony_ci parser_octal(r"\8").parse_escape().unwrap_err(), 4334c67d6573Sopenharmony_ci TestError { 4335c67d6573Sopenharmony_ci span: span(0..2), 4336c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnrecognized, 4337c67d6573Sopenharmony_ci } 4338c67d6573Sopenharmony_ci ); 4339c67d6573Sopenharmony_ci } 4340c67d6573Sopenharmony_ci 
4341c67d6573Sopenharmony_ci /* Checks fixed two-digit `\xNN` escapes. All 256 values, formatted as two lowercase hex digits, must parse via `parse_escape` as a `HexFixed(X)` literal for that code point, spanning the whole pattern. Error cases: `\xF` (one digit, then end of input) is `EscapeUnexpectedEof` with an empty span at offset 3; a non-hex `G` in the first or second digit position is `EscapeHexInvalidDigit` with the span on the `G`. */ #[test] 4342c67d6573Sopenharmony_ci fn parse_hex_two() { 4343c67d6573Sopenharmony_ci for i in 0..256 { 4344c67d6573Sopenharmony_ci let pat = format!(r"\x{:02x}", i); 4345c67d6573Sopenharmony_ci assert_eq!( 4346c67d6573Sopenharmony_ci parser(&pat).parse_escape(), 4347c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4348c67d6573Sopenharmony_ci span: span(0..pat.len()), 4349c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), 4350c67d6573Sopenharmony_ci c: ::std::char::from_u32(i).unwrap(), 4351c67d6573Sopenharmony_ci })) 4352c67d6573Sopenharmony_ci ); 4353c67d6573Sopenharmony_ci } 4354c67d6573Sopenharmony_ci 4355c67d6573Sopenharmony_ci assert_eq!( 4356c67d6573Sopenharmony_ci parser(r"\xF").parse_escape().unwrap_err(), 4357c67d6573Sopenharmony_ci TestError { 4358c67d6573Sopenharmony_ci span: span(3..3), 4359c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4360c67d6573Sopenharmony_ci } 4361c67d6573Sopenharmony_ci ); 4362c67d6573Sopenharmony_ci assert_eq!( 4363c67d6573Sopenharmony_ci parser(r"\xG").parse_escape().unwrap_err(), 4364c67d6573Sopenharmony_ci TestError { 4365c67d6573Sopenharmony_ci span: span(2..3), 4366c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4367c67d6573Sopenharmony_ci } 4368c67d6573Sopenharmony_ci ); 4369c67d6573Sopenharmony_ci assert_eq!( 4370c67d6573Sopenharmony_ci parser(r"\xFG").parse_escape().unwrap_err(), 4371c67d6573Sopenharmony_ci TestError { 4372c67d6573Sopenharmony_ci span: span(3..4), 4373c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4374c67d6573Sopenharmony_ci } 4375c67d6573Sopenharmony_ci ); 4376c67d6573Sopenharmony_ci } 4377c67d6573Sopenharmony_ci 4378c67d6573Sopenharmony_ci /* Checks fixed four-digit `\uNNNN` escapes. Every value below 65536 that `char::from_u32` accepts (rejected values are skipped by the `None => continue` arm) must parse as a `HexFixed(UnicodeShort)` literal spanning the whole pattern. Error cases: `\uF` is `EscapeUnexpectedEof`; a non-hex `G` at each of the four digit positions is `EscapeHexInvalidDigit` with the span on the `G`; `\uD800` is `EscapeHexInvalid` with the span on the four digits (2..6). */ #[test] 4379c67d6573Sopenharmony_ci fn parse_hex_four() { 4380c67d6573Sopenharmony_ci for i in 0..65536 { 4381c67d6573Sopenharmony_ci let c = match ::std::char::from_u32(i) { 4382c67d6573Sopenharmony_ci None => continue, 
4383c67d6573Sopenharmony_ci Some(c) => c, 4384c67d6573Sopenharmony_ci }; 4385c67d6573Sopenharmony_ci let pat = format!(r"\u{:04x}", i); 4386c67d6573Sopenharmony_ci assert_eq!( 4387c67d6573Sopenharmony_ci parser(&pat).parse_escape(), 4388c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4389c67d6573Sopenharmony_ci span: span(0..pat.len()), 4390c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexFixed( 4391c67d6573Sopenharmony_ci ast::HexLiteralKind::UnicodeShort 4392c67d6573Sopenharmony_ci ), 4393c67d6573Sopenharmony_ci c, 4394c67d6573Sopenharmony_ci })) 4395c67d6573Sopenharmony_ci ); 4396c67d6573Sopenharmony_ci } 4397c67d6573Sopenharmony_ci 4398c67d6573Sopenharmony_ci assert_eq!( 4399c67d6573Sopenharmony_ci parser(r"\uF").parse_escape().unwrap_err(), 4400c67d6573Sopenharmony_ci TestError { 4401c67d6573Sopenharmony_ci span: span(3..3), 4402c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4403c67d6573Sopenharmony_ci } 4404c67d6573Sopenharmony_ci ); 4405c67d6573Sopenharmony_ci assert_eq!( 4406c67d6573Sopenharmony_ci parser(r"\uG").parse_escape().unwrap_err(), 4407c67d6573Sopenharmony_ci TestError { 4408c67d6573Sopenharmony_ci span: span(2..3), 4409c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4410c67d6573Sopenharmony_ci } 4411c67d6573Sopenharmony_ci ); 4412c67d6573Sopenharmony_ci assert_eq!( 4413c67d6573Sopenharmony_ci parser(r"\uFG").parse_escape().unwrap_err(), 4414c67d6573Sopenharmony_ci TestError { 4415c67d6573Sopenharmony_ci span: span(3..4), 4416c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4417c67d6573Sopenharmony_ci } 4418c67d6573Sopenharmony_ci ); 4419c67d6573Sopenharmony_ci assert_eq!( 4420c67d6573Sopenharmony_ci parser(r"\uFFG").parse_escape().unwrap_err(), 4421c67d6573Sopenharmony_ci TestError { 4422c67d6573Sopenharmony_ci span: span(4..5), 4423c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4424c67d6573Sopenharmony_ci } 4425c67d6573Sopenharmony_ci ); 
4426c67d6573Sopenharmony_ci assert_eq!( 4427c67d6573Sopenharmony_ci parser(r"\uFFFG").parse_escape().unwrap_err(), 4428c67d6573Sopenharmony_ci TestError { 4429c67d6573Sopenharmony_ci span: span(5..6), 4430c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4431c67d6573Sopenharmony_ci } 4432c67d6573Sopenharmony_ci ); 4433c67d6573Sopenharmony_ci assert_eq!( 4434c67d6573Sopenharmony_ci parser(r"\uD800").parse_escape().unwrap_err(), 4435c67d6573Sopenharmony_ci TestError { 4436c67d6573Sopenharmony_ci span: span(2..6), 4437c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalid, 4438c67d6573Sopenharmony_ci } 4439c67d6573Sopenharmony_ci ); 4440c67d6573Sopenharmony_ci } 4441c67d6573Sopenharmony_ci 4442c67d6573Sopenharmony_ci /* Checks fixed eight-digit `\UNNNNNNNN` escapes. Every value below 65536 that `char::from_u32` accepts, zero-padded to eight hex digits, must parse as a `HexFixed(UnicodeLong)` literal spanning the whole pattern (values of 65536 and above are not exercised by this loop). Error cases: `\UF` is `EscapeUnexpectedEof` with an empty span at offset 3, and a non-hex `G` at each of the eight digit positions is `EscapeHexInvalidDigit` with the span on the `G`. */ #[test] 4443c67d6573Sopenharmony_ci fn parse_hex_eight() { 4444c67d6573Sopenharmony_ci for i in 0..65536 { 4445c67d6573Sopenharmony_ci let c = match ::std::char::from_u32(i) { 4446c67d6573Sopenharmony_ci None => continue, 4447c67d6573Sopenharmony_ci Some(c) => c, 4448c67d6573Sopenharmony_ci }; 4449c67d6573Sopenharmony_ci let pat = format!(r"\U{:08x}", i); 4450c67d6573Sopenharmony_ci assert_eq!( 4451c67d6573Sopenharmony_ci parser(&pat).parse_escape(), 4452c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4453c67d6573Sopenharmony_ci span: span(0..pat.len()), 4454c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexFixed( 4455c67d6573Sopenharmony_ci ast::HexLiteralKind::UnicodeLong 4456c67d6573Sopenharmony_ci ), 4457c67d6573Sopenharmony_ci c, 4458c67d6573Sopenharmony_ci })) 4459c67d6573Sopenharmony_ci ); 4460c67d6573Sopenharmony_ci } 4461c67d6573Sopenharmony_ci 4462c67d6573Sopenharmony_ci assert_eq!( 4463c67d6573Sopenharmony_ci parser(r"\UF").parse_escape().unwrap_err(), 4464c67d6573Sopenharmony_ci TestError { 4465c67d6573Sopenharmony_ci span: span(3..3), 4466c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4467c67d6573Sopenharmony_ci } 4468c67d6573Sopenharmony_ci ); 4469c67d6573Sopenharmony_ci 
assert_eq!( 4470c67d6573Sopenharmony_ci parser(r"\UG").parse_escape().unwrap_err(), 4471c67d6573Sopenharmony_ci TestError { 4472c67d6573Sopenharmony_ci span: span(2..3), 4473c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4474c67d6573Sopenharmony_ci } 4475c67d6573Sopenharmony_ci ); 4476c67d6573Sopenharmony_ci assert_eq!( 4477c67d6573Sopenharmony_ci parser(r"\UFG").parse_escape().unwrap_err(), 4478c67d6573Sopenharmony_ci TestError { 4479c67d6573Sopenharmony_ci span: span(3..4), 4480c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4481c67d6573Sopenharmony_ci } 4482c67d6573Sopenharmony_ci ); 4483c67d6573Sopenharmony_ci assert_eq!( 4484c67d6573Sopenharmony_ci parser(r"\UFFG").parse_escape().unwrap_err(), 4485c67d6573Sopenharmony_ci TestError { 4486c67d6573Sopenharmony_ci span: span(4..5), 4487c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4488c67d6573Sopenharmony_ci } 4489c67d6573Sopenharmony_ci ); 4490c67d6573Sopenharmony_ci assert_eq!( 4491c67d6573Sopenharmony_ci parser(r"\UFFFG").parse_escape().unwrap_err(), 4492c67d6573Sopenharmony_ci TestError { 4493c67d6573Sopenharmony_ci span: span(5..6), 4494c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4495c67d6573Sopenharmony_ci } 4496c67d6573Sopenharmony_ci ); 4497c67d6573Sopenharmony_ci assert_eq!( 4498c67d6573Sopenharmony_ci parser(r"\UFFFFG").parse_escape().unwrap_err(), 4499c67d6573Sopenharmony_ci TestError { 4500c67d6573Sopenharmony_ci span: span(6..7), 4501c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4502c67d6573Sopenharmony_ci } 4503c67d6573Sopenharmony_ci ); 4504c67d6573Sopenharmony_ci assert_eq!( 4505c67d6573Sopenharmony_ci parser(r"\UFFFFFG").parse_escape().unwrap_err(), 4506c67d6573Sopenharmony_ci TestError { 4507c67d6573Sopenharmony_ci span: span(7..8), 4508c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4509c67d6573Sopenharmony_ci } 4510c67d6573Sopenharmony_ci ); 
4511c67d6573Sopenharmony_ci assert_eq!( 4512c67d6573Sopenharmony_ci parser(r"\UFFFFFFG").parse_escape().unwrap_err(), 4513c67d6573Sopenharmony_ci TestError { 4514c67d6573Sopenharmony_ci span: span(8..9), 4515c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4516c67d6573Sopenharmony_ci } 4517c67d6573Sopenharmony_ci ); 4518c67d6573Sopenharmony_ci assert_eq!( 4519c67d6573Sopenharmony_ci parser(r"\UFFFFFFFG").parse_escape().unwrap_err(), 4520c67d6573Sopenharmony_ci TestError { 4521c67d6573Sopenharmony_ci span: span(9..10), 4522c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4523c67d6573Sopenharmony_ci } 4524c67d6573Sopenharmony_ci ); 4525c67d6573Sopenharmony_ci } 4526c67d6573Sopenharmony_ci 4527c67d6573Sopenharmony_ci #[test] 4528c67d6573Sopenharmony_ci fn parse_hex_brace() { 4529c67d6573Sopenharmony_ci assert_eq!( 4530c67d6573Sopenharmony_ci parser(r"\u{26c4}").parse_escape(), 4531c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4532c67d6573Sopenharmony_ci span: span(0..8), 4533c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace( 4534c67d6573Sopenharmony_ci ast::HexLiteralKind::UnicodeShort 4535c67d6573Sopenharmony_ci ), 4536c67d6573Sopenharmony_ci c: '⛄', 4537c67d6573Sopenharmony_ci })) 4538c67d6573Sopenharmony_ci ); 4539c67d6573Sopenharmony_ci assert_eq!( 4540c67d6573Sopenharmony_ci parser(r"\U{26c4}").parse_escape(), 4541c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4542c67d6573Sopenharmony_ci span: span(0..8), 4543c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace( 4544c67d6573Sopenharmony_ci ast::HexLiteralKind::UnicodeLong 4545c67d6573Sopenharmony_ci ), 4546c67d6573Sopenharmony_ci c: '⛄', 4547c67d6573Sopenharmony_ci })) 4548c67d6573Sopenharmony_ci ); 4549c67d6573Sopenharmony_ci assert_eq!( 4550c67d6573Sopenharmony_ci parser(r"\x{26c4}").parse_escape(), 4551c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4552c67d6573Sopenharmony_ci span: span(0..8), 
4553c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), 4554c67d6573Sopenharmony_ci c: '⛄', 4555c67d6573Sopenharmony_ci })) 4556c67d6573Sopenharmony_ci ); 4557c67d6573Sopenharmony_ci assert_eq!( 4558c67d6573Sopenharmony_ci parser(r"\x{26C4}").parse_escape(), 4559c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4560c67d6573Sopenharmony_ci span: span(0..8), 4561c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), 4562c67d6573Sopenharmony_ci c: '⛄', 4563c67d6573Sopenharmony_ci })) 4564c67d6573Sopenharmony_ci ); 4565c67d6573Sopenharmony_ci assert_eq!( 4566c67d6573Sopenharmony_ci parser(r"\x{10fFfF}").parse_escape(), 4567c67d6573Sopenharmony_ci Ok(Primitive::Literal(ast::Literal { 4568c67d6573Sopenharmony_ci span: span(0..10), 4569c67d6573Sopenharmony_ci kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), 4570c67d6573Sopenharmony_ci c: '\u{10FFFF}', 4571c67d6573Sopenharmony_ci })) 4572c67d6573Sopenharmony_ci ); 4573c67d6573Sopenharmony_ci 4574c67d6573Sopenharmony_ci assert_eq!( 4575c67d6573Sopenharmony_ci parser(r"\x").parse_escape().unwrap_err(), 4576c67d6573Sopenharmony_ci TestError { 4577c67d6573Sopenharmony_ci span: span(2..2), 4578c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4579c67d6573Sopenharmony_ci } 4580c67d6573Sopenharmony_ci ); 4581c67d6573Sopenharmony_ci assert_eq!( 4582c67d6573Sopenharmony_ci parser(r"\x{").parse_escape().unwrap_err(), 4583c67d6573Sopenharmony_ci TestError { 4584c67d6573Sopenharmony_ci span: span(2..3), 4585c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4586c67d6573Sopenharmony_ci } 4587c67d6573Sopenharmony_ci ); 4588c67d6573Sopenharmony_ci assert_eq!( 4589c67d6573Sopenharmony_ci parser(r"\x{FF").parse_escape().unwrap_err(), 4590c67d6573Sopenharmony_ci TestError { 4591c67d6573Sopenharmony_ci span: span(2..5), 4592c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 4593c67d6573Sopenharmony_ci } 
4594c67d6573Sopenharmony_ci ); 4595c67d6573Sopenharmony_ci assert_eq!( 4596c67d6573Sopenharmony_ci parser(r"\x{}").parse_escape().unwrap_err(), 4597c67d6573Sopenharmony_ci TestError { 4598c67d6573Sopenharmony_ci span: span(2..4), 4599c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexEmpty, 4600c67d6573Sopenharmony_ci } 4601c67d6573Sopenharmony_ci ); 4602c67d6573Sopenharmony_ci assert_eq!( 4603c67d6573Sopenharmony_ci parser(r"\x{FGF}").parse_escape().unwrap_err(), 4604c67d6573Sopenharmony_ci TestError { 4605c67d6573Sopenharmony_ci span: span(4..5), 4606c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalidDigit, 4607c67d6573Sopenharmony_ci } 4608c67d6573Sopenharmony_ci ); 4609c67d6573Sopenharmony_ci assert_eq!( 4610c67d6573Sopenharmony_ci parser(r"\x{FFFFFF}").parse_escape().unwrap_err(), 4611c67d6573Sopenharmony_ci TestError { 4612c67d6573Sopenharmony_ci span: span(3..9), 4613c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalid, 4614c67d6573Sopenharmony_ci } 4615c67d6573Sopenharmony_ci ); 4616c67d6573Sopenharmony_ci assert_eq!( 4617c67d6573Sopenharmony_ci parser(r"\x{D800}").parse_escape().unwrap_err(), 4618c67d6573Sopenharmony_ci TestError { 4619c67d6573Sopenharmony_ci span: span(3..7), 4620c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalid, 4621c67d6573Sopenharmony_ci } 4622c67d6573Sopenharmony_ci ); 4623c67d6573Sopenharmony_ci assert_eq!( 4624c67d6573Sopenharmony_ci parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(), 4625c67d6573Sopenharmony_ci TestError { 4626c67d6573Sopenharmony_ci span: span(3..12), 4627c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeHexInvalid, 4628c67d6573Sopenharmony_ci } 4629c67d6573Sopenharmony_ci ); 4630c67d6573Sopenharmony_ci } 4631c67d6573Sopenharmony_ci 4632c67d6573Sopenharmony_ci #[test] 4633c67d6573Sopenharmony_ci fn parse_decimal() { 4634c67d6573Sopenharmony_ci assert_eq!(parser("123").parse_decimal(), Ok(123)); 4635c67d6573Sopenharmony_ci assert_eq!(parser("0").parse_decimal(), Ok(0)); 
4636c67d6573Sopenharmony_ci assert_eq!(parser("01").parse_decimal(), Ok(1)); 4637c67d6573Sopenharmony_ci 4638c67d6573Sopenharmony_ci assert_eq!( 4639c67d6573Sopenharmony_ci parser("-1").parse_decimal().unwrap_err(), 4640c67d6573Sopenharmony_ci TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty } 4641c67d6573Sopenharmony_ci ); 4642c67d6573Sopenharmony_ci assert_eq!( 4643c67d6573Sopenharmony_ci parser("").parse_decimal().unwrap_err(), 4644c67d6573Sopenharmony_ci TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty } 4645c67d6573Sopenharmony_ci ); 4646c67d6573Sopenharmony_ci assert_eq!( 4647c67d6573Sopenharmony_ci parser("9999999999").parse_decimal().unwrap_err(), 4648c67d6573Sopenharmony_ci TestError { 4649c67d6573Sopenharmony_ci span: span(0..10), 4650c67d6573Sopenharmony_ci kind: ast::ErrorKind::DecimalInvalid, 4651c67d6573Sopenharmony_ci } 4652c67d6573Sopenharmony_ci ); 4653c67d6573Sopenharmony_ci } 4654c67d6573Sopenharmony_ci 4655c67d6573Sopenharmony_ci #[test] 4656c67d6573Sopenharmony_ci fn parse_set_class() { 4657c67d6573Sopenharmony_ci fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet { 4658c67d6573Sopenharmony_ci ast::ClassSet::union(ast::ClassSetUnion { span, items }) 4659c67d6573Sopenharmony_ci } 4660c67d6573Sopenharmony_ci 4661c67d6573Sopenharmony_ci fn intersection( 4662c67d6573Sopenharmony_ci span: Span, 4663c67d6573Sopenharmony_ci lhs: ast::ClassSet, 4664c67d6573Sopenharmony_ci rhs: ast::ClassSet, 4665c67d6573Sopenharmony_ci ) -> ast::ClassSet { 4666c67d6573Sopenharmony_ci ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { 4667c67d6573Sopenharmony_ci span, 4668c67d6573Sopenharmony_ci kind: ast::ClassSetBinaryOpKind::Intersection, 4669c67d6573Sopenharmony_ci lhs: Box::new(lhs), 4670c67d6573Sopenharmony_ci rhs: Box::new(rhs), 4671c67d6573Sopenharmony_ci }) 4672c67d6573Sopenharmony_ci } 4673c67d6573Sopenharmony_ci 4674c67d6573Sopenharmony_ci fn difference( 4675c67d6573Sopenharmony_ci span: Span, 
4676c67d6573Sopenharmony_ci lhs: ast::ClassSet, 4677c67d6573Sopenharmony_ci rhs: ast::ClassSet, 4678c67d6573Sopenharmony_ci ) -> ast::ClassSet { 4679c67d6573Sopenharmony_ci ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { 4680c67d6573Sopenharmony_ci span, 4681c67d6573Sopenharmony_ci kind: ast::ClassSetBinaryOpKind::Difference, 4682c67d6573Sopenharmony_ci lhs: Box::new(lhs), 4683c67d6573Sopenharmony_ci rhs: Box::new(rhs), 4684c67d6573Sopenharmony_ci }) 4685c67d6573Sopenharmony_ci } 4686c67d6573Sopenharmony_ci 4687c67d6573Sopenharmony_ci fn symdifference( 4688c67d6573Sopenharmony_ci span: Span, 4689c67d6573Sopenharmony_ci lhs: ast::ClassSet, 4690c67d6573Sopenharmony_ci rhs: ast::ClassSet, 4691c67d6573Sopenharmony_ci ) -> ast::ClassSet { 4692c67d6573Sopenharmony_ci ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { 4693c67d6573Sopenharmony_ci span, 4694c67d6573Sopenharmony_ci kind: ast::ClassSetBinaryOpKind::SymmetricDifference, 4695c67d6573Sopenharmony_ci lhs: Box::new(lhs), 4696c67d6573Sopenharmony_ci rhs: Box::new(rhs), 4697c67d6573Sopenharmony_ci }) 4698c67d6573Sopenharmony_ci } 4699c67d6573Sopenharmony_ci 4700c67d6573Sopenharmony_ci fn itemset(item: ast::ClassSetItem) -> ast::ClassSet { 4701c67d6573Sopenharmony_ci ast::ClassSet::Item(item) 4702c67d6573Sopenharmony_ci } 4703c67d6573Sopenharmony_ci 4704c67d6573Sopenharmony_ci fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem { 4705c67d6573Sopenharmony_ci ast::ClassSetItem::Ascii(cls) 4706c67d6573Sopenharmony_ci } 4707c67d6573Sopenharmony_ci 4708c67d6573Sopenharmony_ci fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem { 4709c67d6573Sopenharmony_ci ast::ClassSetItem::Unicode(cls) 4710c67d6573Sopenharmony_ci } 4711c67d6573Sopenharmony_ci 4712c67d6573Sopenharmony_ci fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem { 4713c67d6573Sopenharmony_ci ast::ClassSetItem::Perl(cls) 4714c67d6573Sopenharmony_ci } 4715c67d6573Sopenharmony_ci 4716c67d6573Sopenharmony_ci fn item_bracket(cls: ast::ClassBracketed) 
-> ast::ClassSetItem { 4717c67d6573Sopenharmony_ci ast::ClassSetItem::Bracketed(Box::new(cls)) 4718c67d6573Sopenharmony_ci } 4719c67d6573Sopenharmony_ci 4720c67d6573Sopenharmony_ci fn lit(span: Span, c: char) -> ast::ClassSetItem { 4721c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ast::Literal { 4722c67d6573Sopenharmony_ci span, 4723c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4724c67d6573Sopenharmony_ci c, 4725c67d6573Sopenharmony_ci }) 4726c67d6573Sopenharmony_ci } 4727c67d6573Sopenharmony_ci 4728c67d6573Sopenharmony_ci fn empty(span: Span) -> ast::ClassSetItem { 4729c67d6573Sopenharmony_ci ast::ClassSetItem::Empty(span) 4730c67d6573Sopenharmony_ci } 4731c67d6573Sopenharmony_ci 4732c67d6573Sopenharmony_ci fn range(span: Span, start: char, end: char) -> ast::ClassSetItem { 4733c67d6573Sopenharmony_ci let pos1 = Position { 4734c67d6573Sopenharmony_ci offset: span.start.offset + start.len_utf8(), 4735c67d6573Sopenharmony_ci column: span.start.column + 1, 4736c67d6573Sopenharmony_ci ..span.start 4737c67d6573Sopenharmony_ci }; 4738c67d6573Sopenharmony_ci let pos2 = Position { 4739c67d6573Sopenharmony_ci offset: span.end.offset - end.len_utf8(), 4740c67d6573Sopenharmony_ci column: span.end.column - 1, 4741c67d6573Sopenharmony_ci ..span.end 4742c67d6573Sopenharmony_ci }; 4743c67d6573Sopenharmony_ci ast::ClassSetItem::Range(ast::ClassSetRange { 4744c67d6573Sopenharmony_ci span, 4745c67d6573Sopenharmony_ci start: ast::Literal { 4746c67d6573Sopenharmony_ci span: Span { end: pos1, ..span }, 4747c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4748c67d6573Sopenharmony_ci c: start, 4749c67d6573Sopenharmony_ci }, 4750c67d6573Sopenharmony_ci end: ast::Literal { 4751c67d6573Sopenharmony_ci span: Span { start: pos2, ..span }, 4752c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 4753c67d6573Sopenharmony_ci c: end, 4754c67d6573Sopenharmony_ci }, 4755c67d6573Sopenharmony_ci }) 4756c67d6573Sopenharmony_ci } 4757c67d6573Sopenharmony_ci 
4758c67d6573Sopenharmony_ci fn alnum(span: Span, negated: bool) -> ast::ClassAscii { 4759c67d6573Sopenharmony_ci ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated } 4760c67d6573Sopenharmony_ci } 4761c67d6573Sopenharmony_ci 4762c67d6573Sopenharmony_ci fn lower(span: Span, negated: bool) -> ast::ClassAscii { 4763c67d6573Sopenharmony_ci ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated } 4764c67d6573Sopenharmony_ci } 4765c67d6573Sopenharmony_ci 4766c67d6573Sopenharmony_ci assert_eq!( 4767c67d6573Sopenharmony_ci parser("[[:alnum:]]").parse(), 4768c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4769c67d6573Sopenharmony_ci span: span(0..11), 4770c67d6573Sopenharmony_ci negated: false, 4771c67d6573Sopenharmony_ci kind: itemset(item_ascii(alnum(span(1..10), false))), 4772c67d6573Sopenharmony_ci }))) 4773c67d6573Sopenharmony_ci ); 4774c67d6573Sopenharmony_ci assert_eq!( 4775c67d6573Sopenharmony_ci parser("[[[:alnum:]]]").parse(), 4776c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4777c67d6573Sopenharmony_ci span: span(0..13), 4778c67d6573Sopenharmony_ci negated: false, 4779c67d6573Sopenharmony_ci kind: itemset(item_bracket(ast::ClassBracketed { 4780c67d6573Sopenharmony_ci span: span(1..12), 4781c67d6573Sopenharmony_ci negated: false, 4782c67d6573Sopenharmony_ci kind: itemset(item_ascii(alnum(span(2..11), false))), 4783c67d6573Sopenharmony_ci })), 4784c67d6573Sopenharmony_ci }))) 4785c67d6573Sopenharmony_ci ); 4786c67d6573Sopenharmony_ci assert_eq!( 4787c67d6573Sopenharmony_ci parser("[[:alnum:]&&[:lower:]]").parse(), 4788c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4789c67d6573Sopenharmony_ci span: span(0..22), 4790c67d6573Sopenharmony_ci negated: false, 4791c67d6573Sopenharmony_ci kind: intersection( 4792c67d6573Sopenharmony_ci span(1..21), 4793c67d6573Sopenharmony_ci itemset(item_ascii(alnum(span(1..10), false))), 
4794c67d6573Sopenharmony_ci itemset(item_ascii(lower(span(12..21), false))), 4795c67d6573Sopenharmony_ci ), 4796c67d6573Sopenharmony_ci }))) 4797c67d6573Sopenharmony_ci ); 4798c67d6573Sopenharmony_ci assert_eq!( 4799c67d6573Sopenharmony_ci parser("[[:alnum:]--[:lower:]]").parse(), 4800c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4801c67d6573Sopenharmony_ci span: span(0..22), 4802c67d6573Sopenharmony_ci negated: false, 4803c67d6573Sopenharmony_ci kind: difference( 4804c67d6573Sopenharmony_ci span(1..21), 4805c67d6573Sopenharmony_ci itemset(item_ascii(alnum(span(1..10), false))), 4806c67d6573Sopenharmony_ci itemset(item_ascii(lower(span(12..21), false))), 4807c67d6573Sopenharmony_ci ), 4808c67d6573Sopenharmony_ci }))) 4809c67d6573Sopenharmony_ci ); 4810c67d6573Sopenharmony_ci assert_eq!( 4811c67d6573Sopenharmony_ci parser("[[:alnum:]~~[:lower:]]").parse(), 4812c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4813c67d6573Sopenharmony_ci span: span(0..22), 4814c67d6573Sopenharmony_ci negated: false, 4815c67d6573Sopenharmony_ci kind: symdifference( 4816c67d6573Sopenharmony_ci span(1..21), 4817c67d6573Sopenharmony_ci itemset(item_ascii(alnum(span(1..10), false))), 4818c67d6573Sopenharmony_ci itemset(item_ascii(lower(span(12..21), false))), 4819c67d6573Sopenharmony_ci ), 4820c67d6573Sopenharmony_ci }))) 4821c67d6573Sopenharmony_ci ); 4822c67d6573Sopenharmony_ci 4823c67d6573Sopenharmony_ci assert_eq!( 4824c67d6573Sopenharmony_ci parser("[a]").parse(), 4825c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4826c67d6573Sopenharmony_ci span: span(0..3), 4827c67d6573Sopenharmony_ci negated: false, 4828c67d6573Sopenharmony_ci kind: itemset(lit(span(1..2), 'a')), 4829c67d6573Sopenharmony_ci }))) 4830c67d6573Sopenharmony_ci ); 4831c67d6573Sopenharmony_ci assert_eq!( 4832c67d6573Sopenharmony_ci parser(r"[a\]]").parse(), 4833c67d6573Sopenharmony_ci 
Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4834c67d6573Sopenharmony_ci span: span(0..5), 4835c67d6573Sopenharmony_ci negated: false, 4836c67d6573Sopenharmony_ci kind: union( 4837c67d6573Sopenharmony_ci span(1..4), 4838c67d6573Sopenharmony_ci vec![ 4839c67d6573Sopenharmony_ci lit(span(1..2), 'a'), 4840c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ast::Literal { 4841c67d6573Sopenharmony_ci span: span(2..4), 4842c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 4843c67d6573Sopenharmony_ci c: ']', 4844c67d6573Sopenharmony_ci }), 4845c67d6573Sopenharmony_ci ] 4846c67d6573Sopenharmony_ci ), 4847c67d6573Sopenharmony_ci }))) 4848c67d6573Sopenharmony_ci ); 4849c67d6573Sopenharmony_ci assert_eq!( 4850c67d6573Sopenharmony_ci parser(r"[a\-z]").parse(), 4851c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4852c67d6573Sopenharmony_ci span: span(0..6), 4853c67d6573Sopenharmony_ci negated: false, 4854c67d6573Sopenharmony_ci kind: union( 4855c67d6573Sopenharmony_ci span(1..5), 4856c67d6573Sopenharmony_ci vec![ 4857c67d6573Sopenharmony_ci lit(span(1..2), 'a'), 4858c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ast::Literal { 4859c67d6573Sopenharmony_ci span: span(2..4), 4860c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 4861c67d6573Sopenharmony_ci c: '-', 4862c67d6573Sopenharmony_ci }), 4863c67d6573Sopenharmony_ci lit(span(4..5), 'z'), 4864c67d6573Sopenharmony_ci ] 4865c67d6573Sopenharmony_ci ), 4866c67d6573Sopenharmony_ci }))) 4867c67d6573Sopenharmony_ci ); 4868c67d6573Sopenharmony_ci assert_eq!( 4869c67d6573Sopenharmony_ci parser("[ab]").parse(), 4870c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4871c67d6573Sopenharmony_ci span: span(0..4), 4872c67d6573Sopenharmony_ci negated: false, 4873c67d6573Sopenharmony_ci kind: union( 4874c67d6573Sopenharmony_ci span(1..3), 4875c67d6573Sopenharmony_ci vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] 
4876c67d6573Sopenharmony_ci ), 4877c67d6573Sopenharmony_ci }))) 4878c67d6573Sopenharmony_ci ); 4879c67d6573Sopenharmony_ci assert_eq!( 4880c67d6573Sopenharmony_ci parser("[a-]").parse(), 4881c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4882c67d6573Sopenharmony_ci span: span(0..4), 4883c67d6573Sopenharmony_ci negated: false, 4884c67d6573Sopenharmony_ci kind: union( 4885c67d6573Sopenharmony_ci span(1..3), 4886c67d6573Sopenharmony_ci vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] 4887c67d6573Sopenharmony_ci ), 4888c67d6573Sopenharmony_ci }))) 4889c67d6573Sopenharmony_ci ); 4890c67d6573Sopenharmony_ci assert_eq!( 4891c67d6573Sopenharmony_ci parser("[-a]").parse(), 4892c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4893c67d6573Sopenharmony_ci span: span(0..4), 4894c67d6573Sopenharmony_ci negated: false, 4895c67d6573Sopenharmony_ci kind: union( 4896c67d6573Sopenharmony_ci span(1..3), 4897c67d6573Sopenharmony_ci vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] 4898c67d6573Sopenharmony_ci ), 4899c67d6573Sopenharmony_ci }))) 4900c67d6573Sopenharmony_ci ); 4901c67d6573Sopenharmony_ci assert_eq!( 4902c67d6573Sopenharmony_ci parser(r"[\pL]").parse(), 4903c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4904c67d6573Sopenharmony_ci span: span(0..5), 4905c67d6573Sopenharmony_ci negated: false, 4906c67d6573Sopenharmony_ci kind: itemset(item_unicode(ast::ClassUnicode { 4907c67d6573Sopenharmony_ci span: span(1..4), 4908c67d6573Sopenharmony_ci negated: false, 4909c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::OneLetter('L'), 4910c67d6573Sopenharmony_ci })), 4911c67d6573Sopenharmony_ci }))) 4912c67d6573Sopenharmony_ci ); 4913c67d6573Sopenharmony_ci assert_eq!( 4914c67d6573Sopenharmony_ci parser(r"[\w]").parse(), 4915c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4916c67d6573Sopenharmony_ci span: span(0..4), 4917c67d6573Sopenharmony_ci 
negated: false, 4918c67d6573Sopenharmony_ci kind: itemset(item_perl(ast::ClassPerl { 4919c67d6573Sopenharmony_ci span: span(1..3), 4920c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 4921c67d6573Sopenharmony_ci negated: false, 4922c67d6573Sopenharmony_ci })), 4923c67d6573Sopenharmony_ci }))) 4924c67d6573Sopenharmony_ci ); 4925c67d6573Sopenharmony_ci assert_eq!( 4926c67d6573Sopenharmony_ci parser(r"[a\wz]").parse(), 4927c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4928c67d6573Sopenharmony_ci span: span(0..6), 4929c67d6573Sopenharmony_ci negated: false, 4930c67d6573Sopenharmony_ci kind: union( 4931c67d6573Sopenharmony_ci span(1..5), 4932c67d6573Sopenharmony_ci vec![ 4933c67d6573Sopenharmony_ci lit(span(1..2), 'a'), 4934c67d6573Sopenharmony_ci item_perl(ast::ClassPerl { 4935c67d6573Sopenharmony_ci span: span(2..4), 4936c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 4937c67d6573Sopenharmony_ci negated: false, 4938c67d6573Sopenharmony_ci }), 4939c67d6573Sopenharmony_ci lit(span(4..5), 'z'), 4940c67d6573Sopenharmony_ci ] 4941c67d6573Sopenharmony_ci ), 4942c67d6573Sopenharmony_ci }))) 4943c67d6573Sopenharmony_ci ); 4944c67d6573Sopenharmony_ci 4945c67d6573Sopenharmony_ci assert_eq!( 4946c67d6573Sopenharmony_ci parser("[a-z]").parse(), 4947c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4948c67d6573Sopenharmony_ci span: span(0..5), 4949c67d6573Sopenharmony_ci negated: false, 4950c67d6573Sopenharmony_ci kind: itemset(range(span(1..4), 'a', 'z')), 4951c67d6573Sopenharmony_ci }))) 4952c67d6573Sopenharmony_ci ); 4953c67d6573Sopenharmony_ci assert_eq!( 4954c67d6573Sopenharmony_ci parser("[a-cx-z]").parse(), 4955c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4956c67d6573Sopenharmony_ci span: span(0..8), 4957c67d6573Sopenharmony_ci negated: false, 4958c67d6573Sopenharmony_ci kind: union( 4959c67d6573Sopenharmony_ci span(1..7), 4960c67d6573Sopenharmony_ci vec![ 
4961c67d6573Sopenharmony_ci range(span(1..4), 'a', 'c'), 4962c67d6573Sopenharmony_ci range(span(4..7), 'x', 'z'), 4963c67d6573Sopenharmony_ci ] 4964c67d6573Sopenharmony_ci ), 4965c67d6573Sopenharmony_ci }))) 4966c67d6573Sopenharmony_ci ); 4967c67d6573Sopenharmony_ci assert_eq!( 4968c67d6573Sopenharmony_ci parser(r"[\w&&a-cx-z]").parse(), 4969c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4970c67d6573Sopenharmony_ci span: span(0..12), 4971c67d6573Sopenharmony_ci negated: false, 4972c67d6573Sopenharmony_ci kind: intersection( 4973c67d6573Sopenharmony_ci span(1..11), 4974c67d6573Sopenharmony_ci itemset(item_perl(ast::ClassPerl { 4975c67d6573Sopenharmony_ci span: span(1..3), 4976c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 4977c67d6573Sopenharmony_ci negated: false, 4978c67d6573Sopenharmony_ci })), 4979c67d6573Sopenharmony_ci union( 4980c67d6573Sopenharmony_ci span(5..11), 4981c67d6573Sopenharmony_ci vec![ 4982c67d6573Sopenharmony_ci range(span(5..8), 'a', 'c'), 4983c67d6573Sopenharmony_ci range(span(8..11), 'x', 'z'), 4984c67d6573Sopenharmony_ci ] 4985c67d6573Sopenharmony_ci ), 4986c67d6573Sopenharmony_ci ), 4987c67d6573Sopenharmony_ci }))) 4988c67d6573Sopenharmony_ci ); 4989c67d6573Sopenharmony_ci assert_eq!( 4990c67d6573Sopenharmony_ci parser(r"[a-cx-z&&\w]").parse(), 4991c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 4992c67d6573Sopenharmony_ci span: span(0..12), 4993c67d6573Sopenharmony_ci negated: false, 4994c67d6573Sopenharmony_ci kind: intersection( 4995c67d6573Sopenharmony_ci span(1..11), 4996c67d6573Sopenharmony_ci union( 4997c67d6573Sopenharmony_ci span(1..7), 4998c67d6573Sopenharmony_ci vec![ 4999c67d6573Sopenharmony_ci range(span(1..4), 'a', 'c'), 5000c67d6573Sopenharmony_ci range(span(4..7), 'x', 'z'), 5001c67d6573Sopenharmony_ci ] 5002c67d6573Sopenharmony_ci ), 5003c67d6573Sopenharmony_ci itemset(item_perl(ast::ClassPerl { 5004c67d6573Sopenharmony_ci span: span(9..11), 
5005c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 5006c67d6573Sopenharmony_ci negated: false, 5007c67d6573Sopenharmony_ci })), 5008c67d6573Sopenharmony_ci ), 5009c67d6573Sopenharmony_ci }))) 5010c67d6573Sopenharmony_ci ); 5011c67d6573Sopenharmony_ci assert_eq!( 5012c67d6573Sopenharmony_ci parser(r"[a--b--c]").parse(), 5013c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5014c67d6573Sopenharmony_ci span: span(0..9), 5015c67d6573Sopenharmony_ci negated: false, 5016c67d6573Sopenharmony_ci kind: difference( 5017c67d6573Sopenharmony_ci span(1..8), 5018c67d6573Sopenharmony_ci difference( 5019c67d6573Sopenharmony_ci span(1..5), 5020c67d6573Sopenharmony_ci itemset(lit(span(1..2), 'a')), 5021c67d6573Sopenharmony_ci itemset(lit(span(4..5), 'b')), 5022c67d6573Sopenharmony_ci ), 5023c67d6573Sopenharmony_ci itemset(lit(span(7..8), 'c')), 5024c67d6573Sopenharmony_ci ), 5025c67d6573Sopenharmony_ci }))) 5026c67d6573Sopenharmony_ci ); 5027c67d6573Sopenharmony_ci assert_eq!( 5028c67d6573Sopenharmony_ci parser(r"[a~~b~~c]").parse(), 5029c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5030c67d6573Sopenharmony_ci span: span(0..9), 5031c67d6573Sopenharmony_ci negated: false, 5032c67d6573Sopenharmony_ci kind: symdifference( 5033c67d6573Sopenharmony_ci span(1..8), 5034c67d6573Sopenharmony_ci symdifference( 5035c67d6573Sopenharmony_ci span(1..5), 5036c67d6573Sopenharmony_ci itemset(lit(span(1..2), 'a')), 5037c67d6573Sopenharmony_ci itemset(lit(span(4..5), 'b')), 5038c67d6573Sopenharmony_ci ), 5039c67d6573Sopenharmony_ci itemset(lit(span(7..8), 'c')), 5040c67d6573Sopenharmony_ci ), 5041c67d6573Sopenharmony_ci }))) 5042c67d6573Sopenharmony_ci ); 5043c67d6573Sopenharmony_ci assert_eq!( 5044c67d6573Sopenharmony_ci parser(r"[\^&&^]").parse(), 5045c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5046c67d6573Sopenharmony_ci span: span(0..7), 5047c67d6573Sopenharmony_ci negated: false, 
5048c67d6573Sopenharmony_ci kind: intersection( 5049c67d6573Sopenharmony_ci span(1..6), 5050c67d6573Sopenharmony_ci itemset(ast::ClassSetItem::Literal(ast::Literal { 5051c67d6573Sopenharmony_ci span: span(1..3), 5052c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 5053c67d6573Sopenharmony_ci c: '^', 5054c67d6573Sopenharmony_ci })), 5055c67d6573Sopenharmony_ci itemset(lit(span(5..6), '^')), 5056c67d6573Sopenharmony_ci ), 5057c67d6573Sopenharmony_ci }))) 5058c67d6573Sopenharmony_ci ); 5059c67d6573Sopenharmony_ci assert_eq!( 5060c67d6573Sopenharmony_ci parser(r"[\&&&&]").parse(), 5061c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5062c67d6573Sopenharmony_ci span: span(0..7), 5063c67d6573Sopenharmony_ci negated: false, 5064c67d6573Sopenharmony_ci kind: intersection( 5065c67d6573Sopenharmony_ci span(1..6), 5066c67d6573Sopenharmony_ci itemset(ast::ClassSetItem::Literal(ast::Literal { 5067c67d6573Sopenharmony_ci span: span(1..3), 5068c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 5069c67d6573Sopenharmony_ci c: '&', 5070c67d6573Sopenharmony_ci })), 5071c67d6573Sopenharmony_ci itemset(lit(span(5..6), '&')), 5072c67d6573Sopenharmony_ci ), 5073c67d6573Sopenharmony_ci }))) 5074c67d6573Sopenharmony_ci ); 5075c67d6573Sopenharmony_ci assert_eq!( 5076c67d6573Sopenharmony_ci parser(r"[&&&&]").parse(), 5077c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5078c67d6573Sopenharmony_ci span: span(0..6), 5079c67d6573Sopenharmony_ci negated: false, 5080c67d6573Sopenharmony_ci kind: intersection( 5081c67d6573Sopenharmony_ci span(1..5), 5082c67d6573Sopenharmony_ci intersection( 5083c67d6573Sopenharmony_ci span(1..3), 5084c67d6573Sopenharmony_ci itemset(empty(span(1..1))), 5085c67d6573Sopenharmony_ci itemset(empty(span(3..3))), 5086c67d6573Sopenharmony_ci ), 5087c67d6573Sopenharmony_ci itemset(empty(span(5..5))), 5088c67d6573Sopenharmony_ci ), 5089c67d6573Sopenharmony_ci }))) 
5090c67d6573Sopenharmony_ci ); 5091c67d6573Sopenharmony_ci 5092c67d6573Sopenharmony_ci let pat = "[☃-⛄]"; 5093c67d6573Sopenharmony_ci assert_eq!( 5094c67d6573Sopenharmony_ci parser(pat).parse(), 5095c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5096c67d6573Sopenharmony_ci span: span_range(pat, 0..9), 5097c67d6573Sopenharmony_ci negated: false, 5098c67d6573Sopenharmony_ci kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { 5099c67d6573Sopenharmony_ci span: span_range(pat, 1..8), 5100c67d6573Sopenharmony_ci start: ast::Literal { 5101c67d6573Sopenharmony_ci span: span_range(pat, 1..4), 5102c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5103c67d6573Sopenharmony_ci c: '☃', 5104c67d6573Sopenharmony_ci }, 5105c67d6573Sopenharmony_ci end: ast::Literal { 5106c67d6573Sopenharmony_ci span: span_range(pat, 5..8), 5107c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5108c67d6573Sopenharmony_ci c: '⛄', 5109c67d6573Sopenharmony_ci }, 5110c67d6573Sopenharmony_ci })), 5111c67d6573Sopenharmony_ci }))) 5112c67d6573Sopenharmony_ci ); 5113c67d6573Sopenharmony_ci 5114c67d6573Sopenharmony_ci assert_eq!( 5115c67d6573Sopenharmony_ci parser(r"[]]").parse(), 5116c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5117c67d6573Sopenharmony_ci span: span(0..3), 5118c67d6573Sopenharmony_ci negated: false, 5119c67d6573Sopenharmony_ci kind: itemset(lit(span(1..2), ']')), 5120c67d6573Sopenharmony_ci }))) 5121c67d6573Sopenharmony_ci ); 5122c67d6573Sopenharmony_ci assert_eq!( 5123c67d6573Sopenharmony_ci parser(r"[]\[]").parse(), 5124c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5125c67d6573Sopenharmony_ci span: span(0..5), 5126c67d6573Sopenharmony_ci negated: false, 5127c67d6573Sopenharmony_ci kind: union( 5128c67d6573Sopenharmony_ci span(1..4), 5129c67d6573Sopenharmony_ci vec![ 5130c67d6573Sopenharmony_ci lit(span(1..2), ']'), 5131c67d6573Sopenharmony_ci 
ast::ClassSetItem::Literal(ast::Literal { 5132c67d6573Sopenharmony_ci span: span(2..4), 5133c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 5134c67d6573Sopenharmony_ci c: '[', 5135c67d6573Sopenharmony_ci }), 5136c67d6573Sopenharmony_ci ] 5137c67d6573Sopenharmony_ci ), 5138c67d6573Sopenharmony_ci }))) 5139c67d6573Sopenharmony_ci ); 5140c67d6573Sopenharmony_ci assert_eq!( 5141c67d6573Sopenharmony_ci parser(r"[\[]]").parse(), 5142c67d6573Sopenharmony_ci Ok(concat( 5143c67d6573Sopenharmony_ci 0..5, 5144c67d6573Sopenharmony_ci vec![ 5145c67d6573Sopenharmony_ci Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { 5146c67d6573Sopenharmony_ci span: span(0..4), 5147c67d6573Sopenharmony_ci negated: false, 5148c67d6573Sopenharmony_ci kind: itemset(ast::ClassSetItem::Literal( 5149c67d6573Sopenharmony_ci ast::Literal { 5150c67d6573Sopenharmony_ci span: span(1..3), 5151c67d6573Sopenharmony_ci kind: ast::LiteralKind::Punctuation, 5152c67d6573Sopenharmony_ci c: '[', 5153c67d6573Sopenharmony_ci } 5154c67d6573Sopenharmony_ci )), 5155c67d6573Sopenharmony_ci })), 5156c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 5157c67d6573Sopenharmony_ci span: span(4..5), 5158c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5159c67d6573Sopenharmony_ci c: ']', 5160c67d6573Sopenharmony_ci }), 5161c67d6573Sopenharmony_ci ] 5162c67d6573Sopenharmony_ci )) 5163c67d6573Sopenharmony_ci ); 5164c67d6573Sopenharmony_ci 5165c67d6573Sopenharmony_ci assert_eq!( 5166c67d6573Sopenharmony_ci parser("[").parse().unwrap_err(), 5167c67d6573Sopenharmony_ci TestError { 5168c67d6573Sopenharmony_ci span: span(0..1), 5169c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5170c67d6573Sopenharmony_ci } 5171c67d6573Sopenharmony_ci ); 5172c67d6573Sopenharmony_ci assert_eq!( 5173c67d6573Sopenharmony_ci parser("[[").parse().unwrap_err(), 5174c67d6573Sopenharmony_ci TestError { 5175c67d6573Sopenharmony_ci span: span(1..2), 5176c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 
5177c67d6573Sopenharmony_ci } 5178c67d6573Sopenharmony_ci ); 5179c67d6573Sopenharmony_ci assert_eq!( 5180c67d6573Sopenharmony_ci parser("[[-]").parse().unwrap_err(), 5181c67d6573Sopenharmony_ci TestError { 5182c67d6573Sopenharmony_ci span: span(0..1), 5183c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5184c67d6573Sopenharmony_ci } 5185c67d6573Sopenharmony_ci ); 5186c67d6573Sopenharmony_ci assert_eq!( 5187c67d6573Sopenharmony_ci parser("[[[:alnum:]").parse().unwrap_err(), 5188c67d6573Sopenharmony_ci TestError { 5189c67d6573Sopenharmony_ci span: span(1..2), 5190c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5191c67d6573Sopenharmony_ci } 5192c67d6573Sopenharmony_ci ); 5193c67d6573Sopenharmony_ci assert_eq!( 5194c67d6573Sopenharmony_ci parser(r"[\b]").parse().unwrap_err(), 5195c67d6573Sopenharmony_ci TestError { 5196c67d6573Sopenharmony_ci span: span(1..3), 5197c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassEscapeInvalid, 5198c67d6573Sopenharmony_ci } 5199c67d6573Sopenharmony_ci ); 5200c67d6573Sopenharmony_ci assert_eq!( 5201c67d6573Sopenharmony_ci parser(r"[\w-a]").parse().unwrap_err(), 5202c67d6573Sopenharmony_ci TestError { 5203c67d6573Sopenharmony_ci span: span(1..3), 5204c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassRangeLiteral, 5205c67d6573Sopenharmony_ci } 5206c67d6573Sopenharmony_ci ); 5207c67d6573Sopenharmony_ci assert_eq!( 5208c67d6573Sopenharmony_ci parser(r"[a-\w]").parse().unwrap_err(), 5209c67d6573Sopenharmony_ci TestError { 5210c67d6573Sopenharmony_ci span: span(3..5), 5211c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassRangeLiteral, 5212c67d6573Sopenharmony_ci } 5213c67d6573Sopenharmony_ci ); 5214c67d6573Sopenharmony_ci assert_eq!( 5215c67d6573Sopenharmony_ci parser(r"[z-a]").parse().unwrap_err(), 5216c67d6573Sopenharmony_ci TestError { 5217c67d6573Sopenharmony_ci span: span(1..4), 5218c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassRangeInvalid, 5219c67d6573Sopenharmony_ci } 5220c67d6573Sopenharmony_ci ); 
5221c67d6573Sopenharmony_ci 5222c67d6573Sopenharmony_ci assert_eq!( 5223c67d6573Sopenharmony_ci parser_ignore_whitespace("[a ").parse().unwrap_err(), 5224c67d6573Sopenharmony_ci TestError { 5225c67d6573Sopenharmony_ci span: span(0..1), 5226c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5227c67d6573Sopenharmony_ci } 5228c67d6573Sopenharmony_ci ); 5229c67d6573Sopenharmony_ci assert_eq!( 5230c67d6573Sopenharmony_ci parser_ignore_whitespace("[a- ").parse().unwrap_err(), 5231c67d6573Sopenharmony_ci TestError { 5232c67d6573Sopenharmony_ci span: span(0..1), 5233c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5234c67d6573Sopenharmony_ci } 5235c67d6573Sopenharmony_ci ); 5236c67d6573Sopenharmony_ci } 5237c67d6573Sopenharmony_ci 5238c67d6573Sopenharmony_ci #[test] 5239c67d6573Sopenharmony_ci fn parse_set_class_open() { 5240c67d6573Sopenharmony_ci assert_eq!(parser("[a]").parse_set_class_open(), { 5241c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5242c67d6573Sopenharmony_ci span: span(0..1), 5243c67d6573Sopenharmony_ci negated: false, 5244c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5245c67d6573Sopenharmony_ci span: span(1..1), 5246c67d6573Sopenharmony_ci items: vec![], 5247c67d6573Sopenharmony_ci }), 5248c67d6573Sopenharmony_ci }; 5249c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { span: span(1..1), items: vec![] }; 5250c67d6573Sopenharmony_ci Ok((set, union)) 5251c67d6573Sopenharmony_ci }); 5252c67d6573Sopenharmony_ci assert_eq!( 5253c67d6573Sopenharmony_ci parser_ignore_whitespace("[ a]").parse_set_class_open(), 5254c67d6573Sopenharmony_ci { 5255c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5256c67d6573Sopenharmony_ci span: span(0..4), 5257c67d6573Sopenharmony_ci negated: false, 5258c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5259c67d6573Sopenharmony_ci span: span(4..4), 5260c67d6573Sopenharmony_ci items: vec![], 5261c67d6573Sopenharmony_ci }), 
5262c67d6573Sopenharmony_ci }; 5263c67d6573Sopenharmony_ci let union = 5264c67d6573Sopenharmony_ci ast::ClassSetUnion { span: span(4..4), items: vec![] }; 5265c67d6573Sopenharmony_ci Ok((set, union)) 5266c67d6573Sopenharmony_ci } 5267c67d6573Sopenharmony_ci ); 5268c67d6573Sopenharmony_ci assert_eq!(parser("[^a]").parse_set_class_open(), { 5269c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5270c67d6573Sopenharmony_ci span: span(0..2), 5271c67d6573Sopenharmony_ci negated: true, 5272c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5273c67d6573Sopenharmony_ci span: span(2..2), 5274c67d6573Sopenharmony_ci items: vec![], 5275c67d6573Sopenharmony_ci }), 5276c67d6573Sopenharmony_ci }; 5277c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { span: span(2..2), items: vec![] }; 5278c67d6573Sopenharmony_ci Ok((set, union)) 5279c67d6573Sopenharmony_ci }); 5280c67d6573Sopenharmony_ci assert_eq!( 5281c67d6573Sopenharmony_ci parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), 5282c67d6573Sopenharmony_ci { 5283c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5284c67d6573Sopenharmony_ci span: span(0..4), 5285c67d6573Sopenharmony_ci negated: true, 5286c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5287c67d6573Sopenharmony_ci span: span(4..4), 5288c67d6573Sopenharmony_ci items: vec![], 5289c67d6573Sopenharmony_ci }), 5290c67d6573Sopenharmony_ci }; 5291c67d6573Sopenharmony_ci let union = 5292c67d6573Sopenharmony_ci ast::ClassSetUnion { span: span(4..4), items: vec![] }; 5293c67d6573Sopenharmony_ci Ok((set, union)) 5294c67d6573Sopenharmony_ci } 5295c67d6573Sopenharmony_ci ); 5296c67d6573Sopenharmony_ci assert_eq!(parser("[-a]").parse_set_class_open(), { 5297c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5298c67d6573Sopenharmony_ci span: span(0..2), 5299c67d6573Sopenharmony_ci negated: false, 5300c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5301c67d6573Sopenharmony_ci span: 
span(1..1), 5302c67d6573Sopenharmony_ci items: vec![], 5303c67d6573Sopenharmony_ci }), 5304c67d6573Sopenharmony_ci }; 5305c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5306c67d6573Sopenharmony_ci span: span(1..2), 5307c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5308c67d6573Sopenharmony_ci span: span(1..2), 5309c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5310c67d6573Sopenharmony_ci c: '-', 5311c67d6573Sopenharmony_ci })], 5312c67d6573Sopenharmony_ci }; 5313c67d6573Sopenharmony_ci Ok((set, union)) 5314c67d6573Sopenharmony_ci }); 5315c67d6573Sopenharmony_ci assert_eq!( 5316c67d6573Sopenharmony_ci parser_ignore_whitespace("[ - a]").parse_set_class_open(), 5317c67d6573Sopenharmony_ci { 5318c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5319c67d6573Sopenharmony_ci span: span(0..4), 5320c67d6573Sopenharmony_ci negated: false, 5321c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5322c67d6573Sopenharmony_ci span: span(2..2), 5323c67d6573Sopenharmony_ci items: vec![], 5324c67d6573Sopenharmony_ci }), 5325c67d6573Sopenharmony_ci }; 5326c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5327c67d6573Sopenharmony_ci span: span(2..3), 5328c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5329c67d6573Sopenharmony_ci span: span(2..3), 5330c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5331c67d6573Sopenharmony_ci c: '-', 5332c67d6573Sopenharmony_ci })], 5333c67d6573Sopenharmony_ci }; 5334c67d6573Sopenharmony_ci Ok((set, union)) 5335c67d6573Sopenharmony_ci } 5336c67d6573Sopenharmony_ci ); 5337c67d6573Sopenharmony_ci assert_eq!(parser("[^-a]").parse_set_class_open(), { 5338c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5339c67d6573Sopenharmony_ci span: span(0..3), 5340c67d6573Sopenharmony_ci negated: true, 5341c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5342c67d6573Sopenharmony_ci span: span(2..2), 
5343c67d6573Sopenharmony_ci items: vec![], 5344c67d6573Sopenharmony_ci }), 5345c67d6573Sopenharmony_ci }; 5346c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5347c67d6573Sopenharmony_ci span: span(2..3), 5348c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5349c67d6573Sopenharmony_ci span: span(2..3), 5350c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5351c67d6573Sopenharmony_ci c: '-', 5352c67d6573Sopenharmony_ci })], 5353c67d6573Sopenharmony_ci }; 5354c67d6573Sopenharmony_ci Ok((set, union)) 5355c67d6573Sopenharmony_ci }); 5356c67d6573Sopenharmony_ci assert_eq!(parser("[--a]").parse_set_class_open(), { 5357c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5358c67d6573Sopenharmony_ci span: span(0..3), 5359c67d6573Sopenharmony_ci negated: false, 5360c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5361c67d6573Sopenharmony_ci span: span(1..1), 5362c67d6573Sopenharmony_ci items: vec![], 5363c67d6573Sopenharmony_ci }), 5364c67d6573Sopenharmony_ci }; 5365c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5366c67d6573Sopenharmony_ci span: span(1..3), 5367c67d6573Sopenharmony_ci items: vec![ 5368c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ast::Literal { 5369c67d6573Sopenharmony_ci span: span(1..2), 5370c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5371c67d6573Sopenharmony_ci c: '-', 5372c67d6573Sopenharmony_ci }), 5373c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ast::Literal { 5374c67d6573Sopenharmony_ci span: span(2..3), 5375c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5376c67d6573Sopenharmony_ci c: '-', 5377c67d6573Sopenharmony_ci }), 5378c67d6573Sopenharmony_ci ], 5379c67d6573Sopenharmony_ci }; 5380c67d6573Sopenharmony_ci Ok((set, union)) 5381c67d6573Sopenharmony_ci }); 5382c67d6573Sopenharmony_ci assert_eq!(parser("[]a]").parse_set_class_open(), { 5383c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5384c67d6573Sopenharmony_ci span: 
span(0..2), 5385c67d6573Sopenharmony_ci negated: false, 5386c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5387c67d6573Sopenharmony_ci span: span(1..1), 5388c67d6573Sopenharmony_ci items: vec![], 5389c67d6573Sopenharmony_ci }), 5390c67d6573Sopenharmony_ci }; 5391c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5392c67d6573Sopenharmony_ci span: span(1..2), 5393c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5394c67d6573Sopenharmony_ci span: span(1..2), 5395c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5396c67d6573Sopenharmony_ci c: ']', 5397c67d6573Sopenharmony_ci })], 5398c67d6573Sopenharmony_ci }; 5399c67d6573Sopenharmony_ci Ok((set, union)) 5400c67d6573Sopenharmony_ci }); 5401c67d6573Sopenharmony_ci assert_eq!( 5402c67d6573Sopenharmony_ci parser_ignore_whitespace("[ ] a]").parse_set_class_open(), 5403c67d6573Sopenharmony_ci { 5404c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5405c67d6573Sopenharmony_ci span: span(0..4), 5406c67d6573Sopenharmony_ci negated: false, 5407c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5408c67d6573Sopenharmony_ci span: span(2..2), 5409c67d6573Sopenharmony_ci items: vec![], 5410c67d6573Sopenharmony_ci }), 5411c67d6573Sopenharmony_ci }; 5412c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5413c67d6573Sopenharmony_ci span: span(2..3), 5414c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5415c67d6573Sopenharmony_ci span: span(2..3), 5416c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5417c67d6573Sopenharmony_ci c: ']', 5418c67d6573Sopenharmony_ci })], 5419c67d6573Sopenharmony_ci }; 5420c67d6573Sopenharmony_ci Ok((set, union)) 5421c67d6573Sopenharmony_ci } 5422c67d6573Sopenharmony_ci ); 5423c67d6573Sopenharmony_ci assert_eq!(parser("[^]a]").parse_set_class_open(), { 5424c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5425c67d6573Sopenharmony_ci span: span(0..3), 
5426c67d6573Sopenharmony_ci negated: true, 5427c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5428c67d6573Sopenharmony_ci span: span(2..2), 5429c67d6573Sopenharmony_ci items: vec![], 5430c67d6573Sopenharmony_ci }), 5431c67d6573Sopenharmony_ci }; 5432c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5433c67d6573Sopenharmony_ci span: span(2..3), 5434c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5435c67d6573Sopenharmony_ci span: span(2..3), 5436c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5437c67d6573Sopenharmony_ci c: ']', 5438c67d6573Sopenharmony_ci })], 5439c67d6573Sopenharmony_ci }; 5440c67d6573Sopenharmony_ci Ok((set, union)) 5441c67d6573Sopenharmony_ci }); 5442c67d6573Sopenharmony_ci assert_eq!(parser("[-]a]").parse_set_class_open(), { 5443c67d6573Sopenharmony_ci let set = ast::ClassBracketed { 5444c67d6573Sopenharmony_ci span: span(0..2), 5445c67d6573Sopenharmony_ci negated: false, 5446c67d6573Sopenharmony_ci kind: ast::ClassSet::union(ast::ClassSetUnion { 5447c67d6573Sopenharmony_ci span: span(1..1), 5448c67d6573Sopenharmony_ci items: vec![], 5449c67d6573Sopenharmony_ci }), 5450c67d6573Sopenharmony_ci }; 5451c67d6573Sopenharmony_ci let union = ast::ClassSetUnion { 5452c67d6573Sopenharmony_ci span: span(1..2), 5453c67d6573Sopenharmony_ci items: vec![ast::ClassSetItem::Literal(ast::Literal { 5454c67d6573Sopenharmony_ci span: span(1..2), 5455c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5456c67d6573Sopenharmony_ci c: '-', 5457c67d6573Sopenharmony_ci })], 5458c67d6573Sopenharmony_ci }; 5459c67d6573Sopenharmony_ci Ok((set, union)) 5460c67d6573Sopenharmony_ci }); 5461c67d6573Sopenharmony_ci 5462c67d6573Sopenharmony_ci assert_eq!( 5463c67d6573Sopenharmony_ci parser("[").parse_set_class_open().unwrap_err(), 5464c67d6573Sopenharmony_ci TestError { 5465c67d6573Sopenharmony_ci span: span(0..1), 5466c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 
5467c67d6573Sopenharmony_ci } 5468c67d6573Sopenharmony_ci ); 5469c67d6573Sopenharmony_ci assert_eq!( 5470c67d6573Sopenharmony_ci parser_ignore_whitespace("[ ") 5471c67d6573Sopenharmony_ci .parse_set_class_open() 5472c67d6573Sopenharmony_ci .unwrap_err(), 5473c67d6573Sopenharmony_ci TestError { 5474c67d6573Sopenharmony_ci span: span(0..5), 5475c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5476c67d6573Sopenharmony_ci } 5477c67d6573Sopenharmony_ci ); 5478c67d6573Sopenharmony_ci assert_eq!( 5479c67d6573Sopenharmony_ci parser("[^").parse_set_class_open().unwrap_err(), 5480c67d6573Sopenharmony_ci TestError { 5481c67d6573Sopenharmony_ci span: span(0..2), 5482c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5483c67d6573Sopenharmony_ci } 5484c67d6573Sopenharmony_ci ); 5485c67d6573Sopenharmony_ci assert_eq!( 5486c67d6573Sopenharmony_ci parser("[]").parse_set_class_open().unwrap_err(), 5487c67d6573Sopenharmony_ci TestError { 5488c67d6573Sopenharmony_ci span: span(0..2), 5489c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5490c67d6573Sopenharmony_ci } 5491c67d6573Sopenharmony_ci ); 5492c67d6573Sopenharmony_ci assert_eq!( 5493c67d6573Sopenharmony_ci parser("[-").parse_set_class_open().unwrap_err(), 5494c67d6573Sopenharmony_ci TestError { 5495c67d6573Sopenharmony_ci span: span(0..0), 5496c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5497c67d6573Sopenharmony_ci } 5498c67d6573Sopenharmony_ci ); 5499c67d6573Sopenharmony_ci assert_eq!( 5500c67d6573Sopenharmony_ci parser("[--").parse_set_class_open().unwrap_err(), 5501c67d6573Sopenharmony_ci TestError { 5502c67d6573Sopenharmony_ci span: span(0..0), 5503c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5504c67d6573Sopenharmony_ci } 5505c67d6573Sopenharmony_ci ); 5506c67d6573Sopenharmony_ci 5507c67d6573Sopenharmony_ci // See: https://github.com/rust-lang/regex/issues/792 5508c67d6573Sopenharmony_ci assert_eq!( 5509c67d6573Sopenharmony_ci 
parser("(?x)[-#]").parse_with_comments().unwrap_err(), 5510c67d6573Sopenharmony_ci TestError { 5511c67d6573Sopenharmony_ci span: span(4..4), 5512c67d6573Sopenharmony_ci kind: ast::ErrorKind::ClassUnclosed, 5513c67d6573Sopenharmony_ci } 5514c67d6573Sopenharmony_ci ); 5515c67d6573Sopenharmony_ci } 5516c67d6573Sopenharmony_ci 5517c67d6573Sopenharmony_ci #[test] 5518c67d6573Sopenharmony_ci fn maybe_parse_ascii_class() { 5519c67d6573Sopenharmony_ci assert_eq!( 5520c67d6573Sopenharmony_ci parser(r"[:alnum:]").maybe_parse_ascii_class(), 5521c67d6573Sopenharmony_ci Some(ast::ClassAscii { 5522c67d6573Sopenharmony_ci span: span(0..9), 5523c67d6573Sopenharmony_ci kind: ast::ClassAsciiKind::Alnum, 5524c67d6573Sopenharmony_ci negated: false, 5525c67d6573Sopenharmony_ci }) 5526c67d6573Sopenharmony_ci ); 5527c67d6573Sopenharmony_ci assert_eq!( 5528c67d6573Sopenharmony_ci parser(r"[:alnum:]A").maybe_parse_ascii_class(), 5529c67d6573Sopenharmony_ci Some(ast::ClassAscii { 5530c67d6573Sopenharmony_ci span: span(0..9), 5531c67d6573Sopenharmony_ci kind: ast::ClassAsciiKind::Alnum, 5532c67d6573Sopenharmony_ci negated: false, 5533c67d6573Sopenharmony_ci }) 5534c67d6573Sopenharmony_ci ); 5535c67d6573Sopenharmony_ci assert_eq!( 5536c67d6573Sopenharmony_ci parser(r"[:^alnum:]").maybe_parse_ascii_class(), 5537c67d6573Sopenharmony_ci Some(ast::ClassAscii { 5538c67d6573Sopenharmony_ci span: span(0..10), 5539c67d6573Sopenharmony_ci kind: ast::ClassAsciiKind::Alnum, 5540c67d6573Sopenharmony_ci negated: true, 5541c67d6573Sopenharmony_ci }) 5542c67d6573Sopenharmony_ci ); 5543c67d6573Sopenharmony_ci 5544c67d6573Sopenharmony_ci let p = parser(r"[:"); 5545c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5546c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 5547c67d6573Sopenharmony_ci 5548c67d6573Sopenharmony_ci let p = parser(r"[:^"); 5549c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5550c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 
5551c67d6573Sopenharmony_ci 5552c67d6573Sopenharmony_ci let p = parser(r"[^:alnum:]"); 5553c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5554c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 5555c67d6573Sopenharmony_ci 5556c67d6573Sopenharmony_ci let p = parser(r"[:alnnum:]"); 5557c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5558c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 5559c67d6573Sopenharmony_ci 5560c67d6573Sopenharmony_ci let p = parser(r"[:alnum]"); 5561c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5562c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 5563c67d6573Sopenharmony_ci 5564c67d6573Sopenharmony_ci let p = parser(r"[:alnum:"); 5565c67d6573Sopenharmony_ci assert_eq!(p.maybe_parse_ascii_class(), None); 5566c67d6573Sopenharmony_ci assert_eq!(p.offset(), 0); 5567c67d6573Sopenharmony_ci } 5568c67d6573Sopenharmony_ci 5569c67d6573Sopenharmony_ci #[test] 5570c67d6573Sopenharmony_ci fn parse_unicode_class() { 5571c67d6573Sopenharmony_ci assert_eq!( 5572c67d6573Sopenharmony_ci parser(r"\pN").parse_escape(), 5573c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5574c67d6573Sopenharmony_ci span: span(0..3), 5575c67d6573Sopenharmony_ci negated: false, 5576c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::OneLetter('N'), 5577c67d6573Sopenharmony_ci })) 5578c67d6573Sopenharmony_ci ); 5579c67d6573Sopenharmony_ci assert_eq!( 5580c67d6573Sopenharmony_ci parser(r"\PN").parse_escape(), 5581c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5582c67d6573Sopenharmony_ci span: span(0..3), 5583c67d6573Sopenharmony_ci negated: true, 5584c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::OneLetter('N'), 5585c67d6573Sopenharmony_ci })) 5586c67d6573Sopenharmony_ci ); 5587c67d6573Sopenharmony_ci assert_eq!( 5588c67d6573Sopenharmony_ci parser(r"\p{N}").parse_escape(), 5589c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 
5590c67d6573Sopenharmony_ci span: span(0..5), 5591c67d6573Sopenharmony_ci negated: false, 5592c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::Named(s("N")), 5593c67d6573Sopenharmony_ci })) 5594c67d6573Sopenharmony_ci ); 5595c67d6573Sopenharmony_ci assert_eq!( 5596c67d6573Sopenharmony_ci parser(r"\P{N}").parse_escape(), 5597c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5598c67d6573Sopenharmony_ci span: span(0..5), 5599c67d6573Sopenharmony_ci negated: true, 5600c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::Named(s("N")), 5601c67d6573Sopenharmony_ci })) 5602c67d6573Sopenharmony_ci ); 5603c67d6573Sopenharmony_ci assert_eq!( 5604c67d6573Sopenharmony_ci parser(r"\p{Greek}").parse_escape(), 5605c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5606c67d6573Sopenharmony_ci span: span(0..9), 5607c67d6573Sopenharmony_ci negated: false, 5608c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::Named(s("Greek")), 5609c67d6573Sopenharmony_ci })) 5610c67d6573Sopenharmony_ci ); 5611c67d6573Sopenharmony_ci 5612c67d6573Sopenharmony_ci assert_eq!( 5613c67d6573Sopenharmony_ci parser(r"\p{scx:Katakana}").parse_escape(), 5614c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5615c67d6573Sopenharmony_ci span: span(0..16), 5616c67d6573Sopenharmony_ci negated: false, 5617c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 5618c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Colon, 5619c67d6573Sopenharmony_ci name: s("scx"), 5620c67d6573Sopenharmony_ci value: s("Katakana"), 5621c67d6573Sopenharmony_ci }, 5622c67d6573Sopenharmony_ci })) 5623c67d6573Sopenharmony_ci ); 5624c67d6573Sopenharmony_ci assert_eq!( 5625c67d6573Sopenharmony_ci parser(r"\p{scx=Katakana}").parse_escape(), 5626c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5627c67d6573Sopenharmony_ci span: span(0..16), 5628c67d6573Sopenharmony_ci negated: false, 5629c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 
5630c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Equal, 5631c67d6573Sopenharmony_ci name: s("scx"), 5632c67d6573Sopenharmony_ci value: s("Katakana"), 5633c67d6573Sopenharmony_ci }, 5634c67d6573Sopenharmony_ci })) 5635c67d6573Sopenharmony_ci ); 5636c67d6573Sopenharmony_ci assert_eq!( 5637c67d6573Sopenharmony_ci parser(r"\p{scx!=Katakana}").parse_escape(), 5638c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5639c67d6573Sopenharmony_ci span: span(0..17), 5640c67d6573Sopenharmony_ci negated: false, 5641c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 5642c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::NotEqual, 5643c67d6573Sopenharmony_ci name: s("scx"), 5644c67d6573Sopenharmony_ci value: s("Katakana"), 5645c67d6573Sopenharmony_ci }, 5646c67d6573Sopenharmony_ci })) 5647c67d6573Sopenharmony_ci ); 5648c67d6573Sopenharmony_ci 5649c67d6573Sopenharmony_ci assert_eq!( 5650c67d6573Sopenharmony_ci parser(r"\p{:}").parse_escape(), 5651c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5652c67d6573Sopenharmony_ci span: span(0..5), 5653c67d6573Sopenharmony_ci negated: false, 5654c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 5655c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Colon, 5656c67d6573Sopenharmony_ci name: s(""), 5657c67d6573Sopenharmony_ci value: s(""), 5658c67d6573Sopenharmony_ci }, 5659c67d6573Sopenharmony_ci })) 5660c67d6573Sopenharmony_ci ); 5661c67d6573Sopenharmony_ci assert_eq!( 5662c67d6573Sopenharmony_ci parser(r"\p{=}").parse_escape(), 5663c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5664c67d6573Sopenharmony_ci span: span(0..5), 5665c67d6573Sopenharmony_ci negated: false, 5666c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 5667c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::Equal, 5668c67d6573Sopenharmony_ci name: s(""), 5669c67d6573Sopenharmony_ci value: s(""), 5670c67d6573Sopenharmony_ci }, 5671c67d6573Sopenharmony_ci })) 
5672c67d6573Sopenharmony_ci ); 5673c67d6573Sopenharmony_ci assert_eq!( 5674c67d6573Sopenharmony_ci parser(r"\p{!=}").parse_escape(), 5675c67d6573Sopenharmony_ci Ok(Primitive::Unicode(ast::ClassUnicode { 5676c67d6573Sopenharmony_ci span: span(0..6), 5677c67d6573Sopenharmony_ci negated: false, 5678c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::NamedValue { 5679c67d6573Sopenharmony_ci op: ast::ClassUnicodeOpKind::NotEqual, 5680c67d6573Sopenharmony_ci name: s(""), 5681c67d6573Sopenharmony_ci value: s(""), 5682c67d6573Sopenharmony_ci }, 5683c67d6573Sopenharmony_ci })) 5684c67d6573Sopenharmony_ci ); 5685c67d6573Sopenharmony_ci 5686c67d6573Sopenharmony_ci assert_eq!( 5687c67d6573Sopenharmony_ci parser(r"\p").parse_escape().unwrap_err(), 5688c67d6573Sopenharmony_ci TestError { 5689c67d6573Sopenharmony_ci span: span(2..2), 5690c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 5691c67d6573Sopenharmony_ci } 5692c67d6573Sopenharmony_ci ); 5693c67d6573Sopenharmony_ci assert_eq!( 5694c67d6573Sopenharmony_ci parser(r"\p{").parse_escape().unwrap_err(), 5695c67d6573Sopenharmony_ci TestError { 5696c67d6573Sopenharmony_ci span: span(3..3), 5697c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 5698c67d6573Sopenharmony_ci } 5699c67d6573Sopenharmony_ci ); 5700c67d6573Sopenharmony_ci assert_eq!( 5701c67d6573Sopenharmony_ci parser(r"\p{N").parse_escape().unwrap_err(), 5702c67d6573Sopenharmony_ci TestError { 5703c67d6573Sopenharmony_ci span: span(4..4), 5704c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 5705c67d6573Sopenharmony_ci } 5706c67d6573Sopenharmony_ci ); 5707c67d6573Sopenharmony_ci assert_eq!( 5708c67d6573Sopenharmony_ci parser(r"\p{Greek").parse_escape().unwrap_err(), 5709c67d6573Sopenharmony_ci TestError { 5710c67d6573Sopenharmony_ci span: span(8..8), 5711c67d6573Sopenharmony_ci kind: ast::ErrorKind::EscapeUnexpectedEof, 5712c67d6573Sopenharmony_ci } 5713c67d6573Sopenharmony_ci ); 5714c67d6573Sopenharmony_ci 
5715c67d6573Sopenharmony_ci assert_eq!( 5716c67d6573Sopenharmony_ci parser(r"\pNz").parse(), 5717c67d6573Sopenharmony_ci Ok(Ast::Concat(ast::Concat { 5718c67d6573Sopenharmony_ci span: span(0..4), 5719c67d6573Sopenharmony_ci asts: vec![ 5720c67d6573Sopenharmony_ci Ast::Class(ast::Class::Unicode(ast::ClassUnicode { 5721c67d6573Sopenharmony_ci span: span(0..3), 5722c67d6573Sopenharmony_ci negated: false, 5723c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::OneLetter('N'), 5724c67d6573Sopenharmony_ci })), 5725c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 5726c67d6573Sopenharmony_ci span: span(3..4), 5727c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5728c67d6573Sopenharmony_ci c: 'z', 5729c67d6573Sopenharmony_ci }), 5730c67d6573Sopenharmony_ci ], 5731c67d6573Sopenharmony_ci })) 5732c67d6573Sopenharmony_ci ); 5733c67d6573Sopenharmony_ci assert_eq!( 5734c67d6573Sopenharmony_ci parser(r"\p{Greek}z").parse(), 5735c67d6573Sopenharmony_ci Ok(Ast::Concat(ast::Concat { 5736c67d6573Sopenharmony_ci span: span(0..10), 5737c67d6573Sopenharmony_ci asts: vec![ 5738c67d6573Sopenharmony_ci Ast::Class(ast::Class::Unicode(ast::ClassUnicode { 5739c67d6573Sopenharmony_ci span: span(0..9), 5740c67d6573Sopenharmony_ci negated: false, 5741c67d6573Sopenharmony_ci kind: ast::ClassUnicodeKind::Named(s("Greek")), 5742c67d6573Sopenharmony_ci })), 5743c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 5744c67d6573Sopenharmony_ci span: span(9..10), 5745c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5746c67d6573Sopenharmony_ci c: 'z', 5747c67d6573Sopenharmony_ci }), 5748c67d6573Sopenharmony_ci ], 5749c67d6573Sopenharmony_ci })) 5750c67d6573Sopenharmony_ci ); 5751c67d6573Sopenharmony_ci assert_eq!( 5752c67d6573Sopenharmony_ci parser(r"\p\{").parse().unwrap_err(), 5753c67d6573Sopenharmony_ci TestError { 5754c67d6573Sopenharmony_ci span: span(2..3), 5755c67d6573Sopenharmony_ci kind: ast::ErrorKind::UnicodeClassInvalid, 5756c67d6573Sopenharmony_ci } 
5757c67d6573Sopenharmony_ci ); 5758c67d6573Sopenharmony_ci assert_eq!( 5759c67d6573Sopenharmony_ci parser(r"\P\{").parse().unwrap_err(), 5760c67d6573Sopenharmony_ci TestError { 5761c67d6573Sopenharmony_ci span: span(2..3), 5762c67d6573Sopenharmony_ci kind: ast::ErrorKind::UnicodeClassInvalid, 5763c67d6573Sopenharmony_ci } 5764c67d6573Sopenharmony_ci ); 5765c67d6573Sopenharmony_ci } 5766c67d6573Sopenharmony_ci 5767c67d6573Sopenharmony_ci #[test] 5768c67d6573Sopenharmony_ci fn parse_perl_class() { 5769c67d6573Sopenharmony_ci assert_eq!( 5770c67d6573Sopenharmony_ci parser(r"\d").parse_escape(), 5771c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5772c67d6573Sopenharmony_ci span: span(0..2), 5773c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Digit, 5774c67d6573Sopenharmony_ci negated: false, 5775c67d6573Sopenharmony_ci })) 5776c67d6573Sopenharmony_ci ); 5777c67d6573Sopenharmony_ci assert_eq!( 5778c67d6573Sopenharmony_ci parser(r"\D").parse_escape(), 5779c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5780c67d6573Sopenharmony_ci span: span(0..2), 5781c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Digit, 5782c67d6573Sopenharmony_ci negated: true, 5783c67d6573Sopenharmony_ci })) 5784c67d6573Sopenharmony_ci ); 5785c67d6573Sopenharmony_ci assert_eq!( 5786c67d6573Sopenharmony_ci parser(r"\s").parse_escape(), 5787c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5788c67d6573Sopenharmony_ci span: span(0..2), 5789c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Space, 5790c67d6573Sopenharmony_ci negated: false, 5791c67d6573Sopenharmony_ci })) 5792c67d6573Sopenharmony_ci ); 5793c67d6573Sopenharmony_ci assert_eq!( 5794c67d6573Sopenharmony_ci parser(r"\S").parse_escape(), 5795c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5796c67d6573Sopenharmony_ci span: span(0..2), 5797c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Space, 5798c67d6573Sopenharmony_ci negated: true, 5799c67d6573Sopenharmony_ci })) 5800c67d6573Sopenharmony_ci 
); 5801c67d6573Sopenharmony_ci assert_eq!( 5802c67d6573Sopenharmony_ci parser(r"\w").parse_escape(), 5803c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5804c67d6573Sopenharmony_ci span: span(0..2), 5805c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 5806c67d6573Sopenharmony_ci negated: false, 5807c67d6573Sopenharmony_ci })) 5808c67d6573Sopenharmony_ci ); 5809c67d6573Sopenharmony_ci assert_eq!( 5810c67d6573Sopenharmony_ci parser(r"\W").parse_escape(), 5811c67d6573Sopenharmony_ci Ok(Primitive::Perl(ast::ClassPerl { 5812c67d6573Sopenharmony_ci span: span(0..2), 5813c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Word, 5814c67d6573Sopenharmony_ci negated: true, 5815c67d6573Sopenharmony_ci })) 5816c67d6573Sopenharmony_ci ); 5817c67d6573Sopenharmony_ci 5818c67d6573Sopenharmony_ci assert_eq!( 5819c67d6573Sopenharmony_ci parser(r"\d").parse(), 5820c67d6573Sopenharmony_ci Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl { 5821c67d6573Sopenharmony_ci span: span(0..2), 5822c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Digit, 5823c67d6573Sopenharmony_ci negated: false, 5824c67d6573Sopenharmony_ci }))) 5825c67d6573Sopenharmony_ci ); 5826c67d6573Sopenharmony_ci assert_eq!( 5827c67d6573Sopenharmony_ci parser(r"\dz").parse(), 5828c67d6573Sopenharmony_ci Ok(Ast::Concat(ast::Concat { 5829c67d6573Sopenharmony_ci span: span(0..3), 5830c67d6573Sopenharmony_ci asts: vec![ 5831c67d6573Sopenharmony_ci Ast::Class(ast::Class::Perl(ast::ClassPerl { 5832c67d6573Sopenharmony_ci span: span(0..2), 5833c67d6573Sopenharmony_ci kind: ast::ClassPerlKind::Digit, 5834c67d6573Sopenharmony_ci negated: false, 5835c67d6573Sopenharmony_ci })), 5836c67d6573Sopenharmony_ci Ast::Literal(ast::Literal { 5837c67d6573Sopenharmony_ci span: span(2..3), 5838c67d6573Sopenharmony_ci kind: ast::LiteralKind::Verbatim, 5839c67d6573Sopenharmony_ci c: 'z', 5840c67d6573Sopenharmony_ci }), 5841c67d6573Sopenharmony_ci ], 5842c67d6573Sopenharmony_ci })) 5843c67d6573Sopenharmony_ci ); 
5844c67d6573Sopenharmony_ci } 5845c67d6573Sopenharmony_ci 5846c67d6573Sopenharmony_ci // This tests a bug fix where the nest limit checker wasn't decrementing 5847c67d6573Sopenharmony_ci // its depth during post-traversal, which causes long regexes to trip 5848c67d6573Sopenharmony_ci // the default limit too aggressively. 5849c67d6573Sopenharmony_ci #[test] 5850c67d6573Sopenharmony_ci fn regression_454_nest_too_big() { 5851c67d6573Sopenharmony_ci let pattern = r#" 5852c67d6573Sopenharmony_ci 2(?: 5853c67d6573Sopenharmony_ci [45]\d{3}| 5854c67d6573Sopenharmony_ci 7(?: 5855c67d6573Sopenharmony_ci 1[0-267]| 5856c67d6573Sopenharmony_ci 2[0-289]| 5857c67d6573Sopenharmony_ci 3[0-29]| 5858c67d6573Sopenharmony_ci 4[01]| 5859c67d6573Sopenharmony_ci 5[1-3]| 5860c67d6573Sopenharmony_ci 6[013]| 5861c67d6573Sopenharmony_ci 7[0178]| 5862c67d6573Sopenharmony_ci 91 5863c67d6573Sopenharmony_ci )| 5864c67d6573Sopenharmony_ci 8(?: 5865c67d6573Sopenharmony_ci 0[125]| 5866c67d6573Sopenharmony_ci [139][1-6]| 5867c67d6573Sopenharmony_ci 2[0157-9]| 5868c67d6573Sopenharmony_ci 41| 5869c67d6573Sopenharmony_ci 6[1-35]| 5870c67d6573Sopenharmony_ci 7[1-5]| 5871c67d6573Sopenharmony_ci 8[1-8]| 5872c67d6573Sopenharmony_ci 90 5873c67d6573Sopenharmony_ci )| 5874c67d6573Sopenharmony_ci 9(?: 5875c67d6573Sopenharmony_ci 0[0-2]| 5876c67d6573Sopenharmony_ci 1[0-4]| 5877c67d6573Sopenharmony_ci 2[568]| 5878c67d6573Sopenharmony_ci 3[3-6]| 5879c67d6573Sopenharmony_ci 5[5-7]| 5880c67d6573Sopenharmony_ci 6[0167]| 5881c67d6573Sopenharmony_ci 7[15]| 5882c67d6573Sopenharmony_ci 8[0146-9] 5883c67d6573Sopenharmony_ci ) 5884c67d6573Sopenharmony_ci )\d{4} 5885c67d6573Sopenharmony_ci "#; 5886c67d6573Sopenharmony_ci assert!(parser_nest_limit(pattern, 50).parse().is_ok()); 5887c67d6573Sopenharmony_ci } 5888c67d6573Sopenharmony_ci 5889c67d6573Sopenharmony_ci // This tests that we treat a trailing `-` in a character class as a 5890c67d6573Sopenharmony_ci // literal `-` even when whitespace mode is enabled and there is 
whitespace 5891c67d6573Sopenharmony_ci // after the trailing `-`. 5892c67d6573Sopenharmony_ci #[test] 5893c67d6573Sopenharmony_ci fn regression_455_trailing_dash_ignore_whitespace() { 5894c67d6573Sopenharmony_ci assert!(parser("(?x)[ / - ]").parse().is_ok()); 5895c67d6573Sopenharmony_ci assert!(parser("(?x)[ a - ]").parse().is_ok()); 5896c67d6573Sopenharmony_ci assert!(parser( 5897c67d6573Sopenharmony_ci "(?x)[ 5898c67d6573Sopenharmony_ci a 5899c67d6573Sopenharmony_ci - ] 5900c67d6573Sopenharmony_ci " 5901c67d6573Sopenharmony_ci ) 5902c67d6573Sopenharmony_ci .parse() 5903c67d6573Sopenharmony_ci .is_ok()); 5904c67d6573Sopenharmony_ci assert!(parser( 5905c67d6573Sopenharmony_ci "(?x)[ 5906c67d6573Sopenharmony_ci a # wat 5907c67d6573Sopenharmony_ci - ] 5908c67d6573Sopenharmony_ci " 5909c67d6573Sopenharmony_ci ) 5910c67d6573Sopenharmony_ci .parse() 5911c67d6573Sopenharmony_ci .is_ok()); 5912c67d6573Sopenharmony_ci 5913c67d6573Sopenharmony_ci assert!(parser("(?x)[ / -").parse().is_err()); 5914c67d6573Sopenharmony_ci assert!(parser("(?x)[ / - ").parse().is_err()); 5915c67d6573Sopenharmony_ci assert!(parser( 5916c67d6573Sopenharmony_ci "(?x)[ 5917c67d6573Sopenharmony_ci / - 5918c67d6573Sopenharmony_ci " 5919c67d6573Sopenharmony_ci ) 5920c67d6573Sopenharmony_ci .parse() 5921c67d6573Sopenharmony_ci .is_err()); 5922c67d6573Sopenharmony_ci assert!(parser( 5923c67d6573Sopenharmony_ci "(?x)[ 5924c67d6573Sopenharmony_ci / - # wat 5925c67d6573Sopenharmony_ci " 5926c67d6573Sopenharmony_ci ) 5927c67d6573Sopenharmony_ci .parse() 5928c67d6573Sopenharmony_ci .is_err()); 5929c67d6573Sopenharmony_ci } 5930c67d6573Sopenharmony_ci} 5931