Lines Matching defs:parser
2 This module provides a regular expression parser.
19 /// is used as intermediate state in the parser.
110 /// A builder for a regular expression parser.
112 /// This builder permits modifying configuration options for the parser.
127 /// Create a new parser builder with a default configuration.
136 /// Build a parser from this configuration with the given pattern.
153 /// Set the nesting limit for this parser.
157 /// groups), then an error is returned by the parser.
168 /// pattern string. In particular, this is viable since this parser
217 /// A regular expression parser.
227 /// The current position of the parser.
231 /// The maximum number of open parens/brackets allowed. If the parser
234 /// Whether to support octal syntax or not. When `false`, the parser will
239 /// `ParserBuilder`. It is used when resetting the parser's state.
259 /// ParserI is the internal parser implementation.
267 /// work against the internal interface of the parser.
270 /// The parser state/configuration.
271 parser: P,
301 /// Note that a parser's character class stack is only non-empty when parsing
325 /// Create a new parser with a default configuration.
327 /// The parser can be run with either the `parse` or `parse_with_comments`
330 /// To set configuration options on the parser, use
350 /// Reset the internal state of a parser.
353 /// parser from running with inconsistent state (say, if a previous
354 /// invocation returned an error and the parser is reused).
367 /// Build an internal parser from a parser configuration and a pattern.
368 fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369 ParserI { parser, pattern }
372 /// Return a reference to the parser state.
373 fn parser(&self) -> &Parser {
374 self.parser.borrow()
387 /// Return the current offset of the parser.
392 self.parser().pos.get().offset
395 /// Return the current line number of the parser.
399 self.parser().pos.get().line
402 /// Return the current column of the parser.
406 self.parser().pos.get().column
417 let current = self.parser().capture_index.get();
421 self.parser().capture_index.set(i);
425 /// Adds the given capture name to this parser. If this capture name has
428 let mut names = self.parser().capture_names.borrow_mut();
443 /// Return whether the parser should ignore whitespace or not.
445 self.parser().ignore_whitespace.get()
448 /// Return the character at the current position of the parser.
465 /// Bump the parser to the next Unicode scalar value.
480 self.parser().pos.set(Position { offset, line, column });
484 /// If the substring starting at the current position of the parser has
485 /// the given prefix, then bump the parser to the character immediately
486 /// following the prefix and return true. Otherwise, don't bump the parser
499 /// Returns true if and only if the parser is positioned at a look-around
513 /// Bump the parser, and if the `x` flag is enabled, bump through any
514 /// subsequent spaces. Return true if and only if the parser is not at
525 /// comments), then this will advance the parser through all whitespace
530 /// This should be used selectively throughout the parser where
556 self.parser().comments.borrow_mut().push(comment);
563 /// Peek at the next character in the input without advancing the parser.
573 /// Like peek, but will ignore spaces when the parser is in whitespace
604 /// Return the current position of the parser, which includes the offset,
607 self.parser().pos.get()
610 /// Create a span at the current position of the parser. Both the start
630 /// Parse and push a single alternation on to the parser's internal stack.
637 /// This assumes the parser is currently positioned at `|` and will advance
638 /// the parser to the character following `|`.
648 /// Pushes or adds the given branch of an alternation to the parser's
653 let mut stack = self.parser().stack_group.borrow_mut();
665 /// parser's internal stack. Return a fresh concatenation corresponding
671 /// This assumes that the parser is currently positioned on the opening
672 /// parenthesis. It advances the parser to the character at the start
684 self.parser().ignore_whitespace.set(v);
696 self.parser().stack_group.borrow_mut().push(
703 self.parser().ignore_whitespace.set(new_ignore_whitespace);
709 /// Pop a group AST from the parser's internal stack and set the group's
713 /// This assumes that the parser is currently positioned on the closing
714 /// parenthesis and advances the parser to the character following the `)`.
723 let mut stack = self.parser().stack_group.borrow_mut();
746 self.parser().ignore_whitespace.set(ignore_whitespace);
764 /// Pop the last state from the parser's internal stack, if it exists, and
769 /// This assumes that the parser has advanced to the end.
773 let mut stack = self.parser().stack_group.borrow_mut();
793 // `GroupState::Alternation`s adjacent in the parser's stack,
806 /// parsing context onto the parser's stack. This assumes that the parser
821 self.parser()
829 /// parser stack. The union given corresponds to the last union built
833 /// This assumes that the parser is positioned at a `]` and will advance
834 /// the parser to the byte immediately following the `]`.
840 /// If there is no corresponding opening bracket on the parser's stack,
851 let mut stack = self.parser().stack_class.borrow_mut();
857 // the character class parser is only initiated when it sees
860 // seeing a `]`, then we signal the character class parser
867 // Namely, every push to the class parser stack is guarded by
893 for state in self.parser().stack_class.borrow().iter().rev() {
903 /// Push the current set of class items on to the class parser's stack as
916 self.parser()
923 /// Pop a character class set from the character class parser stack. If the
930 let mut stack = self.parser().stack_class.borrow_mut();
958 assert_eq!(self.offset(), 0, "parser can only be used once");
959 self.parser().reset();
1003 &mut *self.parser().comments.borrow_mut(),
1014 /// This assumes that the parser is currently positioned at the repetition
1015 /// operator and advances the parser to the first character after the
1069 /// This assumes that the parser is currently positioned at the opening `{`
1070 /// and advances the parser to the first character after the operator.
1172 /// The parser should be positioned at the opening parenthesis.
1174 /// This advances the parser to the character before the start of the
1246 /// Parses a capture group name. Assumes that the parser is positioned at
1248 /// possibly be EOF). This advances the parser to the first character
1301 /// This advances the parser to the character immediately following the
1360 /// Parse the current character as a flag. Do not advance the parser.
1384 /// This assumes that the parser expects a primitive at the current
1386 /// For example, if the parser's position is at `|`, then `|` will be
1389 /// This advances the parser to the first character immediately following
1429 /// This assumes the parser is positioned at the start of the escape
1430 /// sequence, i.e., `\`. It advances the parser to the first position
1446 if !self.parser().octal {
1456 '8'..='9' if !self.parser().octal => {
1528 /// long. This expects the parser to be positioned at the first octal
1529 /// digit and advances the parser to the first character immediately
1539 assert!(self.parser().octal);
1565 /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566 /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1592 /// expects the parser to be positioned at the first digit and will advance
1593 /// the parser to the first character immediately following the escape
1606 let mut scratch = self.parser().scratch.borrow_mut();
1623 // The final bump just moves the parser past the literal, which may
1642 /// the parser to be positioned at the opening brace `{` and will advance
1643 /// the parser to the first character following the closing brace `}`.
1652 let mut scratch = self.parser().scratch.borrow_mut();
1699 /// This expects the parser to be positioned at the first position where
1700 /// a decimal digit could occur. This will advance the parser to the byte
1706 let mut scratch = self.parser().scratch.borrow_mut();
1735 /// This assumes the parser is positioned at the opening `[`. If parsing
1736 /// is successful, then the parser is advanced to the position immediately
1753 // class. If ASCII class parsing fails, then the parser
1755 if !self.parser().stack_class.borrow().is_empty() {
1846 /// This assumes the parser is positioned at the beginning of a primitive,
1847 /// and advances the parser to the first position after the primitive if
1877 /// when it is popped from the parser's stack.
1879 /// This assumes the parser is positioned at the opening `[` and advances
1880 /// the parser to the first non-special byte of the character class.
1951 /// This assumes the parser is positioned at the opening `[`.
1954 /// advance the parser and `None` is returned. Otherwise, the parser is
1979 // If parsing fails, then we back up the parser to this starting point.
1983 self.parser().pos.set(start);
1987 self.parser().pos.set(start);
1993 self.parser().pos.set(start);
2000 self.parser().pos.set(start);
2005 self.parser().pos.set(start);
2011 self.parser().pos.set(start);
2024 /// the parser is positioned at the `p` (or `P` for negation) and will
2025 /// advance the parser to the character immediately following the class.
2032 let mut scratch = self.parser().scratch.borrow_mut();
2105 /// parser is currently at a valid character class name and will be
2129 /// The parser that is checking the nest limit.
2152 let limit = self.p.parser().nest_limit;
2348 fn parser(pattern: &str) -> ParserI<'_, Parser> {
2353 let parser = ParserBuilder::new().octal(true).build();
2354 ParserI::new(parser, pattern)
2660 let astc = parser(pat).parse_with_comments().unwrap();
2701 assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2703 parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2735 parser(pat).parse(),
2749 parser(pat).parse(),
2767 parser(pat).parse(),
2795 parser(pat).parse(),
2814 parser(pat).parse(),
2829 parser(pat).parse(),
2847 parser(pat).parse(),
2866 parser(pat).parse(),
2884 parser(pat).parse().unwrap_err(),
2896 parser(pat).parse(),
2909 parser(pat).parse(),
2937 parser(r"a*").parse(),
2949 parser(r"a+").parse(),
2962 parser(r"a?").parse(),
2974 parser(r"a??").parse(),
2986 parser(r"a?").parse(),
2998 parser(r"a?b").parse(),
3016 parser(r"a??b").parse(),
3034 parser(r"ab?").parse(),
3052 parser(r"(ab)?").parse(),
3068 parser(r"|a?").parse(),
3087 parser(r"*").parse().unwrap_err(),
3094 parser(r"(?i)*").parse().unwrap_err(),
3101 parser(r"(*)").parse().unwrap_err(),
3108 parser(r"(?:?)").parse().unwrap_err(),
3115 parser(r"+").parse().unwrap_err(),
3122 parser(r"?").parse().unwrap_err(),
3129 parser(r"(?)").parse().unwrap_err(),
3136 parser(r"|*").parse().unwrap_err(),
3143 parser(r"|+").parse().unwrap_err(),
3150 parser(r"|?").parse().unwrap_err(),
3161 parser(r"a{5}").parse(),
3175 parser(r"a{5,}").parse(),
3189 parser(r"a{5,9}").parse(),
3203 parser(r"a{5}?").parse(),
3217 parser(r"ab{5}").parse(),
3237 parser(r"ab{5}c").parse(),
3259 parser(r"a{ 5 }").parse(),
3273 parser(r"a{ 5 , 9 }").parse(),
3302 parser(r"(?i){0}").parse().unwrap_err(),
3309 parser(r"(?m){1,1}").parse().unwrap_err(),
3316 parser(r"a{]}").parse().unwrap_err(),
3323 parser(r"a{1,]}").parse().unwrap_err(),
3330 parser(r"a{").parse().unwrap_err(),
3337 parser(r"a{}").parse().unwrap_err(),
3344 parser(r"a{a").parse().unwrap_err(),
3351 parser(r"a{9999999999}").parse().unwrap_err(),
3358 parser(r"a{9").parse().unwrap_err(),
3365 parser(r"a{9,a").parse().unwrap_err(),
3372 parser(r"a{9,9999999999}").parse().unwrap_err(),
3379 parser(r"a{9,").parse().unwrap_err(),
3386 parser(r"a{9,11").parse().unwrap_err(),
3393 parser(r"a{2,1}").parse().unwrap_err(),
3400 parser(r"{5}").parse().unwrap_err(),
3407 parser(r"|{5}").parse().unwrap_err(),
3418 parser(r"a|b").parse(),
3425 parser(r"(a|b)").parse(),
3437 parser(r"a|b|c").parse(),
3444 parser(r"ax|by|cz").parse(),
3455 parser(r"(ax|by|cz)").parse(),
3470 parser(r"(ax|(by|(cz)))").parse(),
3505 parser(r"|").parse(),
3512 parser(r"||").parse(),
3523 parser(r"a|").parse(),
3527 parser(r"|a").parse(),
3532 parser(r"(|)").parse(),
3543 parser(r"(a|)").parse(),
3551 parser(r"(|a)").parse(),
3560 parser(r"a|b)").parse().unwrap_err(),
3567 parser(r"(a|b").parse().unwrap_err(),
3578 parser(r"(?=a)").parse().unwrap_err(),
3585 parser(r"(?!a)").parse().unwrap_err(),
3592 parser(r"(?<=a)").parse().unwrap_err(),
3599 parser(r"(?<!a)").parse().unwrap_err(),
3610 parser("(?i)").parse(),
3625 parser("(?iU)").parse(),
3648 parser("(?i-U)").parse(),
3676 parser("()").parse(),
3684 parser("(a)").parse(),
3692 parser("(())").parse(),
3705 parser("(?:a)").parse(),
3717 parser("(?i:a)").parse(),
3733 parser("(?i-U:a)").parse(),
3762 parser("(").parse().unwrap_err(),
3769 parser("(?").parse().unwrap_err(),
3776 parser("(?P").parse().unwrap_err(),
3783 parser("(?P<").parse().unwrap_err(),
3790 parser("(a").parse().unwrap_err(),
3797 parser("(()").parse().unwrap_err(),
3804 parser(")").parse().unwrap_err(),
3811 parser("a)").parse().unwrap_err(),
3822 parser("(?P<a>z)").parse(),
3834 parser("(?P<abc>z)").parse(),
3847 parser("(?P<a_1>z)").parse(),
3860 parser("(?P<a.1>z)").parse(),
3873 parser("(?P<a[1]>z)").parse(),
3886 parser("(?P<").parse().unwrap_err(),
3893 parser("(?P<>z)").parse().unwrap_err(),
3900 parser("(?P<a").parse().unwrap_err(),
3907 parser("(?P<ab").parse().unwrap_err(),
3914 parser("(?P<0a").parse().unwrap_err(),
3921 parser("(?P<~").parse().unwrap_err(),
3928 parser("(?P<abc~").parse().unwrap_err(),
3935 parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3948 parser("i:").parse_flags(),
3958 parser("i)").parse_flags(),
3969 parser("isU:").parse_flags(),
3994 parser("-isU:").parse_flags(),
4022 parser("i-sU:").parse_flags(),
4051 parser("isU").parse_flags().unwrap_err(),
4058 parser("isUa:").parse_flags().unwrap_err(),
4065 parser("isUi:").parse_flags().unwrap_err(),
4072 parser("i-sU-i:").parse_flags().unwrap_err(),
4081 parser("-)").parse_flags().unwrap_err(),
4088 parser("i-)").parse_flags().unwrap_err(),
4095 parser("iU-)").parse_flags().unwrap_err(),
4105 assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
4106 assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
4107 assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
4108 assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
4109 assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
4110 assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
4113 parser("a").parse_flag().unwrap_err(),
4120 parser("☃").parse_flag().unwrap_err(),
4131 parser(r".").parse_primitive(),
4135 parser(r"^").parse_primitive(),
4142 parser(r"$").parse_primitive(),
4150 parser(r"a").parse_primitive(),
4158 parser(r"|").parse_primitive(),
4166 parser(r"☃").parse_primitive(),
4178 parser(r"\|").parse_primitive(),
4195 parser(pat).parse_primitive(),
4204 parser(r"\A").parse_primitive(),
4211 parser(r"\z").parse_primitive(),
4218 parser(r"\b").parse_primitive(),
4225 parser(r"\B").parse_primitive(),
4233 parser(r"\").parse_escape().unwrap_err(),
4240 parser(r"\y").parse_escape().unwrap_err(),
4251 parser(r"\0").parse_escape().unwrap_err(),
4258 parser(r"\9").parse_escape().unwrap_err(),
4346 parser(&pat).parse_escape(),
4356 parser(r"\xF").parse_escape().unwrap_err(),
4363 parser(r"\xG").parse_escape().unwrap_err(),
4370 parser(r"\xFG").parse_escape().unwrap_err(),
4387 parser(&pat).parse_escape(),
4399 parser(r"\uF").parse_escape().unwrap_err(),
4406 parser(r"\uG").parse_escape().unwrap_err(),
4413 parser(r"\uFG").parse_escape().unwrap_err(),
4420 parser(r"\uFFG").parse_escape().unwrap_err(),
4427 parser(r"\uFFFG").parse_escape().unwrap_err(),
4434 parser(r"\uD800").parse_escape().unwrap_err(),
4451 parser(&pat).parse_escape(),
4463 parser(r"\UF").parse_escape().unwrap_err(),
4470 parser(r"\UG").parse_escape().unwrap_err(),
4477 parser(r"\UFG").parse_escape().unwrap_err(),
4484 parser(r"\UFFG").parse_escape().unwrap_err(),
4491 parser(r"\UFFFG").parse_escape().unwrap_err(),
4498 parser(r"\UFFFFG").parse_escape().unwrap_err(),
4505 parser(r"\UFFFFFG").parse_escape().unwrap_err(),
4512 parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
4519 parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
4530 parser(r"\u{26c4}").parse_escape(),
4540 parser(r"\U{26c4}").parse_escape(),
4550 parser(r"\x{26c4}").parse_escape(),
4558 parser(r"\x{26C4}").parse_escape(),
4566 parser(r"\x{10fFfF}").parse_escape(),
4575 parser(r"\x").parse_escape().unwrap_err(),
4582 parser(r"\x{").parse_escape().unwrap_err(),
4589 parser(r"\x{FF").parse_escape().unwrap_err(),
4596 parser(r"\x{}").parse_escape().unwrap_err(),
4603 parser(r"\x{FGF}").parse_escape().unwrap_err(),
4610 parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
4617 parser(r"\x{D800}").parse_escape().unwrap_err(),
4624 parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4634 assert_eq!(parser("123").parse_decimal(), Ok(123));
4635 assert_eq!(parser("0").parse_decimal(), Ok(0));
4636 assert_eq!(parser("01").parse_decimal(), Ok(1));
4639 parser("-1").parse_decimal().unwrap_err(),
4643 parser("").parse_decimal().unwrap_err(),
4647 parser("9999999999").parse_decimal().unwrap_err(),
4767 parser("[[:alnum:]]").parse(),
4775 parser("[[[:alnum:]]]").parse(),
4787 parser("[[:alnum:]&&[:lower:]]").parse(),
4799 parser("[[:alnum:]--[:lower:]]").parse(),
4811 parser("[[:alnum:]~~[:lower:]]").parse(),
4824 parser("[a]").parse(),
4832 parser(r"[a\]]").parse(),
4850 parser(r"[a\-z]").parse(),
4869 parser("[ab]").parse(),
4880 parser("[a-]").parse(),
4891 parser("[-a]").parse(),
4902 parser(r"[\pL]").parse(),
4914 parser(r"[\w]").parse(),
4926 parser(r"[a\wz]").parse(),
4946 parser("[a-z]").parse(),
4954 parser("[a-cx-z]").parse(),
4968 parser(r"[\w&&a-cx-z]").parse(),
4990 parser(r"[a-cx-z&&\w]").parse(),
5012 parser(r"[a--b--c]").parse(),
5028 parser(r"[a~~b~~c]").parse(),
5044 parser(r"[\^&&^]").parse(),
5060 parser(r"[\&&&&]").parse(),
5076 parser(r"[&&&&]").parse(),
5094 parser(pat).parse(),
5115 parser(r"[]]").parse(),
5123 parser(r"[]\[]").parse(),
5141 parser(r"[\[]]").parse(),
5166 parser("[").parse().unwrap_err(),
5173 parser("[[").parse().unwrap_err(),
5180 parser("[[-]").parse().unwrap_err(),
5187 parser("[[[:alnum:]").parse().unwrap_err(),
5194 parser(r"[\b]").parse().unwrap_err(),
5201 parser(r"[\w-a]").parse().unwrap_err(),
5208 parser(r"[a-\w]").parse().unwrap_err(),
5215 parser(r"[z-a]").parse().unwrap_err(),
5240 assert_eq!(parser("[a]").parse_set_class_open(), {
5268 assert_eq!(parser("[^a]").parse_set_class_open(), {
5296 assert_eq!(parser("[-a]").parse_set_class_open(), {
5337 assert_eq!(parser("[^-a]").parse_set_class_open(), {
5356 assert_eq!(parser("[--a]").parse_set_class_open(), {
5382 assert_eq!(parser("[]a]").parse_set_class_open(), {
5423 assert_eq!(parser("[^]a]").parse_set_class_open(), {
5442 assert_eq!(parser("[-]a]").parse_set_class_open(), {
5463 parser("[").parse_set_class_open().unwrap_err(),
5479 parser("[^").parse_set_class_open().unwrap_err(),
5486 parser("[]").parse_set_class_open().unwrap_err(),
5493 parser("[-").parse_set_class_open().unwrap_err(),
5500 parser("[--").parse_set_class_open().unwrap_err(),
5509 parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5520 parser(r"[:alnum:]").maybe_parse_ascii_class(),
5528 parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5536 parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5544 let p = parser(r"[:");
5548 let p = parser(r"[:^");
5552 let p = parser(r"[^:alnum:]");
5556 let p = parser(r"[:alnnum:]");
5560 let p = parser(r"[:alnum]");
5564 let p = parser(r"[:alnum:");
5572 parser(r"\pN").parse_escape(),
5580 parser(r"\PN").parse_escape(),
5588 parser(r"\p{N}").parse_escape(),
5596 parser(r"\P{N}").parse_escape(),
5604 parser(r"\p{Greek}").parse_escape(),
5613 parser(r"\p{scx:Katakana}").parse_escape(),
5625 parser(r"\p{scx=Katakana}").parse_escape(),
5637 parser(r"\p{scx!=Katakana}").parse_escape(),
5650 parser(r"\p{:}").parse_escape(),
5662 parser(r"\p{=}").parse_escape(),
5674 parser(r"\p{!=}").parse_escape(),
5687 parser(r"\p").parse_escape().unwrap_err(),
5694 parser(r"\p{").parse_escape().unwrap_err(),
5701 parser(r"\p{N").parse_escape().unwrap_err(),
5708 parser(r"\p{Greek").parse_escape().unwrap_err(),
5716 parser(r"\pNz").parse(),
5734 parser(r"\p{Greek}z").parse(),
5752 parser(r"\p\{").parse().unwrap_err(),
5759 parser(r"\P\{").parse().unwrap_err(),
5770 parser(r"\d").parse_escape(),
5778 parser(r"\D").parse_escape(),
5786 parser(r"\s").parse_escape(),
5794 parser(r"\S").parse_escape(),
5802 parser(r"\w").parse_escape(),
5810 parser(r"\W").parse_escape(),
5819 parser(r"\d").parse(),
5827 parser(r"\dz").parse(),
5894 assert!(parser("(?x)[ / - ]").parse().is_ok());
5895 assert!(parser("(?x)[ a - ]").parse().is_ok());
5896 assert!(parser(
5904 assert!(parser(
5913 assert!(parser("(?x)[ / -").parse().is_err());
5914 assert!(parser("(?x)[ / - ").parse().is_err());
5915 assert!(parser(
5922 assert!(parser(