1c67d6573Sopenharmony_ci/*! 2c67d6573Sopenharmony_ciDefines a translator that converts an `Ast` to an `Hir`. 3c67d6573Sopenharmony_ci*/ 4c67d6573Sopenharmony_ci 5c67d6573Sopenharmony_ciuse std::cell::{Cell, RefCell}; 6c67d6573Sopenharmony_ciuse std::result; 7c67d6573Sopenharmony_ci 8c67d6573Sopenharmony_ciuse crate::ast::{self, Ast, Span, Visitor}; 9c67d6573Sopenharmony_ciuse crate::hir::{self, Error, ErrorKind, Hir}; 10c67d6573Sopenharmony_ciuse crate::unicode::{self, ClassQuery}; 11c67d6573Sopenharmony_ci 12c67d6573Sopenharmony_citype Result<T> = result::Result<T, Error>; 13c67d6573Sopenharmony_ci 14c67d6573Sopenharmony_ci/// A builder for constructing an AST->HIR translator. 15c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 16c67d6573Sopenharmony_cipub struct TranslatorBuilder { 17c67d6573Sopenharmony_ci allow_invalid_utf8: bool, 18c67d6573Sopenharmony_ci flags: Flags, 19c67d6573Sopenharmony_ci} 20c67d6573Sopenharmony_ci 21c67d6573Sopenharmony_ciimpl Default for TranslatorBuilder { 22c67d6573Sopenharmony_ci fn default() -> TranslatorBuilder { 23c67d6573Sopenharmony_ci TranslatorBuilder::new() 24c67d6573Sopenharmony_ci } 25c67d6573Sopenharmony_ci} 26c67d6573Sopenharmony_ci 27c67d6573Sopenharmony_ciimpl TranslatorBuilder { 28c67d6573Sopenharmony_ci /// Create a new translator builder with a default c onfiguration. 29c67d6573Sopenharmony_ci pub fn new() -> TranslatorBuilder { 30c67d6573Sopenharmony_ci TranslatorBuilder { 31c67d6573Sopenharmony_ci allow_invalid_utf8: false, 32c67d6573Sopenharmony_ci flags: Flags::default(), 33c67d6573Sopenharmony_ci } 34c67d6573Sopenharmony_ci } 35c67d6573Sopenharmony_ci 36c67d6573Sopenharmony_ci /// Build a translator using the current configuration. 
37c67d6573Sopenharmony_ci pub fn build(&self) -> Translator { 38c67d6573Sopenharmony_ci Translator { 39c67d6573Sopenharmony_ci stack: RefCell::new(vec![]), 40c67d6573Sopenharmony_ci flags: Cell::new(self.flags), 41c67d6573Sopenharmony_ci allow_invalid_utf8: self.allow_invalid_utf8, 42c67d6573Sopenharmony_ci } 43c67d6573Sopenharmony_ci } 44c67d6573Sopenharmony_ci 45c67d6573Sopenharmony_ci /// When enabled, translation will permit the construction of a regular 46c67d6573Sopenharmony_ci /// expression that may match invalid UTF-8. 47c67d6573Sopenharmony_ci /// 48c67d6573Sopenharmony_ci /// When disabled (the default), the translator is guaranteed to produce 49c67d6573Sopenharmony_ci /// an expression that will only ever match valid UTF-8 (otherwise, the 50c67d6573Sopenharmony_ci /// translator will return an error). 51c67d6573Sopenharmony_ci /// 52c67d6573Sopenharmony_ci /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII 53c67d6573Sopenharmony_ci /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause 54c67d6573Sopenharmony_ci /// the parser to return an error. Namely, a negated ASCII word boundary 55c67d6573Sopenharmony_ci /// can result in matching positions that aren't valid UTF-8 boundaries. 56c67d6573Sopenharmony_ci pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { 57c67d6573Sopenharmony_ci self.allow_invalid_utf8 = yes; 58c67d6573Sopenharmony_ci self 59c67d6573Sopenharmony_ci } 60c67d6573Sopenharmony_ci 61c67d6573Sopenharmony_ci /// Enable or disable the case insensitive flag (`i`) by default. 62c67d6573Sopenharmony_ci pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { 63c67d6573Sopenharmony_ci self.flags.case_insensitive = if yes { Some(true) } else { None }; 64c67d6573Sopenharmony_ci self 65c67d6573Sopenharmony_ci } 66c67d6573Sopenharmony_ci 67c67d6573Sopenharmony_ci /// Enable or disable the multi-line matching flag (`m`) by default. 
68c67d6573Sopenharmony_ci pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { 69c67d6573Sopenharmony_ci self.flags.multi_line = if yes { Some(true) } else { None }; 70c67d6573Sopenharmony_ci self 71c67d6573Sopenharmony_ci } 72c67d6573Sopenharmony_ci 73c67d6573Sopenharmony_ci /// Enable or disable the "dot matches any character" flag (`s`) by 74c67d6573Sopenharmony_ci /// default. 75c67d6573Sopenharmony_ci pub fn dot_matches_new_line( 76c67d6573Sopenharmony_ci &mut self, 77c67d6573Sopenharmony_ci yes: bool, 78c67d6573Sopenharmony_ci ) -> &mut TranslatorBuilder { 79c67d6573Sopenharmony_ci self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; 80c67d6573Sopenharmony_ci self 81c67d6573Sopenharmony_ci } 82c67d6573Sopenharmony_ci 83c67d6573Sopenharmony_ci /// Enable or disable the "swap greed" flag (`U`) by default. 84c67d6573Sopenharmony_ci pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { 85c67d6573Sopenharmony_ci self.flags.swap_greed = if yes { Some(true) } else { None }; 86c67d6573Sopenharmony_ci self 87c67d6573Sopenharmony_ci } 88c67d6573Sopenharmony_ci 89c67d6573Sopenharmony_ci /// Enable or disable the Unicode flag (`u`) by default. 90c67d6573Sopenharmony_ci pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { 91c67d6573Sopenharmony_ci self.flags.unicode = if yes { None } else { Some(false) }; 92c67d6573Sopenharmony_ci self 93c67d6573Sopenharmony_ci } 94c67d6573Sopenharmony_ci} 95c67d6573Sopenharmony_ci 96c67d6573Sopenharmony_ci/// A translator maps abstract syntax to a high level intermediate 97c67d6573Sopenharmony_ci/// representation. 98c67d6573Sopenharmony_ci/// 99c67d6573Sopenharmony_ci/// A translator may be benefit from reuse. That is, a translator can translate 100c67d6573Sopenharmony_ci/// many abstract syntax trees. 
101c67d6573Sopenharmony_ci/// 102c67d6573Sopenharmony_ci/// A `Translator` can be configured in more detail via a 103c67d6573Sopenharmony_ci/// [`TranslatorBuilder`](struct.TranslatorBuilder.html). 104c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 105c67d6573Sopenharmony_cipub struct Translator { 106c67d6573Sopenharmony_ci /// Our call stack, but on the heap. 107c67d6573Sopenharmony_ci stack: RefCell<Vec<HirFrame>>, 108c67d6573Sopenharmony_ci /// The current flag settings. 109c67d6573Sopenharmony_ci flags: Cell<Flags>, 110c67d6573Sopenharmony_ci /// Whether we're allowed to produce HIR that can match arbitrary bytes. 111c67d6573Sopenharmony_ci allow_invalid_utf8: bool, 112c67d6573Sopenharmony_ci} 113c67d6573Sopenharmony_ci 114c67d6573Sopenharmony_ciimpl Translator { 115c67d6573Sopenharmony_ci /// Create a new translator using the default configuration. 116c67d6573Sopenharmony_ci pub fn new() -> Translator { 117c67d6573Sopenharmony_ci TranslatorBuilder::new().build() 118c67d6573Sopenharmony_ci } 119c67d6573Sopenharmony_ci 120c67d6573Sopenharmony_ci /// Translate the given abstract syntax tree (AST) into a high level 121c67d6573Sopenharmony_ci /// intermediate representation (HIR). 122c67d6573Sopenharmony_ci /// 123c67d6573Sopenharmony_ci /// If there was a problem doing the translation, then an HIR-specific 124c67d6573Sopenharmony_ci /// error is returned. 125c67d6573Sopenharmony_ci /// 126c67d6573Sopenharmony_ci /// The original pattern string used to produce the `Ast` *must* also be 127c67d6573Sopenharmony_ci /// provided. The translator does not use the pattern string during any 128c67d6573Sopenharmony_ci /// correct translation, but is used for error reporting. 
129c67d6573Sopenharmony_ci pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> { 130c67d6573Sopenharmony_ci ast::visit(ast, TranslatorI::new(self, pattern)) 131c67d6573Sopenharmony_ci } 132c67d6573Sopenharmony_ci} 133c67d6573Sopenharmony_ci 134c67d6573Sopenharmony_ci/// An HirFrame is a single stack frame, represented explicitly, which is 135c67d6573Sopenharmony_ci/// created for each item in the Ast that we traverse. 136c67d6573Sopenharmony_ci/// 137c67d6573Sopenharmony_ci/// Note that technically, this type doesn't represent our entire stack 138c67d6573Sopenharmony_ci/// frame. In particular, the Ast visitor represents any state associated with 139c67d6573Sopenharmony_ci/// traversing the Ast itself. 140c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 141c67d6573Sopenharmony_cienum HirFrame { 142c67d6573Sopenharmony_ci /// An arbitrary HIR expression. These get pushed whenever we hit a base 143c67d6573Sopenharmony_ci /// case in the Ast. They get popped after an inductive (i.e., recursive) 144c67d6573Sopenharmony_ci /// step is complete. 145c67d6573Sopenharmony_ci Expr(Hir), 146c67d6573Sopenharmony_ci /// A Unicode character class. This frame is mutated as we descend into 147c67d6573Sopenharmony_ci /// the Ast of a character class (which is itself its own mini recursive 148c67d6573Sopenharmony_ci /// structure). 149c67d6573Sopenharmony_ci ClassUnicode(hir::ClassUnicode), 150c67d6573Sopenharmony_ci /// A byte-oriented character class. This frame is mutated as we descend 151c67d6573Sopenharmony_ci /// into the Ast of a character class (which is itself its own mini 152c67d6573Sopenharmony_ci /// recursive structure). 153c67d6573Sopenharmony_ci /// 154c67d6573Sopenharmony_ci /// Byte character classes are created when Unicode mode (`u`) is disabled. 155c67d6573Sopenharmony_ci /// If `allow_invalid_utf8` is disabled (the default), then a byte 156c67d6573Sopenharmony_ci /// character is only permitted to match ASCII text. 
157c67d6573Sopenharmony_ci ClassBytes(hir::ClassBytes), 158c67d6573Sopenharmony_ci /// This is pushed on to the stack upon first seeing any kind of group, 159c67d6573Sopenharmony_ci /// indicated by parentheses (including non-capturing groups). It is popped 160c67d6573Sopenharmony_ci /// upon leaving a group. 161c67d6573Sopenharmony_ci Group { 162c67d6573Sopenharmony_ci /// The old active flags when this group was opened. 163c67d6573Sopenharmony_ci /// 164c67d6573Sopenharmony_ci /// If this group sets flags, then the new active flags are set to the 165c67d6573Sopenharmony_ci /// result of merging the old flags with the flags introduced by this 166c67d6573Sopenharmony_ci /// group. If the group doesn't set any flags, then this is simply 167c67d6573Sopenharmony_ci /// equivalent to whatever flags were set when the group was opened. 168c67d6573Sopenharmony_ci /// 169c67d6573Sopenharmony_ci /// When this group is popped, the active flags should be restored to 170c67d6573Sopenharmony_ci /// the flags set here. 171c67d6573Sopenharmony_ci /// 172c67d6573Sopenharmony_ci /// The "active" flags correspond to whatever flags are set in the 173c67d6573Sopenharmony_ci /// Translator. 174c67d6573Sopenharmony_ci old_flags: Flags, 175c67d6573Sopenharmony_ci }, 176c67d6573Sopenharmony_ci /// This is pushed whenever a concatenation is observed. After visiting 177c67d6573Sopenharmony_ci /// every sub-expression in the concatenation, the translator's stack is 178c67d6573Sopenharmony_ci /// popped until it sees a Concat frame. 179c67d6573Sopenharmony_ci Concat, 180c67d6573Sopenharmony_ci /// This is pushed whenever an alternation is observed. After visiting 181c67d6573Sopenharmony_ci /// every sub-expression in the alternation, the translator's stack is 182c67d6573Sopenharmony_ci /// popped until it sees an Alternation frame. 
183c67d6573Sopenharmony_ci Alternation, 184c67d6573Sopenharmony_ci} 185c67d6573Sopenharmony_ci 186c67d6573Sopenharmony_ciimpl HirFrame { 187c67d6573Sopenharmony_ci /// Assert that the current stack frame is an Hir expression and return it. 188c67d6573Sopenharmony_ci fn unwrap_expr(self) -> Hir { 189c67d6573Sopenharmony_ci match self { 190c67d6573Sopenharmony_ci HirFrame::Expr(expr) => expr, 191c67d6573Sopenharmony_ci _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), 192c67d6573Sopenharmony_ci } 193c67d6573Sopenharmony_ci } 194c67d6573Sopenharmony_ci 195c67d6573Sopenharmony_ci /// Assert that the current stack frame is a Unicode class expression and 196c67d6573Sopenharmony_ci /// return it. 197c67d6573Sopenharmony_ci fn unwrap_class_unicode(self) -> hir::ClassUnicode { 198c67d6573Sopenharmony_ci match self { 199c67d6573Sopenharmony_ci HirFrame::ClassUnicode(cls) => cls, 200c67d6573Sopenharmony_ci _ => panic!( 201c67d6573Sopenharmony_ci "tried to unwrap Unicode class \ 202c67d6573Sopenharmony_ci from HirFrame, got: {:?}", 203c67d6573Sopenharmony_ci self 204c67d6573Sopenharmony_ci ), 205c67d6573Sopenharmony_ci } 206c67d6573Sopenharmony_ci } 207c67d6573Sopenharmony_ci 208c67d6573Sopenharmony_ci /// Assert that the current stack frame is a byte class expression and 209c67d6573Sopenharmony_ci /// return it. 
210c67d6573Sopenharmony_ci fn unwrap_class_bytes(self) -> hir::ClassBytes { 211c67d6573Sopenharmony_ci match self { 212c67d6573Sopenharmony_ci HirFrame::ClassBytes(cls) => cls, 213c67d6573Sopenharmony_ci _ => panic!( 214c67d6573Sopenharmony_ci "tried to unwrap byte class \ 215c67d6573Sopenharmony_ci from HirFrame, got: {:?}", 216c67d6573Sopenharmony_ci self 217c67d6573Sopenharmony_ci ), 218c67d6573Sopenharmony_ci } 219c67d6573Sopenharmony_ci } 220c67d6573Sopenharmony_ci 221c67d6573Sopenharmony_ci /// Assert that the current stack frame is a group indicator and return 222c67d6573Sopenharmony_ci /// its corresponding flags (the flags that were active at the time the 223c67d6573Sopenharmony_ci /// group was entered). 224c67d6573Sopenharmony_ci fn unwrap_group(self) -> Flags { 225c67d6573Sopenharmony_ci match self { 226c67d6573Sopenharmony_ci HirFrame::Group { old_flags } => old_flags, 227c67d6573Sopenharmony_ci _ => { 228c67d6573Sopenharmony_ci panic!("tried to unwrap group from HirFrame, got: {:?}", self) 229c67d6573Sopenharmony_ci } 230c67d6573Sopenharmony_ci } 231c67d6573Sopenharmony_ci } 232c67d6573Sopenharmony_ci} 233c67d6573Sopenharmony_ci 234c67d6573Sopenharmony_ciimpl<'t, 'p> Visitor for TranslatorI<'t, 'p> { 235c67d6573Sopenharmony_ci type Output = Hir; 236c67d6573Sopenharmony_ci type Err = Error; 237c67d6573Sopenharmony_ci 238c67d6573Sopenharmony_ci fn finish(self) -> Result<Hir> { 239c67d6573Sopenharmony_ci // ... otherwise, we should have exactly one HIR on the stack. 
240c67d6573Sopenharmony_ci assert_eq!(self.trans().stack.borrow().len(), 1); 241c67d6573Sopenharmony_ci Ok(self.pop().unwrap().unwrap_expr()) 242c67d6573Sopenharmony_ci } 243c67d6573Sopenharmony_ci 244c67d6573Sopenharmony_ci fn visit_pre(&mut self, ast: &Ast) -> Result<()> { 245c67d6573Sopenharmony_ci match *ast { 246c67d6573Sopenharmony_ci Ast::Class(ast::Class::Bracketed(_)) => { 247c67d6573Sopenharmony_ci if self.flags().unicode() { 248c67d6573Sopenharmony_ci let cls = hir::ClassUnicode::empty(); 249c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 250c67d6573Sopenharmony_ci } else { 251c67d6573Sopenharmony_ci let cls = hir::ClassBytes::empty(); 252c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 253c67d6573Sopenharmony_ci } 254c67d6573Sopenharmony_ci } 255c67d6573Sopenharmony_ci Ast::Group(ref x) => { 256c67d6573Sopenharmony_ci let old_flags = x 257c67d6573Sopenharmony_ci .flags() 258c67d6573Sopenharmony_ci .map(|ast| self.set_flags(ast)) 259c67d6573Sopenharmony_ci .unwrap_or_else(|| self.flags()); 260c67d6573Sopenharmony_ci self.push(HirFrame::Group { old_flags }); 261c67d6573Sopenharmony_ci } 262c67d6573Sopenharmony_ci Ast::Concat(ref x) if x.asts.is_empty() => {} 263c67d6573Sopenharmony_ci Ast::Concat(_) => { 264c67d6573Sopenharmony_ci self.push(HirFrame::Concat); 265c67d6573Sopenharmony_ci } 266c67d6573Sopenharmony_ci Ast::Alternation(ref x) if x.asts.is_empty() => {} 267c67d6573Sopenharmony_ci Ast::Alternation(_) => { 268c67d6573Sopenharmony_ci self.push(HirFrame::Alternation); 269c67d6573Sopenharmony_ci } 270c67d6573Sopenharmony_ci _ => {} 271c67d6573Sopenharmony_ci } 272c67d6573Sopenharmony_ci Ok(()) 273c67d6573Sopenharmony_ci } 274c67d6573Sopenharmony_ci 275c67d6573Sopenharmony_ci fn visit_post(&mut self, ast: &Ast) -> Result<()> { 276c67d6573Sopenharmony_ci match *ast { 277c67d6573Sopenharmony_ci Ast::Empty(_) => { 278c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::empty())); 279c67d6573Sopenharmony_ci } 
280c67d6573Sopenharmony_ci Ast::Flags(ref x) => { 281c67d6573Sopenharmony_ci self.set_flags(&x.flags); 282c67d6573Sopenharmony_ci // Flags in the AST are generally considered directives and 283c67d6573Sopenharmony_ci // not actual sub-expressions. However, they can be used in 284c67d6573Sopenharmony_ci // the concrete syntax like `((?i))`, and we need some kind of 285c67d6573Sopenharmony_ci // indication of an expression there, and Empty is the correct 286c67d6573Sopenharmony_ci // choice. 287c67d6573Sopenharmony_ci // 288c67d6573Sopenharmony_ci // There can also be things like `(?i)+`, but we rule those out 289c67d6573Sopenharmony_ci // in the parser. In the future, we might allow them for 290c67d6573Sopenharmony_ci // consistency sake. 291c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::empty())); 292c67d6573Sopenharmony_ci } 293c67d6573Sopenharmony_ci Ast::Literal(ref x) => { 294c67d6573Sopenharmony_ci self.push(HirFrame::Expr(self.hir_literal(x)?)); 295c67d6573Sopenharmony_ci } 296c67d6573Sopenharmony_ci Ast::Dot(span) => { 297c67d6573Sopenharmony_ci self.push(HirFrame::Expr(self.hir_dot(span)?)); 298c67d6573Sopenharmony_ci } 299c67d6573Sopenharmony_ci Ast::Assertion(ref x) => { 300c67d6573Sopenharmony_ci self.push(HirFrame::Expr(self.hir_assertion(x)?)); 301c67d6573Sopenharmony_ci } 302c67d6573Sopenharmony_ci Ast::Class(ast::Class::Perl(ref x)) => { 303c67d6573Sopenharmony_ci if self.flags().unicode() { 304c67d6573Sopenharmony_ci let cls = self.hir_perl_unicode_class(x)?; 305c67d6573Sopenharmony_ci let hcls = hir::Class::Unicode(cls); 306c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::class(hcls))); 307c67d6573Sopenharmony_ci } else { 308c67d6573Sopenharmony_ci let cls = self.hir_perl_byte_class(x); 309c67d6573Sopenharmony_ci let hcls = hir::Class::Bytes(cls); 310c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::class(hcls))); 311c67d6573Sopenharmony_ci } 312c67d6573Sopenharmony_ci } 313c67d6573Sopenharmony_ci Ast::Class(ast::Class::Unicode(ref 
x)) => { 314c67d6573Sopenharmony_ci let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); 315c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::class(cls))); 316c67d6573Sopenharmony_ci } 317c67d6573Sopenharmony_ci Ast::Class(ast::Class::Bracketed(ref ast)) => { 318c67d6573Sopenharmony_ci if self.flags().unicode() { 319c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 320c67d6573Sopenharmony_ci self.unicode_fold_and_negate( 321c67d6573Sopenharmony_ci &ast.span, 322c67d6573Sopenharmony_ci ast.negated, 323c67d6573Sopenharmony_ci &mut cls, 324c67d6573Sopenharmony_ci )?; 325c67d6573Sopenharmony_ci if cls.ranges().is_empty() { 326c67d6573Sopenharmony_ci return Err(self.error( 327c67d6573Sopenharmony_ci ast.span, 328c67d6573Sopenharmony_ci ErrorKind::EmptyClassNotAllowed, 329c67d6573Sopenharmony_ci )); 330c67d6573Sopenharmony_ci } 331c67d6573Sopenharmony_ci let expr = Hir::class(hir::Class::Unicode(cls)); 332c67d6573Sopenharmony_ci self.push(HirFrame::Expr(expr)); 333c67d6573Sopenharmony_ci } else { 334c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_bytes(); 335c67d6573Sopenharmony_ci self.bytes_fold_and_negate( 336c67d6573Sopenharmony_ci &ast.span, 337c67d6573Sopenharmony_ci ast.negated, 338c67d6573Sopenharmony_ci &mut cls, 339c67d6573Sopenharmony_ci )?; 340c67d6573Sopenharmony_ci if cls.ranges().is_empty() { 341c67d6573Sopenharmony_ci return Err(self.error( 342c67d6573Sopenharmony_ci ast.span, 343c67d6573Sopenharmony_ci ErrorKind::EmptyClassNotAllowed, 344c67d6573Sopenharmony_ci )); 345c67d6573Sopenharmony_ci } 346c67d6573Sopenharmony_ci 347c67d6573Sopenharmony_ci let expr = Hir::class(hir::Class::Bytes(cls)); 348c67d6573Sopenharmony_ci self.push(HirFrame::Expr(expr)); 349c67d6573Sopenharmony_ci } 350c67d6573Sopenharmony_ci } 351c67d6573Sopenharmony_ci Ast::Repetition(ref x) => { 352c67d6573Sopenharmony_ci let expr = self.pop().unwrap().unwrap_expr(); 353c67d6573Sopenharmony_ci 
self.push(HirFrame::Expr(self.hir_repetition(x, expr))); 354c67d6573Sopenharmony_ci } 355c67d6573Sopenharmony_ci Ast::Group(ref x) => { 356c67d6573Sopenharmony_ci let expr = self.pop().unwrap().unwrap_expr(); 357c67d6573Sopenharmony_ci let old_flags = self.pop().unwrap().unwrap_group(); 358c67d6573Sopenharmony_ci self.trans().flags.set(old_flags); 359c67d6573Sopenharmony_ci self.push(HirFrame::Expr(self.hir_group(x, expr))); 360c67d6573Sopenharmony_ci } 361c67d6573Sopenharmony_ci Ast::Concat(_) => { 362c67d6573Sopenharmony_ci let mut exprs = vec![]; 363c67d6573Sopenharmony_ci while let Some(HirFrame::Expr(expr)) = self.pop() { 364c67d6573Sopenharmony_ci if !expr.kind().is_empty() { 365c67d6573Sopenharmony_ci exprs.push(expr); 366c67d6573Sopenharmony_ci } 367c67d6573Sopenharmony_ci } 368c67d6573Sopenharmony_ci exprs.reverse(); 369c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::concat(exprs))); 370c67d6573Sopenharmony_ci } 371c67d6573Sopenharmony_ci Ast::Alternation(_) => { 372c67d6573Sopenharmony_ci let mut exprs = vec![]; 373c67d6573Sopenharmony_ci while let Some(HirFrame::Expr(expr)) = self.pop() { 374c67d6573Sopenharmony_ci exprs.push(expr); 375c67d6573Sopenharmony_ci } 376c67d6573Sopenharmony_ci exprs.reverse(); 377c67d6573Sopenharmony_ci self.push(HirFrame::Expr(Hir::alternation(exprs))); 378c67d6573Sopenharmony_ci } 379c67d6573Sopenharmony_ci } 380c67d6573Sopenharmony_ci Ok(()) 381c67d6573Sopenharmony_ci } 382c67d6573Sopenharmony_ci 383c67d6573Sopenharmony_ci fn visit_class_set_item_pre( 384c67d6573Sopenharmony_ci &mut self, 385c67d6573Sopenharmony_ci ast: &ast::ClassSetItem, 386c67d6573Sopenharmony_ci ) -> Result<()> { 387c67d6573Sopenharmony_ci match *ast { 388c67d6573Sopenharmony_ci ast::ClassSetItem::Bracketed(_) => { 389c67d6573Sopenharmony_ci if self.flags().unicode() { 390c67d6573Sopenharmony_ci let cls = hir::ClassUnicode::empty(); 391c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 392c67d6573Sopenharmony_ci } else { 
393c67d6573Sopenharmony_ci let cls = hir::ClassBytes::empty(); 394c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 395c67d6573Sopenharmony_ci } 396c67d6573Sopenharmony_ci } 397c67d6573Sopenharmony_ci // We needn't handle the Union case here since the visitor will 398c67d6573Sopenharmony_ci // do it for us. 399c67d6573Sopenharmony_ci _ => {} 400c67d6573Sopenharmony_ci } 401c67d6573Sopenharmony_ci Ok(()) 402c67d6573Sopenharmony_ci } 403c67d6573Sopenharmony_ci 404c67d6573Sopenharmony_ci fn visit_class_set_item_post( 405c67d6573Sopenharmony_ci &mut self, 406c67d6573Sopenharmony_ci ast: &ast::ClassSetItem, 407c67d6573Sopenharmony_ci ) -> Result<()> { 408c67d6573Sopenharmony_ci match *ast { 409c67d6573Sopenharmony_ci ast::ClassSetItem::Empty(_) => {} 410c67d6573Sopenharmony_ci ast::ClassSetItem::Literal(ref x) => { 411c67d6573Sopenharmony_ci if self.flags().unicode() { 412c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 413c67d6573Sopenharmony_ci cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); 414c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 415c67d6573Sopenharmony_ci } else { 416c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_bytes(); 417c67d6573Sopenharmony_ci let byte = self.class_literal_byte(x)?; 418c67d6573Sopenharmony_ci cls.push(hir::ClassBytesRange::new(byte, byte)); 419c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 420c67d6573Sopenharmony_ci } 421c67d6573Sopenharmony_ci } 422c67d6573Sopenharmony_ci ast::ClassSetItem::Range(ref x) => { 423c67d6573Sopenharmony_ci if self.flags().unicode() { 424c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 425c67d6573Sopenharmony_ci cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); 426c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 427c67d6573Sopenharmony_ci } else { 428c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_bytes(); 
429c67d6573Sopenharmony_ci let start = self.class_literal_byte(&x.start)?; 430c67d6573Sopenharmony_ci let end = self.class_literal_byte(&x.end)?; 431c67d6573Sopenharmony_ci cls.push(hir::ClassBytesRange::new(start, end)); 432c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 433c67d6573Sopenharmony_ci } 434c67d6573Sopenharmony_ci } 435c67d6573Sopenharmony_ci ast::ClassSetItem::Ascii(ref x) => { 436c67d6573Sopenharmony_ci if self.flags().unicode() { 437c67d6573Sopenharmony_ci let xcls = self.hir_ascii_unicode_class(x)?; 438c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 439c67d6573Sopenharmony_ci cls.union(&xcls); 440c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 441c67d6573Sopenharmony_ci } else { 442c67d6573Sopenharmony_ci let xcls = self.hir_ascii_byte_class(x)?; 443c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_bytes(); 444c67d6573Sopenharmony_ci cls.union(&xcls); 445c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 446c67d6573Sopenharmony_ci } 447c67d6573Sopenharmony_ci } 448c67d6573Sopenharmony_ci ast::ClassSetItem::Unicode(ref x) => { 449c67d6573Sopenharmony_ci let xcls = self.hir_unicode_class(x)?; 450c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 451c67d6573Sopenharmony_ci cls.union(&xcls); 452c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 453c67d6573Sopenharmony_ci } 454c67d6573Sopenharmony_ci ast::ClassSetItem::Perl(ref x) => { 455c67d6573Sopenharmony_ci if self.flags().unicode() { 456c67d6573Sopenharmony_ci let xcls = self.hir_perl_unicode_class(x)?; 457c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 458c67d6573Sopenharmony_ci cls.union(&xcls); 459c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 460c67d6573Sopenharmony_ci } else { 461c67d6573Sopenharmony_ci let xcls = self.hir_perl_byte_class(x); 462c67d6573Sopenharmony_ci let mut cls = 
self.pop().unwrap().unwrap_class_bytes(); 463c67d6573Sopenharmony_ci cls.union(&xcls); 464c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 465c67d6573Sopenharmony_ci } 466c67d6573Sopenharmony_ci } 467c67d6573Sopenharmony_ci ast::ClassSetItem::Bracketed(ref ast) => { 468c67d6573Sopenharmony_ci if self.flags().unicode() { 469c67d6573Sopenharmony_ci let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); 470c67d6573Sopenharmony_ci self.unicode_fold_and_negate( 471c67d6573Sopenharmony_ci &ast.span, 472c67d6573Sopenharmony_ci ast.negated, 473c67d6573Sopenharmony_ci &mut cls1, 474c67d6573Sopenharmony_ci )?; 475c67d6573Sopenharmony_ci 476c67d6573Sopenharmony_ci let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); 477c67d6573Sopenharmony_ci cls2.union(&cls1); 478c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls2)); 479c67d6573Sopenharmony_ci } else { 480c67d6573Sopenharmony_ci let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); 481c67d6573Sopenharmony_ci self.bytes_fold_and_negate( 482c67d6573Sopenharmony_ci &ast.span, 483c67d6573Sopenharmony_ci ast.negated, 484c67d6573Sopenharmony_ci &mut cls1, 485c67d6573Sopenharmony_ci )?; 486c67d6573Sopenharmony_ci 487c67d6573Sopenharmony_ci let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); 488c67d6573Sopenharmony_ci cls2.union(&cls1); 489c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls2)); 490c67d6573Sopenharmony_ci } 491c67d6573Sopenharmony_ci } 492c67d6573Sopenharmony_ci // This is handled automatically by the visitor. 
493c67d6573Sopenharmony_ci ast::ClassSetItem::Union(_) => {} 494c67d6573Sopenharmony_ci } 495c67d6573Sopenharmony_ci Ok(()) 496c67d6573Sopenharmony_ci } 497c67d6573Sopenharmony_ci 498c67d6573Sopenharmony_ci fn visit_class_set_binary_op_pre( 499c67d6573Sopenharmony_ci &mut self, 500c67d6573Sopenharmony_ci _op: &ast::ClassSetBinaryOp, 501c67d6573Sopenharmony_ci ) -> Result<()> { 502c67d6573Sopenharmony_ci if self.flags().unicode() { 503c67d6573Sopenharmony_ci let cls = hir::ClassUnicode::empty(); 504c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 505c67d6573Sopenharmony_ci } else { 506c67d6573Sopenharmony_ci let cls = hir::ClassBytes::empty(); 507c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 508c67d6573Sopenharmony_ci } 509c67d6573Sopenharmony_ci Ok(()) 510c67d6573Sopenharmony_ci } 511c67d6573Sopenharmony_ci 512c67d6573Sopenharmony_ci fn visit_class_set_binary_op_in( 513c67d6573Sopenharmony_ci &mut self, 514c67d6573Sopenharmony_ci _op: &ast::ClassSetBinaryOp, 515c67d6573Sopenharmony_ci ) -> Result<()> { 516c67d6573Sopenharmony_ci if self.flags().unicode() { 517c67d6573Sopenharmony_ci let cls = hir::ClassUnicode::empty(); 518c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 519c67d6573Sopenharmony_ci } else { 520c67d6573Sopenharmony_ci let cls = hir::ClassBytes::empty(); 521c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 522c67d6573Sopenharmony_ci } 523c67d6573Sopenharmony_ci Ok(()) 524c67d6573Sopenharmony_ci } 525c67d6573Sopenharmony_ci 526c67d6573Sopenharmony_ci fn visit_class_set_binary_op_post( 527c67d6573Sopenharmony_ci &mut self, 528c67d6573Sopenharmony_ci op: &ast::ClassSetBinaryOp, 529c67d6573Sopenharmony_ci ) -> Result<()> { 530c67d6573Sopenharmony_ci use crate::ast::ClassSetBinaryOpKind::*; 531c67d6573Sopenharmony_ci 532c67d6573Sopenharmony_ci if self.flags().unicode() { 533c67d6573Sopenharmony_ci let mut rhs = self.pop().unwrap().unwrap_class_unicode(); 534c67d6573Sopenharmony_ci let mut lhs = 
self.pop().unwrap().unwrap_class_unicode(); 535c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_unicode(); 536c67d6573Sopenharmony_ci if self.flags().case_insensitive() { 537c67d6573Sopenharmony_ci rhs.try_case_fold_simple().map_err(|_| { 538c67d6573Sopenharmony_ci self.error( 539c67d6573Sopenharmony_ci op.rhs.span().clone(), 540c67d6573Sopenharmony_ci ErrorKind::UnicodeCaseUnavailable, 541c67d6573Sopenharmony_ci ) 542c67d6573Sopenharmony_ci })?; 543c67d6573Sopenharmony_ci lhs.try_case_fold_simple().map_err(|_| { 544c67d6573Sopenharmony_ci self.error( 545c67d6573Sopenharmony_ci op.lhs.span().clone(), 546c67d6573Sopenharmony_ci ErrorKind::UnicodeCaseUnavailable, 547c67d6573Sopenharmony_ci ) 548c67d6573Sopenharmony_ci })?; 549c67d6573Sopenharmony_ci } 550c67d6573Sopenharmony_ci match op.kind { 551c67d6573Sopenharmony_ci Intersection => lhs.intersect(&rhs), 552c67d6573Sopenharmony_ci Difference => lhs.difference(&rhs), 553c67d6573Sopenharmony_ci SymmetricDifference => lhs.symmetric_difference(&rhs), 554c67d6573Sopenharmony_ci } 555c67d6573Sopenharmony_ci cls.union(&lhs); 556c67d6573Sopenharmony_ci self.push(HirFrame::ClassUnicode(cls)); 557c67d6573Sopenharmony_ci } else { 558c67d6573Sopenharmony_ci let mut rhs = self.pop().unwrap().unwrap_class_bytes(); 559c67d6573Sopenharmony_ci let mut lhs = self.pop().unwrap().unwrap_class_bytes(); 560c67d6573Sopenharmony_ci let mut cls = self.pop().unwrap().unwrap_class_bytes(); 561c67d6573Sopenharmony_ci if self.flags().case_insensitive() { 562c67d6573Sopenharmony_ci rhs.case_fold_simple(); 563c67d6573Sopenharmony_ci lhs.case_fold_simple(); 564c67d6573Sopenharmony_ci } 565c67d6573Sopenharmony_ci match op.kind { 566c67d6573Sopenharmony_ci Intersection => lhs.intersect(&rhs), 567c67d6573Sopenharmony_ci Difference => lhs.difference(&rhs), 568c67d6573Sopenharmony_ci SymmetricDifference => lhs.symmetric_difference(&rhs), 569c67d6573Sopenharmony_ci } 570c67d6573Sopenharmony_ci cls.union(&lhs); 
571c67d6573Sopenharmony_ci self.push(HirFrame::ClassBytes(cls)); 572c67d6573Sopenharmony_ci } 573c67d6573Sopenharmony_ci Ok(()) 574c67d6573Sopenharmony_ci } 575c67d6573Sopenharmony_ci} 576c67d6573Sopenharmony_ci 577c67d6573Sopenharmony_ci/// The internal implementation of a translator. 578c67d6573Sopenharmony_ci/// 579c67d6573Sopenharmony_ci/// This type is responsible for carrying around the original pattern string, 580c67d6573Sopenharmony_ci/// which is not tied to the internal state of a translator. 581c67d6573Sopenharmony_ci/// 582c67d6573Sopenharmony_ci/// A TranslatorI exists for the time it takes to translate a single Ast. 583c67d6573Sopenharmony_ci#[derive(Clone, Debug)] 584c67d6573Sopenharmony_cistruct TranslatorI<'t, 'p> { 585c67d6573Sopenharmony_ci trans: &'t Translator, 586c67d6573Sopenharmony_ci pattern: &'p str, 587c67d6573Sopenharmony_ci} 588c67d6573Sopenharmony_ci 589c67d6573Sopenharmony_ciimpl<'t, 'p> TranslatorI<'t, 'p> { 590c67d6573Sopenharmony_ci /// Build a new internal translator. 591c67d6573Sopenharmony_ci fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { 592c67d6573Sopenharmony_ci TranslatorI { trans, pattern } 593c67d6573Sopenharmony_ci } 594c67d6573Sopenharmony_ci 595c67d6573Sopenharmony_ci /// Return a reference to the underlying translator. 596c67d6573Sopenharmony_ci fn trans(&self) -> &Translator { 597c67d6573Sopenharmony_ci &self.trans 598c67d6573Sopenharmony_ci } 599c67d6573Sopenharmony_ci 600c67d6573Sopenharmony_ci /// Push the given frame on to the call stack. 601c67d6573Sopenharmony_ci fn push(&self, frame: HirFrame) { 602c67d6573Sopenharmony_ci self.trans().stack.borrow_mut().push(frame); 603c67d6573Sopenharmony_ci } 604c67d6573Sopenharmony_ci 605c67d6573Sopenharmony_ci /// Pop the top of the call stack. If the call stack is empty, return None. 
606c67d6573Sopenharmony_ci fn pop(&self) -> Option<HirFrame> { 607c67d6573Sopenharmony_ci self.trans().stack.borrow_mut().pop() 608c67d6573Sopenharmony_ci } 609c67d6573Sopenharmony_ci 610c67d6573Sopenharmony_ci /// Create a new error with the given span and error type. 611c67d6573Sopenharmony_ci fn error(&self, span: Span, kind: ErrorKind) -> Error { 612c67d6573Sopenharmony_ci Error { kind, pattern: self.pattern.to_string(), span } 613c67d6573Sopenharmony_ci } 614c67d6573Sopenharmony_ci 615c67d6573Sopenharmony_ci /// Return a copy of the active flags. 616c67d6573Sopenharmony_ci fn flags(&self) -> Flags { 617c67d6573Sopenharmony_ci self.trans().flags.get() 618c67d6573Sopenharmony_ci } 619c67d6573Sopenharmony_ci 620c67d6573Sopenharmony_ci /// Set the flags of this translator from the flags set in the given AST. 621c67d6573Sopenharmony_ci /// Then, return the old flags. 622c67d6573Sopenharmony_ci fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { 623c67d6573Sopenharmony_ci let old_flags = self.flags(); 624c67d6573Sopenharmony_ci let mut new_flags = Flags::from_ast(ast_flags); 625c67d6573Sopenharmony_ci new_flags.merge(&old_flags); 626c67d6573Sopenharmony_ci self.trans().flags.set(new_flags); 627c67d6573Sopenharmony_ci old_flags 628c67d6573Sopenharmony_ci } 629c67d6573Sopenharmony_ci 630c67d6573Sopenharmony_ci fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { 631c67d6573Sopenharmony_ci let ch = match self.literal_to_char(lit)? 
{ 632c67d6573Sopenharmony_ci byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), 633c67d6573Sopenharmony_ci hir::Literal::Unicode(ch) => ch, 634c67d6573Sopenharmony_ci }; 635c67d6573Sopenharmony_ci if self.flags().case_insensitive() { 636c67d6573Sopenharmony_ci self.hir_from_char_case_insensitive(lit.span, ch) 637c67d6573Sopenharmony_ci } else { 638c67d6573Sopenharmony_ci self.hir_from_char(lit.span, ch) 639c67d6573Sopenharmony_ci } 640c67d6573Sopenharmony_ci } 641c67d6573Sopenharmony_ci 642c67d6573Sopenharmony_ci /// Convert an Ast literal to its scalar representation. 643c67d6573Sopenharmony_ci /// 644c67d6573Sopenharmony_ci /// When Unicode mode is enabled, then this always succeeds and returns a 645c67d6573Sopenharmony_ci /// `char` (Unicode scalar value). 646c67d6573Sopenharmony_ci /// 647c67d6573Sopenharmony_ci /// When Unicode mode is disabled, then a raw byte is returned. If that 648c67d6573Sopenharmony_ci /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns 649c67d6573Sopenharmony_ci /// an error. 
650c67d6573Sopenharmony_ci fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { 651c67d6573Sopenharmony_ci if self.flags().unicode() { 652c67d6573Sopenharmony_ci return Ok(hir::Literal::Unicode(lit.c)); 653c67d6573Sopenharmony_ci } 654c67d6573Sopenharmony_ci let byte = match lit.byte() { 655c67d6573Sopenharmony_ci None => return Ok(hir::Literal::Unicode(lit.c)), 656c67d6573Sopenharmony_ci Some(byte) => byte, 657c67d6573Sopenharmony_ci }; 658c67d6573Sopenharmony_ci if byte <= 0x7F { 659c67d6573Sopenharmony_ci return Ok(hir::Literal::Unicode(byte as char)); 660c67d6573Sopenharmony_ci } 661c67d6573Sopenharmony_ci if !self.trans().allow_invalid_utf8 { 662c67d6573Sopenharmony_ci return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); 663c67d6573Sopenharmony_ci } 664c67d6573Sopenharmony_ci Ok(hir::Literal::Byte(byte)) 665c67d6573Sopenharmony_ci } 666c67d6573Sopenharmony_ci 667c67d6573Sopenharmony_ci fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { 668c67d6573Sopenharmony_ci if !self.flags().unicode() && c.len_utf8() > 1 { 669c67d6573Sopenharmony_ci return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); 670c67d6573Sopenharmony_ci } 671c67d6573Sopenharmony_ci Ok(Hir::literal(hir::Literal::Unicode(c))) 672c67d6573Sopenharmony_ci } 673c67d6573Sopenharmony_ci 674c67d6573Sopenharmony_ci fn hir_from_char_case_insensitive( 675c67d6573Sopenharmony_ci &self, 676c67d6573Sopenharmony_ci span: Span, 677c67d6573Sopenharmony_ci c: char, 678c67d6573Sopenharmony_ci ) -> Result<Hir> { 679c67d6573Sopenharmony_ci if self.flags().unicode() { 680c67d6573Sopenharmony_ci // If case folding won't do anything, then don't bother trying. 
681c67d6573Sopenharmony_ci let map = 682c67d6573Sopenharmony_ci unicode::contains_simple_case_mapping(c, c).map_err(|_| { 683c67d6573Sopenharmony_ci self.error(span, ErrorKind::UnicodeCaseUnavailable) 684c67d6573Sopenharmony_ci })?; 685c67d6573Sopenharmony_ci if !map { 686c67d6573Sopenharmony_ci return self.hir_from_char(span, c); 687c67d6573Sopenharmony_ci } 688c67d6573Sopenharmony_ci let mut cls = 689c67d6573Sopenharmony_ci hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( 690c67d6573Sopenharmony_ci c, c, 691c67d6573Sopenharmony_ci )]); 692c67d6573Sopenharmony_ci cls.try_case_fold_simple().map_err(|_| { 693c67d6573Sopenharmony_ci self.error(span, ErrorKind::UnicodeCaseUnavailable) 694c67d6573Sopenharmony_ci })?; 695c67d6573Sopenharmony_ci Ok(Hir::class(hir::Class::Unicode(cls))) 696c67d6573Sopenharmony_ci } else { 697c67d6573Sopenharmony_ci if c.len_utf8() > 1 { 698c67d6573Sopenharmony_ci return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); 699c67d6573Sopenharmony_ci } 700c67d6573Sopenharmony_ci // If case folding won't do anything, then don't bother trying. 
701c67d6573Sopenharmony_ci match c { 702c67d6573Sopenharmony_ci 'A'..='Z' | 'a'..='z' => {} 703c67d6573Sopenharmony_ci _ => return self.hir_from_char(span, c), 704c67d6573Sopenharmony_ci } 705c67d6573Sopenharmony_ci let mut cls = 706c67d6573Sopenharmony_ci hir::ClassBytes::new(vec![hir::ClassBytesRange::new( 707c67d6573Sopenharmony_ci c as u8, c as u8, 708c67d6573Sopenharmony_ci )]); 709c67d6573Sopenharmony_ci cls.case_fold_simple(); 710c67d6573Sopenharmony_ci Ok(Hir::class(hir::Class::Bytes(cls))) 711c67d6573Sopenharmony_ci } 712c67d6573Sopenharmony_ci } 713c67d6573Sopenharmony_ci 714c67d6573Sopenharmony_ci fn hir_dot(&self, span: Span) -> Result<Hir> { 715c67d6573Sopenharmony_ci let unicode = self.flags().unicode(); 716c67d6573Sopenharmony_ci if !unicode && !self.trans().allow_invalid_utf8 { 717c67d6573Sopenharmony_ci return Err(self.error(span, ErrorKind::InvalidUtf8)); 718c67d6573Sopenharmony_ci } 719c67d6573Sopenharmony_ci Ok(if self.flags().dot_matches_new_line() { 720c67d6573Sopenharmony_ci Hir::any(!unicode) 721c67d6573Sopenharmony_ci } else { 722c67d6573Sopenharmony_ci Hir::dot(!unicode) 723c67d6573Sopenharmony_ci }) 724c67d6573Sopenharmony_ci } 725c67d6573Sopenharmony_ci 726c67d6573Sopenharmony_ci fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { 727c67d6573Sopenharmony_ci let unicode = self.flags().unicode(); 728c67d6573Sopenharmony_ci let multi_line = self.flags().multi_line(); 729c67d6573Sopenharmony_ci Ok(match asst.kind { 730c67d6573Sopenharmony_ci ast::AssertionKind::StartLine => Hir::anchor(if multi_line { 731c67d6573Sopenharmony_ci hir::Anchor::StartLine 732c67d6573Sopenharmony_ci } else { 733c67d6573Sopenharmony_ci hir::Anchor::StartText 734c67d6573Sopenharmony_ci }), 735c67d6573Sopenharmony_ci ast::AssertionKind::EndLine => Hir::anchor(if multi_line { 736c67d6573Sopenharmony_ci hir::Anchor::EndLine 737c67d6573Sopenharmony_ci } else { 738c67d6573Sopenharmony_ci hir::Anchor::EndText 739c67d6573Sopenharmony_ci }), 
740c67d6573Sopenharmony_ci ast::AssertionKind::StartText => { 741c67d6573Sopenharmony_ci Hir::anchor(hir::Anchor::StartText) 742c67d6573Sopenharmony_ci } 743c67d6573Sopenharmony_ci ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), 744c67d6573Sopenharmony_ci ast::AssertionKind::WordBoundary => { 745c67d6573Sopenharmony_ci Hir::word_boundary(if unicode { 746c67d6573Sopenharmony_ci hir::WordBoundary::Unicode 747c67d6573Sopenharmony_ci } else { 748c67d6573Sopenharmony_ci hir::WordBoundary::Ascii 749c67d6573Sopenharmony_ci }) 750c67d6573Sopenharmony_ci } 751c67d6573Sopenharmony_ci ast::AssertionKind::NotWordBoundary => { 752c67d6573Sopenharmony_ci Hir::word_boundary(if unicode { 753c67d6573Sopenharmony_ci hir::WordBoundary::UnicodeNegate 754c67d6573Sopenharmony_ci } else { 755c67d6573Sopenharmony_ci // It is possible for negated ASCII word boundaries to 756c67d6573Sopenharmony_ci // match at invalid UTF-8 boundaries, even when searching 757c67d6573Sopenharmony_ci // valid UTF-8. 
758c67d6573Sopenharmony_ci if !self.trans().allow_invalid_utf8 { 759c67d6573Sopenharmony_ci return Err( 760c67d6573Sopenharmony_ci self.error(asst.span, ErrorKind::InvalidUtf8) 761c67d6573Sopenharmony_ci ); 762c67d6573Sopenharmony_ci } 763c67d6573Sopenharmony_ci hir::WordBoundary::AsciiNegate 764c67d6573Sopenharmony_ci }) 765c67d6573Sopenharmony_ci } 766c67d6573Sopenharmony_ci }) 767c67d6573Sopenharmony_ci } 768c67d6573Sopenharmony_ci 769c67d6573Sopenharmony_ci fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { 770c67d6573Sopenharmony_ci let kind = match group.kind { 771c67d6573Sopenharmony_ci ast::GroupKind::CaptureIndex(idx) => { 772c67d6573Sopenharmony_ci hir::GroupKind::CaptureIndex(idx) 773c67d6573Sopenharmony_ci } 774c67d6573Sopenharmony_ci ast::GroupKind::CaptureName(ref capname) => { 775c67d6573Sopenharmony_ci hir::GroupKind::CaptureName { 776c67d6573Sopenharmony_ci name: capname.name.clone(), 777c67d6573Sopenharmony_ci index: capname.index, 778c67d6573Sopenharmony_ci } 779c67d6573Sopenharmony_ci } 780c67d6573Sopenharmony_ci ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, 781c67d6573Sopenharmony_ci }; 782c67d6573Sopenharmony_ci Hir::group(hir::Group { kind, hir: Box::new(expr) }) 783c67d6573Sopenharmony_ci } 784c67d6573Sopenharmony_ci 785c67d6573Sopenharmony_ci fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { 786c67d6573Sopenharmony_ci let kind = match rep.op.kind { 787c67d6573Sopenharmony_ci ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, 788c67d6573Sopenharmony_ci ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, 789c67d6573Sopenharmony_ci ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, 790c67d6573Sopenharmony_ci ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { 791c67d6573Sopenharmony_ci hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) 792c67d6573Sopenharmony_ci } 793c67d6573Sopenharmony_ci 
ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { 794c67d6573Sopenharmony_ci hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) 795c67d6573Sopenharmony_ci } 796c67d6573Sopenharmony_ci ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( 797c67d6573Sopenharmony_ci m, 798c67d6573Sopenharmony_ci n, 799c67d6573Sopenharmony_ci )) => { 800c67d6573Sopenharmony_ci hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) 801c67d6573Sopenharmony_ci } 802c67d6573Sopenharmony_ci }; 803c67d6573Sopenharmony_ci let greedy = 804c67d6573Sopenharmony_ci if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; 805c67d6573Sopenharmony_ci Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) 806c67d6573Sopenharmony_ci } 807c67d6573Sopenharmony_ci 808c67d6573Sopenharmony_ci fn hir_unicode_class( 809c67d6573Sopenharmony_ci &self, 810c67d6573Sopenharmony_ci ast_class: &ast::ClassUnicode, 811c67d6573Sopenharmony_ci ) -> Result<hir::ClassUnicode> { 812c67d6573Sopenharmony_ci use crate::ast::ClassUnicodeKind::*; 813c67d6573Sopenharmony_ci 814c67d6573Sopenharmony_ci if !self.flags().unicode() { 815c67d6573Sopenharmony_ci return Err( 816c67d6573Sopenharmony_ci self.error(ast_class.span, ErrorKind::UnicodeNotAllowed) 817c67d6573Sopenharmony_ci ); 818c67d6573Sopenharmony_ci } 819c67d6573Sopenharmony_ci let query = match ast_class.kind { 820c67d6573Sopenharmony_ci OneLetter(name) => ClassQuery::OneLetter(name), 821c67d6573Sopenharmony_ci Named(ref name) => ClassQuery::Binary(name), 822c67d6573Sopenharmony_ci NamedValue { ref name, ref value, .. 
} => ClassQuery::ByValue { 823c67d6573Sopenharmony_ci property_name: name, 824c67d6573Sopenharmony_ci property_value: value, 825c67d6573Sopenharmony_ci }, 826c67d6573Sopenharmony_ci }; 827c67d6573Sopenharmony_ci let mut result = self.convert_unicode_class_error( 828c67d6573Sopenharmony_ci &ast_class.span, 829c67d6573Sopenharmony_ci unicode::class(query), 830c67d6573Sopenharmony_ci ); 831c67d6573Sopenharmony_ci if let Ok(ref mut class) = result { 832c67d6573Sopenharmony_ci self.unicode_fold_and_negate( 833c67d6573Sopenharmony_ci &ast_class.span, 834c67d6573Sopenharmony_ci ast_class.negated, 835c67d6573Sopenharmony_ci class, 836c67d6573Sopenharmony_ci )?; 837c67d6573Sopenharmony_ci if class.ranges().is_empty() { 838c67d6573Sopenharmony_ci let err = self 839c67d6573Sopenharmony_ci .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); 840c67d6573Sopenharmony_ci return Err(err); 841c67d6573Sopenharmony_ci } 842c67d6573Sopenharmony_ci } 843c67d6573Sopenharmony_ci result 844c67d6573Sopenharmony_ci } 845c67d6573Sopenharmony_ci 846c67d6573Sopenharmony_ci fn hir_ascii_unicode_class( 847c67d6573Sopenharmony_ci &self, 848c67d6573Sopenharmony_ci ast: &ast::ClassAscii, 849c67d6573Sopenharmony_ci ) -> Result<hir::ClassUnicode> { 850c67d6573Sopenharmony_ci let mut cls = hir::ClassUnicode::new( 851c67d6573Sopenharmony_ci ascii_class(&ast.kind) 852c67d6573Sopenharmony_ci .iter() 853c67d6573Sopenharmony_ci .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), 854c67d6573Sopenharmony_ci ); 855c67d6573Sopenharmony_ci self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; 856c67d6573Sopenharmony_ci Ok(cls) 857c67d6573Sopenharmony_ci } 858c67d6573Sopenharmony_ci 859c67d6573Sopenharmony_ci fn hir_ascii_byte_class( 860c67d6573Sopenharmony_ci &self, 861c67d6573Sopenharmony_ci ast: &ast::ClassAscii, 862c67d6573Sopenharmony_ci ) -> Result<hir::ClassBytes> { 863c67d6573Sopenharmony_ci let mut cls = hir::ClassBytes::new( 864c67d6573Sopenharmony_ci ascii_class(&ast.kind) 
865c67d6573Sopenharmony_ci .iter() 866c67d6573Sopenharmony_ci .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), 867c67d6573Sopenharmony_ci ); 868c67d6573Sopenharmony_ci self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; 869c67d6573Sopenharmony_ci Ok(cls) 870c67d6573Sopenharmony_ci } 871c67d6573Sopenharmony_ci 872c67d6573Sopenharmony_ci fn hir_perl_unicode_class( 873c67d6573Sopenharmony_ci &self, 874c67d6573Sopenharmony_ci ast_class: &ast::ClassPerl, 875c67d6573Sopenharmony_ci ) -> Result<hir::ClassUnicode> { 876c67d6573Sopenharmony_ci use crate::ast::ClassPerlKind::*; 877c67d6573Sopenharmony_ci 878c67d6573Sopenharmony_ci assert!(self.flags().unicode()); 879c67d6573Sopenharmony_ci let result = match ast_class.kind { 880c67d6573Sopenharmony_ci Digit => unicode::perl_digit(), 881c67d6573Sopenharmony_ci Space => unicode::perl_space(), 882c67d6573Sopenharmony_ci Word => unicode::perl_word(), 883c67d6573Sopenharmony_ci }; 884c67d6573Sopenharmony_ci let mut class = 885c67d6573Sopenharmony_ci self.convert_unicode_class_error(&ast_class.span, result)?; 886c67d6573Sopenharmony_ci // We needn't apply case folding here because the Perl Unicode classes 887c67d6573Sopenharmony_ci // are already closed under Unicode simple case folding. 
888c67d6573Sopenharmony_ci if ast_class.negated { 889c67d6573Sopenharmony_ci class.negate(); 890c67d6573Sopenharmony_ci } 891c67d6573Sopenharmony_ci Ok(class) 892c67d6573Sopenharmony_ci } 893c67d6573Sopenharmony_ci 894c67d6573Sopenharmony_ci fn hir_perl_byte_class( 895c67d6573Sopenharmony_ci &self, 896c67d6573Sopenharmony_ci ast_class: &ast::ClassPerl, 897c67d6573Sopenharmony_ci ) -> hir::ClassBytes { 898c67d6573Sopenharmony_ci use crate::ast::ClassPerlKind::*; 899c67d6573Sopenharmony_ci 900c67d6573Sopenharmony_ci assert!(!self.flags().unicode()); 901c67d6573Sopenharmony_ci let mut class = match ast_class.kind { 902c67d6573Sopenharmony_ci Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), 903c67d6573Sopenharmony_ci Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), 904c67d6573Sopenharmony_ci Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), 905c67d6573Sopenharmony_ci }; 906c67d6573Sopenharmony_ci // We needn't apply case folding here because the Perl ASCII classes 907c67d6573Sopenharmony_ci // are already closed (under ASCII case folding). 908c67d6573Sopenharmony_ci if ast_class.negated { 909c67d6573Sopenharmony_ci class.negate(); 910c67d6573Sopenharmony_ci } 911c67d6573Sopenharmony_ci class 912c67d6573Sopenharmony_ci } 913c67d6573Sopenharmony_ci 914c67d6573Sopenharmony_ci /// Converts the given Unicode specific error to an HIR translation error. 915c67d6573Sopenharmony_ci /// 916c67d6573Sopenharmony_ci /// The span given should approximate the position at which an error would 917c67d6573Sopenharmony_ci /// occur. 
918c67d6573Sopenharmony_ci fn convert_unicode_class_error( 919c67d6573Sopenharmony_ci &self, 920c67d6573Sopenharmony_ci span: &Span, 921c67d6573Sopenharmony_ci result: unicode::Result<hir::ClassUnicode>, 922c67d6573Sopenharmony_ci ) -> Result<hir::ClassUnicode> { 923c67d6573Sopenharmony_ci result.map_err(|err| { 924c67d6573Sopenharmony_ci let sp = span.clone(); 925c67d6573Sopenharmony_ci match err { 926c67d6573Sopenharmony_ci unicode::Error::PropertyNotFound => { 927c67d6573Sopenharmony_ci self.error(sp, ErrorKind::UnicodePropertyNotFound) 928c67d6573Sopenharmony_ci } 929c67d6573Sopenharmony_ci unicode::Error::PropertyValueNotFound => { 930c67d6573Sopenharmony_ci self.error(sp, ErrorKind::UnicodePropertyValueNotFound) 931c67d6573Sopenharmony_ci } 932c67d6573Sopenharmony_ci unicode::Error::PerlClassNotFound => { 933c67d6573Sopenharmony_ci self.error(sp, ErrorKind::UnicodePerlClassNotFound) 934c67d6573Sopenharmony_ci } 935c67d6573Sopenharmony_ci } 936c67d6573Sopenharmony_ci }) 937c67d6573Sopenharmony_ci } 938c67d6573Sopenharmony_ci 939c67d6573Sopenharmony_ci fn unicode_fold_and_negate( 940c67d6573Sopenharmony_ci &self, 941c67d6573Sopenharmony_ci span: &Span, 942c67d6573Sopenharmony_ci negated: bool, 943c67d6573Sopenharmony_ci class: &mut hir::ClassUnicode, 944c67d6573Sopenharmony_ci ) -> Result<()> { 945c67d6573Sopenharmony_ci // Note that we must apply case folding before negation! 946c67d6573Sopenharmony_ci // Consider `(?i)[^x]`. If we applied negation field, then 947c67d6573Sopenharmony_ci // the result would be the character class that matched any 948c67d6573Sopenharmony_ci // Unicode scalar value. 
949c67d6573Sopenharmony_ci if self.flags().case_insensitive() { 950c67d6573Sopenharmony_ci class.try_case_fold_simple().map_err(|_| { 951c67d6573Sopenharmony_ci self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable) 952c67d6573Sopenharmony_ci })?; 953c67d6573Sopenharmony_ci } 954c67d6573Sopenharmony_ci if negated { 955c67d6573Sopenharmony_ci class.negate(); 956c67d6573Sopenharmony_ci } 957c67d6573Sopenharmony_ci Ok(()) 958c67d6573Sopenharmony_ci } 959c67d6573Sopenharmony_ci 960c67d6573Sopenharmony_ci fn bytes_fold_and_negate( 961c67d6573Sopenharmony_ci &self, 962c67d6573Sopenharmony_ci span: &Span, 963c67d6573Sopenharmony_ci negated: bool, 964c67d6573Sopenharmony_ci class: &mut hir::ClassBytes, 965c67d6573Sopenharmony_ci ) -> Result<()> { 966c67d6573Sopenharmony_ci // Note that we must apply case folding before negation! 967c67d6573Sopenharmony_ci // Consider `(?i)[^x]`. If we applied negation first, then 968c67d6573Sopenharmony_ci // the result would be the character class that matched any 969c67d6573Sopenharmony_ci // Unicode scalar value. 970c67d6573Sopenharmony_ci if self.flags().case_insensitive() { 971c67d6573Sopenharmony_ci class.case_fold_simple(); 972c67d6573Sopenharmony_ci } 973c67d6573Sopenharmony_ci if negated { 974c67d6573Sopenharmony_ci class.negate(); 975c67d6573Sopenharmony_ci } 976c67d6573Sopenharmony_ci if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { 977c67d6573Sopenharmony_ci return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); 978c67d6573Sopenharmony_ci } 979c67d6573Sopenharmony_ci Ok(()) 980c67d6573Sopenharmony_ci } 981c67d6573Sopenharmony_ci 982c67d6573Sopenharmony_ci /// Return a scalar byte value suitable for use as a literal in a byte 983c67d6573Sopenharmony_ci /// character class. 984c67d6573Sopenharmony_ci fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { 985c67d6573Sopenharmony_ci match self.literal_to_char(ast)? 
{ 986c67d6573Sopenharmony_ci hir::Literal::Byte(byte) => Ok(byte), 987c67d6573Sopenharmony_ci hir::Literal::Unicode(ch) => { 988c67d6573Sopenharmony_ci if ch <= 0x7F as char { 989c67d6573Sopenharmony_ci Ok(ch as u8) 990c67d6573Sopenharmony_ci } else { 991c67d6573Sopenharmony_ci // We can't feasibly support Unicode in 992c67d6573Sopenharmony_ci // byte oriented classes. Byte classes don't 993c67d6573Sopenharmony_ci // do Unicode case folding. 994c67d6573Sopenharmony_ci Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) 995c67d6573Sopenharmony_ci } 996c67d6573Sopenharmony_ci } 997c67d6573Sopenharmony_ci } 998c67d6573Sopenharmony_ci } 999c67d6573Sopenharmony_ci} 1000c67d6573Sopenharmony_ci 1001c67d6573Sopenharmony_ci/// A translator's representation of a regular expression's flags at any given 1002c67d6573Sopenharmony_ci/// moment in time. 1003c67d6573Sopenharmony_ci/// 1004c67d6573Sopenharmony_ci/// Each flag can be in one of three states: absent, present but disabled or 1005c67d6573Sopenharmony_ci/// present but enabled. 1006c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug, Default)] 1007c67d6573Sopenharmony_cistruct Flags { 1008c67d6573Sopenharmony_ci case_insensitive: Option<bool>, 1009c67d6573Sopenharmony_ci multi_line: Option<bool>, 1010c67d6573Sopenharmony_ci dot_matches_new_line: Option<bool>, 1011c67d6573Sopenharmony_ci swap_greed: Option<bool>, 1012c67d6573Sopenharmony_ci unicode: Option<bool>, 1013c67d6573Sopenharmony_ci // Note that `ignore_whitespace` is omitted here because it is handled 1014c67d6573Sopenharmony_ci // entirely in the parser. 
1015c67d6573Sopenharmony_ci} 1016c67d6573Sopenharmony_ci 1017c67d6573Sopenharmony_ciimpl Flags { 1018c67d6573Sopenharmony_ci fn from_ast(ast: &ast::Flags) -> Flags { 1019c67d6573Sopenharmony_ci let mut flags = Flags::default(); 1020c67d6573Sopenharmony_ci let mut enable = true; 1021c67d6573Sopenharmony_ci for item in &ast.items { 1022c67d6573Sopenharmony_ci match item.kind { 1023c67d6573Sopenharmony_ci ast::FlagsItemKind::Negation => { 1024c67d6573Sopenharmony_ci enable = false; 1025c67d6573Sopenharmony_ci } 1026c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { 1027c67d6573Sopenharmony_ci flags.case_insensitive = Some(enable); 1028c67d6573Sopenharmony_ci } 1029c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { 1030c67d6573Sopenharmony_ci flags.multi_line = Some(enable); 1031c67d6573Sopenharmony_ci } 1032c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { 1033c67d6573Sopenharmony_ci flags.dot_matches_new_line = Some(enable); 1034c67d6573Sopenharmony_ci } 1035c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { 1036c67d6573Sopenharmony_ci flags.swap_greed = Some(enable); 1037c67d6573Sopenharmony_ci } 1038c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { 1039c67d6573Sopenharmony_ci flags.unicode = Some(enable); 1040c67d6573Sopenharmony_ci } 1041c67d6573Sopenharmony_ci ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} 1042c67d6573Sopenharmony_ci } 1043c67d6573Sopenharmony_ci } 1044c67d6573Sopenharmony_ci flags 1045c67d6573Sopenharmony_ci } 1046c67d6573Sopenharmony_ci 1047c67d6573Sopenharmony_ci fn merge(&mut self, previous: &Flags) { 1048c67d6573Sopenharmony_ci if self.case_insensitive.is_none() { 1049c67d6573Sopenharmony_ci self.case_insensitive = previous.case_insensitive; 1050c67d6573Sopenharmony_ci } 1051c67d6573Sopenharmony_ci if self.multi_line.is_none() { 1052c67d6573Sopenharmony_ci self.multi_line = 
previous.multi_line; 1053c67d6573Sopenharmony_ci } 1054c67d6573Sopenharmony_ci if self.dot_matches_new_line.is_none() { 1055c67d6573Sopenharmony_ci self.dot_matches_new_line = previous.dot_matches_new_line; 1056c67d6573Sopenharmony_ci } 1057c67d6573Sopenharmony_ci if self.swap_greed.is_none() { 1058c67d6573Sopenharmony_ci self.swap_greed = previous.swap_greed; 1059c67d6573Sopenharmony_ci } 1060c67d6573Sopenharmony_ci if self.unicode.is_none() { 1061c67d6573Sopenharmony_ci self.unicode = previous.unicode; 1062c67d6573Sopenharmony_ci } 1063c67d6573Sopenharmony_ci } 1064c67d6573Sopenharmony_ci 1065c67d6573Sopenharmony_ci fn case_insensitive(&self) -> bool { 1066c67d6573Sopenharmony_ci self.case_insensitive.unwrap_or(false) 1067c67d6573Sopenharmony_ci } 1068c67d6573Sopenharmony_ci 1069c67d6573Sopenharmony_ci fn multi_line(&self) -> bool { 1070c67d6573Sopenharmony_ci self.multi_line.unwrap_or(false) 1071c67d6573Sopenharmony_ci } 1072c67d6573Sopenharmony_ci 1073c67d6573Sopenharmony_ci fn dot_matches_new_line(&self) -> bool { 1074c67d6573Sopenharmony_ci self.dot_matches_new_line.unwrap_or(false) 1075c67d6573Sopenharmony_ci } 1076c67d6573Sopenharmony_ci 1077c67d6573Sopenharmony_ci fn swap_greed(&self) -> bool { 1078c67d6573Sopenharmony_ci self.swap_greed.unwrap_or(false) 1079c67d6573Sopenharmony_ci } 1080c67d6573Sopenharmony_ci 1081c67d6573Sopenharmony_ci fn unicode(&self) -> bool { 1082c67d6573Sopenharmony_ci self.unicode.unwrap_or(true) 1083c67d6573Sopenharmony_ci } 1084c67d6573Sopenharmony_ci} 1085c67d6573Sopenharmony_ci 1086c67d6573Sopenharmony_cifn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { 1087c67d6573Sopenharmony_ci let ranges: Vec<_> = ascii_class(kind) 1088c67d6573Sopenharmony_ci .iter() 1089c67d6573Sopenharmony_ci .cloned() 1090c67d6573Sopenharmony_ci .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)) 1091c67d6573Sopenharmony_ci .collect(); 1092c67d6573Sopenharmony_ci hir::ClassBytes::new(ranges) 1093c67d6573Sopenharmony_ci} 
/// Return the inclusive `(start, end)` character ranges that make up the
/// given ASCII class kind.
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
    use crate::ast::ClassAsciiKind as Kind;

    match kind {
        Kind::Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
        Kind::Alpha => &[('A', 'Z'), ('a', 'z')],
        Kind::Ascii => &[('\x00', '\x7F')],
        Kind::Blank => &[('\t', '\t'), (' ', ' ')],
        Kind::Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
        Kind::Digit => &[('0', '9')],
        Kind::Graph => &[('!', '~')],
        Kind::Lower => &[('a', 'z')],
        Kind::Print => &[(' ', '~')],
        Kind::Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
        Kind::Space => &[
            ('\t', '\t'),
            ('\n', '\n'),
            ('\x0B', '\x0B'),
            ('\x0C', '\x0C'),
            ('\r', '\r'),
            (' ', ' '),
        ],
        Kind::Upper => &[('A', 'Z')],
        Kind::Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
        Kind::Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
    }
}

#[cfg(test)]
mod tests {
    use crate::ast::parse::ParserBuilder;
    use crate::ast::{self, Ast, Position, Span};
    use crate::hir::{self, Hir, HirKind};
    use crate::unicode::{self, ClassQuery};

    use super::{ascii_class, TranslatorBuilder};
1131c67d6573Sopenharmony_ci // We create these errors to compare with real hir::Errors in the tests. 1132c67d6573Sopenharmony_ci // We define equality between TestError and hir::Error to disregard the 1133c67d6573Sopenharmony_ci // pattern string in hir::Error, which is annoying to provide in tests. 1134c67d6573Sopenharmony_ci #[derive(Clone, Debug)] 1135c67d6573Sopenharmony_ci struct TestError { 1136c67d6573Sopenharmony_ci span: Span, 1137c67d6573Sopenharmony_ci kind: hir::ErrorKind, 1138c67d6573Sopenharmony_ci } 1139c67d6573Sopenharmony_ci 1140c67d6573Sopenharmony_ci impl PartialEq<hir::Error> for TestError { 1141c67d6573Sopenharmony_ci fn eq(&self, other: &hir::Error) -> bool { 1142c67d6573Sopenharmony_ci self.span == other.span && self.kind == other.kind 1143c67d6573Sopenharmony_ci } 1144c67d6573Sopenharmony_ci } 1145c67d6573Sopenharmony_ci 1146c67d6573Sopenharmony_ci impl PartialEq<TestError> for hir::Error { 1147c67d6573Sopenharmony_ci fn eq(&self, other: &TestError) -> bool { 1148c67d6573Sopenharmony_ci self.span == other.span && self.kind == other.kind 1149c67d6573Sopenharmony_ci } 1150c67d6573Sopenharmony_ci } 1151c67d6573Sopenharmony_ci 1152c67d6573Sopenharmony_ci fn parse(pattern: &str) -> Ast { 1153c67d6573Sopenharmony_ci ParserBuilder::new().octal(true).build().parse(pattern).unwrap() 1154c67d6573Sopenharmony_ci } 1155c67d6573Sopenharmony_ci 1156c67d6573Sopenharmony_ci fn t(pattern: &str) -> Hir { 1157c67d6573Sopenharmony_ci TranslatorBuilder::new() 1158c67d6573Sopenharmony_ci .allow_invalid_utf8(false) 1159c67d6573Sopenharmony_ci .build() 1160c67d6573Sopenharmony_ci .translate(pattern, &parse(pattern)) 1161c67d6573Sopenharmony_ci .unwrap() 1162c67d6573Sopenharmony_ci } 1163c67d6573Sopenharmony_ci 1164c67d6573Sopenharmony_ci fn t_err(pattern: &str) -> hir::Error { 1165c67d6573Sopenharmony_ci TranslatorBuilder::new() 1166c67d6573Sopenharmony_ci .allow_invalid_utf8(false) 1167c67d6573Sopenharmony_ci .build() 1168c67d6573Sopenharmony_ci 
.translate(pattern, &parse(pattern)) 1169c67d6573Sopenharmony_ci .unwrap_err() 1170c67d6573Sopenharmony_ci } 1171c67d6573Sopenharmony_ci 1172c67d6573Sopenharmony_ci fn t_bytes(pattern: &str) -> Hir { 1173c67d6573Sopenharmony_ci TranslatorBuilder::new() 1174c67d6573Sopenharmony_ci .allow_invalid_utf8(true) 1175c67d6573Sopenharmony_ci .build() 1176c67d6573Sopenharmony_ci .translate(pattern, &parse(pattern)) 1177c67d6573Sopenharmony_ci .unwrap() 1178c67d6573Sopenharmony_ci } 1179c67d6573Sopenharmony_ci 1180c67d6573Sopenharmony_ci fn hir_lit(s: &str) -> Hir { 1181c67d6573Sopenharmony_ci match s.len() { 1182c67d6573Sopenharmony_ci 0 => Hir::empty(), 1183c67d6573Sopenharmony_ci _ => { 1184c67d6573Sopenharmony_ci let lits = s 1185c67d6573Sopenharmony_ci .chars() 1186c67d6573Sopenharmony_ci .map(hir::Literal::Unicode) 1187c67d6573Sopenharmony_ci .map(Hir::literal) 1188c67d6573Sopenharmony_ci .collect(); 1189c67d6573Sopenharmony_ci Hir::concat(lits) 1190c67d6573Sopenharmony_ci } 1191c67d6573Sopenharmony_ci } 1192c67d6573Sopenharmony_ci } 1193c67d6573Sopenharmony_ci 1194c67d6573Sopenharmony_ci fn hir_blit(s: &[u8]) -> Hir { 1195c67d6573Sopenharmony_ci match s.len() { 1196c67d6573Sopenharmony_ci 0 => Hir::empty(), 1197c67d6573Sopenharmony_ci 1 => Hir::literal(hir::Literal::Byte(s[0])), 1198c67d6573Sopenharmony_ci _ => { 1199c67d6573Sopenharmony_ci let lits = s 1200c67d6573Sopenharmony_ci .iter() 1201c67d6573Sopenharmony_ci .cloned() 1202c67d6573Sopenharmony_ci .map(hir::Literal::Byte) 1203c67d6573Sopenharmony_ci .map(Hir::literal) 1204c67d6573Sopenharmony_ci .collect(); 1205c67d6573Sopenharmony_ci Hir::concat(lits) 1206c67d6573Sopenharmony_ci } 1207c67d6573Sopenharmony_ci } 1208c67d6573Sopenharmony_ci } 1209c67d6573Sopenharmony_ci 1210c67d6573Sopenharmony_ci fn hir_group(i: u32, expr: Hir) -> Hir { 1211c67d6573Sopenharmony_ci Hir::group(hir::Group { 1212c67d6573Sopenharmony_ci kind: hir::GroupKind::CaptureIndex(i), 1213c67d6573Sopenharmony_ci hir: Box::new(expr), 
1214c67d6573Sopenharmony_ci }) 1215c67d6573Sopenharmony_ci } 1216c67d6573Sopenharmony_ci 1217c67d6573Sopenharmony_ci fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { 1218c67d6573Sopenharmony_ci Hir::group(hir::Group { 1219c67d6573Sopenharmony_ci kind: hir::GroupKind::CaptureName { 1220c67d6573Sopenharmony_ci name: name.to_string(), 1221c67d6573Sopenharmony_ci index: i, 1222c67d6573Sopenharmony_ci }, 1223c67d6573Sopenharmony_ci hir: Box::new(expr), 1224c67d6573Sopenharmony_ci }) 1225c67d6573Sopenharmony_ci } 1226c67d6573Sopenharmony_ci 1227c67d6573Sopenharmony_ci fn hir_group_nocap(expr: Hir) -> Hir { 1228c67d6573Sopenharmony_ci Hir::group(hir::Group { 1229c67d6573Sopenharmony_ci kind: hir::GroupKind::NonCapturing, 1230c67d6573Sopenharmony_ci hir: Box::new(expr), 1231c67d6573Sopenharmony_ci }) 1232c67d6573Sopenharmony_ci } 1233c67d6573Sopenharmony_ci 1234c67d6573Sopenharmony_ci fn hir_quest(greedy: bool, expr: Hir) -> Hir { 1235c67d6573Sopenharmony_ci Hir::repetition(hir::Repetition { 1236c67d6573Sopenharmony_ci kind: hir::RepetitionKind::ZeroOrOne, 1237c67d6573Sopenharmony_ci greedy, 1238c67d6573Sopenharmony_ci hir: Box::new(expr), 1239c67d6573Sopenharmony_ci }) 1240c67d6573Sopenharmony_ci } 1241c67d6573Sopenharmony_ci 1242c67d6573Sopenharmony_ci fn hir_star(greedy: bool, expr: Hir) -> Hir { 1243c67d6573Sopenharmony_ci Hir::repetition(hir::Repetition { 1244c67d6573Sopenharmony_ci kind: hir::RepetitionKind::ZeroOrMore, 1245c67d6573Sopenharmony_ci greedy, 1246c67d6573Sopenharmony_ci hir: Box::new(expr), 1247c67d6573Sopenharmony_ci }) 1248c67d6573Sopenharmony_ci } 1249c67d6573Sopenharmony_ci 1250c67d6573Sopenharmony_ci fn hir_plus(greedy: bool, expr: Hir) -> Hir { 1251c67d6573Sopenharmony_ci Hir::repetition(hir::Repetition { 1252c67d6573Sopenharmony_ci kind: hir::RepetitionKind::OneOrMore, 1253c67d6573Sopenharmony_ci greedy, 1254c67d6573Sopenharmony_ci hir: Box::new(expr), 1255c67d6573Sopenharmony_ci }) 1256c67d6573Sopenharmony_ci } 
1257c67d6573Sopenharmony_ci 1258c67d6573Sopenharmony_ci fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { 1259c67d6573Sopenharmony_ci Hir::repetition(hir::Repetition { 1260c67d6573Sopenharmony_ci kind: hir::RepetitionKind::Range(range), 1261c67d6573Sopenharmony_ci greedy, 1262c67d6573Sopenharmony_ci hir: Box::new(expr), 1263c67d6573Sopenharmony_ci }) 1264c67d6573Sopenharmony_ci } 1265c67d6573Sopenharmony_ci 1266c67d6573Sopenharmony_ci fn hir_alt(alts: Vec<Hir>) -> Hir { 1267c67d6573Sopenharmony_ci Hir::alternation(alts) 1268c67d6573Sopenharmony_ci } 1269c67d6573Sopenharmony_ci 1270c67d6573Sopenharmony_ci fn hir_cat(exprs: Vec<Hir>) -> Hir { 1271c67d6573Sopenharmony_ci Hir::concat(exprs) 1272c67d6573Sopenharmony_ci } 1273c67d6573Sopenharmony_ci 1274c67d6573Sopenharmony_ci #[allow(dead_code)] 1275c67d6573Sopenharmony_ci fn hir_uclass_query(query: ClassQuery<'_>) -> Hir { 1276c67d6573Sopenharmony_ci Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) 1277c67d6573Sopenharmony_ci } 1278c67d6573Sopenharmony_ci 1279c67d6573Sopenharmony_ci #[allow(dead_code)] 1280c67d6573Sopenharmony_ci fn hir_uclass_perl_word() -> Hir { 1281c67d6573Sopenharmony_ci Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) 1282c67d6573Sopenharmony_ci } 1283c67d6573Sopenharmony_ci 1284c67d6573Sopenharmony_ci fn hir_uclass(ranges: &[(char, char)]) -> Hir { 1285c67d6573Sopenharmony_ci let ranges: Vec<hir::ClassUnicodeRange> = ranges 1286c67d6573Sopenharmony_ci .iter() 1287c67d6573Sopenharmony_ci .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) 1288c67d6573Sopenharmony_ci .collect(); 1289c67d6573Sopenharmony_ci Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) 1290c67d6573Sopenharmony_ci } 1291c67d6573Sopenharmony_ci 1292c67d6573Sopenharmony_ci fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { 1293c67d6573Sopenharmony_ci let ranges: Vec<hir::ClassBytesRange> = ranges 1294c67d6573Sopenharmony_ci .iter() 1295c67d6573Sopenharmony_ci .map(|&(s, 
e)| hir::ClassBytesRange::new(s, e)) 1296c67d6573Sopenharmony_ci .collect(); 1297c67d6573Sopenharmony_ci Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) 1298c67d6573Sopenharmony_ci } 1299c67d6573Sopenharmony_ci 1300c67d6573Sopenharmony_ci fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { 1301c67d6573Sopenharmony_ci let ranges: Vec<hir::ClassBytesRange> = ranges 1302c67d6573Sopenharmony_ci .iter() 1303c67d6573Sopenharmony_ci .map(|&(s, e)| { 1304c67d6573Sopenharmony_ci assert!(s as u32 <= 0x7F); 1305c67d6573Sopenharmony_ci assert!(e as u32 <= 0x7F); 1306c67d6573Sopenharmony_ci hir::ClassBytesRange::new(s as u8, e as u8) 1307c67d6573Sopenharmony_ci }) 1308c67d6573Sopenharmony_ci .collect(); 1309c67d6573Sopenharmony_ci Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) 1310c67d6573Sopenharmony_ci } 1311c67d6573Sopenharmony_ci 1312c67d6573Sopenharmony_ci fn hir_case_fold(expr: Hir) -> Hir { 1313c67d6573Sopenharmony_ci match expr.into_kind() { 1314c67d6573Sopenharmony_ci HirKind::Class(mut cls) => { 1315c67d6573Sopenharmony_ci cls.case_fold_simple(); 1316c67d6573Sopenharmony_ci Hir::class(cls) 1317c67d6573Sopenharmony_ci } 1318c67d6573Sopenharmony_ci _ => panic!("cannot case fold non-class Hir expr"), 1319c67d6573Sopenharmony_ci } 1320c67d6573Sopenharmony_ci } 1321c67d6573Sopenharmony_ci 1322c67d6573Sopenharmony_ci fn hir_negate(expr: Hir) -> Hir { 1323c67d6573Sopenharmony_ci match expr.into_kind() { 1324c67d6573Sopenharmony_ci HirKind::Class(mut cls) => { 1325c67d6573Sopenharmony_ci cls.negate(); 1326c67d6573Sopenharmony_ci Hir::class(cls) 1327c67d6573Sopenharmony_ci } 1328c67d6573Sopenharmony_ci _ => panic!("cannot negate non-class Hir expr"), 1329c67d6573Sopenharmony_ci } 1330c67d6573Sopenharmony_ci } 1331c67d6573Sopenharmony_ci 1332c67d6573Sopenharmony_ci #[allow(dead_code)] 1333c67d6573Sopenharmony_ci fn hir_union(expr1: Hir, expr2: Hir) -> Hir { 1334c67d6573Sopenharmony_ci use crate::hir::Class::{Bytes, Unicode}; 
1335c67d6573Sopenharmony_ci 1336c67d6573Sopenharmony_ci match (expr1.into_kind(), expr2.into_kind()) { 1337c67d6573Sopenharmony_ci (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { 1338c67d6573Sopenharmony_ci c1.union(&c2); 1339c67d6573Sopenharmony_ci Hir::class(hir::Class::Unicode(c1)) 1340c67d6573Sopenharmony_ci } 1341c67d6573Sopenharmony_ci (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { 1342c67d6573Sopenharmony_ci c1.union(&c2); 1343c67d6573Sopenharmony_ci Hir::class(hir::Class::Bytes(c1)) 1344c67d6573Sopenharmony_ci } 1345c67d6573Sopenharmony_ci _ => panic!("cannot union non-class Hir exprs"), 1346c67d6573Sopenharmony_ci } 1347c67d6573Sopenharmony_ci } 1348c67d6573Sopenharmony_ci 1349c67d6573Sopenharmony_ci #[allow(dead_code)] 1350c67d6573Sopenharmony_ci fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { 1351c67d6573Sopenharmony_ci use crate::hir::Class::{Bytes, Unicode}; 1352c67d6573Sopenharmony_ci 1353c67d6573Sopenharmony_ci match (expr1.into_kind(), expr2.into_kind()) { 1354c67d6573Sopenharmony_ci (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { 1355c67d6573Sopenharmony_ci c1.difference(&c2); 1356c67d6573Sopenharmony_ci Hir::class(hir::Class::Unicode(c1)) 1357c67d6573Sopenharmony_ci } 1358c67d6573Sopenharmony_ci (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { 1359c67d6573Sopenharmony_ci c1.difference(&c2); 1360c67d6573Sopenharmony_ci Hir::class(hir::Class::Bytes(c1)) 1361c67d6573Sopenharmony_ci } 1362c67d6573Sopenharmony_ci _ => panic!("cannot difference non-class Hir exprs"), 1363c67d6573Sopenharmony_ci } 1364c67d6573Sopenharmony_ci } 1365c67d6573Sopenharmony_ci 1366c67d6573Sopenharmony_ci fn hir_anchor(anchor: hir::Anchor) -> Hir { 1367c67d6573Sopenharmony_ci Hir::anchor(anchor) 1368c67d6573Sopenharmony_ci } 1369c67d6573Sopenharmony_ci 1370c67d6573Sopenharmony_ci fn hir_word(wb: hir::WordBoundary) -> Hir { 1371c67d6573Sopenharmony_ci Hir::word_boundary(wb) 1372c67d6573Sopenharmony_ci } 
/// Patterns without any literal content translate to `Hir::empty()`,
/// wrapped in whatever groups and alternations the pattern spells out.
#[test]
fn empty() {
    let cases: Vec<(&str, Hir)> = vec![
        ("", Hir::empty()),
        ("(?i)", Hir::empty()),
        ("()", hir_group(1, Hir::empty())),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?P<wat>)", hir_group_name(1, "wat", Hir::empty())),
        ("|", hir_alt(vec![Hir::empty(), Hir::empty()])),
        (
            "()|()",
            hir_alt(vec![
                hir_group(1, Hir::empty()),
                hir_group(2, Hir::empty()),
            ]),
        ),
        ("(|b)", hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b")]))),
        ("(a|)", hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty()]))),
        (
            "(a||c)",
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c")]),
            ),
        ),
        (
            "(||)",
            hir_group(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty()]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}

/// Plain literals, in Unicode mode and in byte mode, plus the errors
/// reported for literals that the active mode cannot represent.
#[test]
fn literal() {
    let unicode_cases: Vec<(&str, Hir)> = vec![
        ("a", hir_lit("a")),
        ("(?-u)a", hir_lit("a")),
        ("☃", hir_lit("☃")),
        ("abcd", hir_lit("abcd")),
    ];
    for (pattern, expected) in unicode_cases {
        assert_eq!(t(pattern), expected);
    }

    // Note the mix of Rust-level escapes ("\x61") and regex-level escapes
    // (r"\x61"): both must end up as the literal `a`.
    let byte_cases: Vec<(&str, Hir)> = vec![
        ("(?-u)a", hir_lit("a")),
        ("(?-u)\x61", hir_lit("a")),
        (r"(?-u)\x61", hir_lit("a")),
        (r"(?-u)\xFF", hir_blit(b"\xFF")),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected);
    }

    let snowman_err = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), snowman_err);

    let high_byte_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), high_byte_err);
}

/// Case-insensitive literals become two-element (or larger) classes:
/// Unicode classes in Unicode mode, byte classes under `(?-u)`.
#[test]
fn literal_case_insensitive() {
    // Expected class for an ASCII letter folded in byte mode.
    let bfold =
        |upper: u8, lower: u8| hir_bclass(&[(upper, upper), (lower, lower)]);
    // Expected class for an ASCII letter folded in Unicode mode.
    #[cfg(feature = "unicode-case")]
    let ufold = |upper: char, lower: char| {
        hir_uclass(&[(upper, upper), (lower, lower)])
    };

    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), ufold('A', 'a'));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i:a)"), hir_group_nocap(ufold('A', 'a')));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![hir_lit("a"), ufold('A', 'a'), hir_lit("a")])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            ufold('A', 'a'),
            ufold('B', 'b'),
            hir_lit("@"),
            ufold('C', 'c'),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
    );

    assert_eq!(t("(?i-u)a"), bfold(b'A', b'a'));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![hir_lit("a"), bfold(b'A', b'a'), hir_lit("a")])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            bfold(b'A', b'a'),
            bfold(b'B', b'b'),
            hir_lit("@"),
            bfold(b'C', b'c'),
        ])
    );

    // The same letter spelled three ways must fold identically.
    let byte_patterns: [&str; 3] =
        ["(?i-u)a", "(?i-u)\x61", r"(?i-u)\x61"];
    for pattern in byte_patterns.iter() {
        assert_eq!(t_bytes(pattern), bfold(b'A', b'a'));
    }
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    let beta_err = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(6, 1, 7), Position::new(8, 1, 8)),
    };
    assert_eq!(t_err("(?i-u)β"), beta_err);
}

/// `.` excludes `\n` unless the `s` flag is set, in both Unicode and byte
/// mode.
#[test]
fn dot() {
    let any_but_newline =
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]);
    assert_eq!(t("."), any_but_newline);

    let any_char = hir_uclass(&[('\0', '\u{10FFFF}')]);
    assert_eq!(t("(?s)."), any_char);

    let any_byte_but_newline =
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]);
    assert_eq!(t_bytes("(?-u)."), any_byte_but_newline);

    let any_byte = hir_bclass(&[(b'\0', b'\xFF')]);
    assert_eq!(t_bytes("(?s-u)."), any_byte);

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let plain_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
    };
    assert_eq!(t_err("(?-u)."), plain_err);

    let dotall_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err("(?s-u)."), dotall_err);
}

/// Anchors and word boundaries, including the effect of the `m` and `u`
/// flags on which variant is produced.
#[test]
fn assertions() {
    let anchors: Vec<(&str, hir::Anchor)> = vec![
        ("^", hir::Anchor::StartText),
        ("$", hir::Anchor::EndText),
        (r"\A", hir::Anchor::StartText),
        (r"\z", hir::Anchor::EndText),
        ("(?m)^", hir::Anchor::StartLine),
        ("(?m)$", hir::Anchor::EndLine),
        (r"(?m)\A", hir::Anchor::StartText),
        (r"(?m)\z", hir::Anchor::EndText),
    ];
    for (pattern, anchor) in anchors {
        assert_eq!(t(pattern), hir_anchor(anchor));
    }

    let boundaries: Vec<(&str, hir::WordBoundary)> = vec![
        (r"\b", hir::WordBoundary::Unicode),
        (r"\B", hir::WordBoundary::UnicodeNegate),
        (r"(?-u)\b", hir::WordBoundary::Ascii),
    ];
    for (pattern, boundary) in boundaries {
        assert_eq!(t(pattern), hir_word(boundary));
    }
    assert_eq!(
        t_bytes(r"(?-u)\B"),
        hir_word(hir::WordBoundary::AsciiNegate)
    );

    let negated_ascii_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err(r"(?-u)\B"), negated_ascii_err);
}

/// Capturing, named and non-capturing groups, checking that capture
/// indices are assigned in order and skip non-capturing groups.
#[test]
fn group() {
    let cases: Vec<(&str, Hir)> = vec![
        ("(a)", hir_group(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_group_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_group_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", hir_group_nocap(Hir::empty())),
        ("(?:a)", hir_group_nocap(hir_lit("a"))),
        (
            "(?:a)(b)",
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ]),
        ),
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ]),
        ),
        ("()", hir_group(1, Hir::empty())),
        ("((?i))", hir_group(1, Hir::empty())),
        ("((?x))", hir_group(1, Hir::empty())),
        ("(((?x)))", hir_group(1, hir_group(2, Hir::empty()))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected);
    }
}

/// Flag scoping: flags set inside a group end with the group, and flags
/// set inline apply to the remainder of the enclosing group.
#[test]
fn flags() {
    // Expected translation of a case-insensitive `a` in byte mode.
    let bfold_a = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);
    // Expected translation of a case-insensitive `a` in Unicode mode.
    #[cfg(feature = "unicode-case")]
    let ufold_a = || hir_uclass(&[('A', 'A'), ('a', 'a')]);

    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![hir_group_nocap(ufold_a()), hir_lit("a")])
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![hir_group_nocap(bfold_a()), hir_lit("β")])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![hir_group_nocap(bfold_a()), hir_lit("b")])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![hir_group(1, bfold_a()), hir_lit("b")])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![hir_group_nocap(hir_lit("a")), ufold_a()])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![ufold_a(), hir_anchor(hir::Anchor::StartLine)])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            ufold_a(),
            hir_anchor(hir::Anchor::StartLine),
            ufold_a(),
            hir_anchor(hir::Anchor::StartText),
        ])
    );

    // `U` swaps the meaning of greedy and lazy operators until it is
    // cleared again.
    let swapped_then_normal: Vec<Hir> = [false, true, true, false]
        .iter()
        .map(|&greedy| hir_star(greedy, hir_lit("a")))
        .collect();
    assert_eq!(t("(?U)a*a*?(?-U)a*a*?"), hir_cat(swapped_then_normal));

    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![hir_lit("a"), ufold_a()])),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![ufold_a(), hir_lit("a")])),
            ufold_a(),
        ])
    );
}
1740c67d6573Sopenharmony_ci 1741c67d6573Sopenharmony_ci #[test] 1742c67d6573Sopenharmony_ci fn escape() { 1743c67d6573Sopenharmony_ci assert_eq!( 1744c67d6573Sopenharmony_ci t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), 1745c67d6573Sopenharmony_ci hir_lit(r"\.+*?()|[]{}^$#") 1746c67d6573Sopenharmony_ci ); 1747c67d6573Sopenharmony_ci } 1748c67d6573Sopenharmony_ci 1749c67d6573Sopenharmony_ci #[test] 1750c67d6573Sopenharmony_ci fn repetition() { 1751c67d6573Sopenharmony_ci assert_eq!(t("a?"), hir_quest(true, hir_lit("a"))); 1752c67d6573Sopenharmony_ci assert_eq!(t("a*"), hir_star(true, hir_lit("a"))); 1753c67d6573Sopenharmony_ci assert_eq!(t("a+"), hir_plus(true, hir_lit("a"))); 1754c67d6573Sopenharmony_ci assert_eq!(t("a??"), hir_quest(false, hir_lit("a"))); 1755c67d6573Sopenharmony_ci assert_eq!(t("a*?"), hir_star(false, hir_lit("a"))); 1756c67d6573Sopenharmony_ci assert_eq!(t("a+?"), hir_plus(false, hir_lit("a"))); 1757c67d6573Sopenharmony_ci 1758c67d6573Sopenharmony_ci assert_eq!( 1759c67d6573Sopenharmony_ci t("a{1}"), 1760c67d6573Sopenharmony_ci hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),) 1761c67d6573Sopenharmony_ci ); 1762c67d6573Sopenharmony_ci assert_eq!( 1763c67d6573Sopenharmony_ci t("a{1,}"), 1764c67d6573Sopenharmony_ci hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) 1765c67d6573Sopenharmony_ci ); 1766c67d6573Sopenharmony_ci assert_eq!( 1767c67d6573Sopenharmony_ci t("a{1,2}"), 1768c67d6573Sopenharmony_ci hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),) 1769c67d6573Sopenharmony_ci ); 1770c67d6573Sopenharmony_ci assert_eq!( 1771c67d6573Sopenharmony_ci t("a{1}?"), 1772c67d6573Sopenharmony_ci hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),) 1773c67d6573Sopenharmony_ci ); 1774c67d6573Sopenharmony_ci assert_eq!( 1775c67d6573Sopenharmony_ci t("a{1,}?"), 1776c67d6573Sopenharmony_ci hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),) 1777c67d6573Sopenharmony_ci ); 
1778c67d6573Sopenharmony_ci assert_eq!( 1779c67d6573Sopenharmony_ci t("a{1,2}?"), 1780c67d6573Sopenharmony_ci hir_range( 1781c67d6573Sopenharmony_ci false, 1782c67d6573Sopenharmony_ci hir::RepetitionRange::Bounded(1, 2), 1783c67d6573Sopenharmony_ci hir_lit("a"), 1784c67d6573Sopenharmony_ci ) 1785c67d6573Sopenharmony_ci ); 1786c67d6573Sopenharmony_ci 1787c67d6573Sopenharmony_ci assert_eq!( 1788c67d6573Sopenharmony_ci t("ab?"), 1789c67d6573Sopenharmony_ci hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) 1790c67d6573Sopenharmony_ci ); 1791c67d6573Sopenharmony_ci assert_eq!( 1792c67d6573Sopenharmony_ci t("(ab)?"), 1793c67d6573Sopenharmony_ci hir_quest( 1794c67d6573Sopenharmony_ci true, 1795c67d6573Sopenharmony_ci hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) 1796c67d6573Sopenharmony_ci ) 1797c67d6573Sopenharmony_ci ); 1798c67d6573Sopenharmony_ci assert_eq!( 1799c67d6573Sopenharmony_ci t("a|b?"), 1800c67d6573Sopenharmony_ci hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) 1801c67d6573Sopenharmony_ci ); 1802c67d6573Sopenharmony_ci } 1803c67d6573Sopenharmony_ci 1804c67d6573Sopenharmony_ci #[test] 1805c67d6573Sopenharmony_ci fn cat_alt() { 1806c67d6573Sopenharmony_ci assert_eq!( 1807c67d6573Sopenharmony_ci t("(ab)"), 1808c67d6573Sopenharmony_ci hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),])) 1809c67d6573Sopenharmony_ci ); 1810c67d6573Sopenharmony_ci assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),])); 1811c67d6573Sopenharmony_ci assert_eq!( 1812c67d6573Sopenharmony_ci t("a|b|c"), 1813c67d6573Sopenharmony_ci hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) 1814c67d6573Sopenharmony_ci ); 1815c67d6573Sopenharmony_ci assert_eq!( 1816c67d6573Sopenharmony_ci t("ab|bc|cd"), 1817c67d6573Sopenharmony_ci hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) 1818c67d6573Sopenharmony_ci ); 1819c67d6573Sopenharmony_ci assert_eq!( 1820c67d6573Sopenharmony_ci t("(a|b)"), 1821c67d6573Sopenharmony_ci hir_group(1, 
hir_alt(vec![hir_lit("a"), hir_lit("b"),])) 1822c67d6573Sopenharmony_ci ); 1823c67d6573Sopenharmony_ci assert_eq!( 1824c67d6573Sopenharmony_ci t("(a|b|c)"), 1825c67d6573Sopenharmony_ci hir_group( 1826c67d6573Sopenharmony_ci 1, 1827c67d6573Sopenharmony_ci hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),]) 1828c67d6573Sopenharmony_ci ) 1829c67d6573Sopenharmony_ci ); 1830c67d6573Sopenharmony_ci assert_eq!( 1831c67d6573Sopenharmony_ci t("(ab|bc|cd)"), 1832c67d6573Sopenharmony_ci hir_group( 1833c67d6573Sopenharmony_ci 1, 1834c67d6573Sopenharmony_ci hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),]) 1835c67d6573Sopenharmony_ci ) 1836c67d6573Sopenharmony_ci ); 1837c67d6573Sopenharmony_ci assert_eq!( 1838c67d6573Sopenharmony_ci t("(ab|(bc|(cd)))"), 1839c67d6573Sopenharmony_ci hir_group( 1840c67d6573Sopenharmony_ci 1, 1841c67d6573Sopenharmony_ci hir_alt(vec![ 1842c67d6573Sopenharmony_ci hir_lit("ab"), 1843c67d6573Sopenharmony_ci hir_group( 1844c67d6573Sopenharmony_ci 2, 1845c67d6573Sopenharmony_ci hir_alt(vec![ 1846c67d6573Sopenharmony_ci hir_lit("bc"), 1847c67d6573Sopenharmony_ci hir_group(3, hir_lit("cd")), 1848c67d6573Sopenharmony_ci ]) 1849c67d6573Sopenharmony_ci ), 1850c67d6573Sopenharmony_ci ]) 1851c67d6573Sopenharmony_ci ) 1852c67d6573Sopenharmony_ci ); 1853c67d6573Sopenharmony_ci } 1854c67d6573Sopenharmony_ci 1855c67d6573Sopenharmony_ci #[test] 1856c67d6573Sopenharmony_ci fn class_ascii() { 1857c67d6573Sopenharmony_ci assert_eq!( 1858c67d6573Sopenharmony_ci t("[[:alnum:]]"), 1859c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)) 1860c67d6573Sopenharmony_ci ); 1861c67d6573Sopenharmony_ci assert_eq!( 1862c67d6573Sopenharmony_ci t("[[:alpha:]]"), 1863c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha)) 1864c67d6573Sopenharmony_ci ); 1865c67d6573Sopenharmony_ci assert_eq!( 1866c67d6573Sopenharmony_ci t("[[:ascii:]]"), 1867c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii)) 
1868c67d6573Sopenharmony_ci ); 1869c67d6573Sopenharmony_ci assert_eq!( 1870c67d6573Sopenharmony_ci t("[[:blank:]]"), 1871c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank)) 1872c67d6573Sopenharmony_ci ); 1873c67d6573Sopenharmony_ci assert_eq!( 1874c67d6573Sopenharmony_ci t("[[:cntrl:]]"), 1875c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl)) 1876c67d6573Sopenharmony_ci ); 1877c67d6573Sopenharmony_ci assert_eq!( 1878c67d6573Sopenharmony_ci t("[[:digit:]]"), 1879c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit)) 1880c67d6573Sopenharmony_ci ); 1881c67d6573Sopenharmony_ci assert_eq!( 1882c67d6573Sopenharmony_ci t("[[:graph:]]"), 1883c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph)) 1884c67d6573Sopenharmony_ci ); 1885c67d6573Sopenharmony_ci assert_eq!( 1886c67d6573Sopenharmony_ci t("[[:lower:]]"), 1887c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)) 1888c67d6573Sopenharmony_ci ); 1889c67d6573Sopenharmony_ci assert_eq!( 1890c67d6573Sopenharmony_ci t("[[:print:]]"), 1891c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Print)) 1892c67d6573Sopenharmony_ci ); 1893c67d6573Sopenharmony_ci assert_eq!( 1894c67d6573Sopenharmony_ci t("[[:punct:]]"), 1895c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct)) 1896c67d6573Sopenharmony_ci ); 1897c67d6573Sopenharmony_ci assert_eq!( 1898c67d6573Sopenharmony_ci t("[[:space:]]"), 1899c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Space)) 1900c67d6573Sopenharmony_ci ); 1901c67d6573Sopenharmony_ci assert_eq!( 1902c67d6573Sopenharmony_ci t("[[:upper:]]"), 1903c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper)) 1904c67d6573Sopenharmony_ci ); 1905c67d6573Sopenharmony_ci assert_eq!( 1906c67d6573Sopenharmony_ci t("[[:word:]]"), 1907c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Word)) 
1908c67d6573Sopenharmony_ci ); 1909c67d6573Sopenharmony_ci assert_eq!( 1910c67d6573Sopenharmony_ci t("[[:xdigit:]]"), 1911c67d6573Sopenharmony_ci hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit)) 1912c67d6573Sopenharmony_ci ); 1913c67d6573Sopenharmony_ci 1914c67d6573Sopenharmony_ci assert_eq!( 1915c67d6573Sopenharmony_ci t("[[:^lower:]]"), 1916c67d6573Sopenharmony_ci hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))) 1917c67d6573Sopenharmony_ci ); 1918c67d6573Sopenharmony_ci #[cfg(feature = "unicode-case")] 1919c67d6573Sopenharmony_ci assert_eq!( 1920c67d6573Sopenharmony_ci t("(?i)[[:lower:]]"), 1921c67d6573Sopenharmony_ci hir_uclass(&[ 1922c67d6573Sopenharmony_ci ('A', 'Z'), 1923c67d6573Sopenharmony_ci ('a', 'z'), 1924c67d6573Sopenharmony_ci ('\u{17F}', '\u{17F}'), 1925c67d6573Sopenharmony_ci ('\u{212A}', '\u{212A}'), 1926c67d6573Sopenharmony_ci ]) 1927c67d6573Sopenharmony_ci ); 1928c67d6573Sopenharmony_ci 1929c67d6573Sopenharmony_ci assert_eq!( 1930c67d6573Sopenharmony_ci t("(?-u)[[:lower:]]"), 1931c67d6573Sopenharmony_ci hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower)) 1932c67d6573Sopenharmony_ci ); 1933c67d6573Sopenharmony_ci assert_eq!( 1934c67d6573Sopenharmony_ci t("(?i-u)[[:lower:]]"), 1935c67d6573Sopenharmony_ci hir_case_fold(hir_bclass_from_char(ascii_class( 1936c67d6573Sopenharmony_ci &ast::ClassAsciiKind::Lower 1937c67d6573Sopenharmony_ci ))) 1938c67d6573Sopenharmony_ci ); 1939c67d6573Sopenharmony_ci 1940c67d6573Sopenharmony_ci assert_eq!( 1941c67d6573Sopenharmony_ci t_err("(?-u)[[:^lower:]]"), 1942c67d6573Sopenharmony_ci TestError { 1943c67d6573Sopenharmony_ci kind: hir::ErrorKind::InvalidUtf8, 1944c67d6573Sopenharmony_ci span: Span::new( 1945c67d6573Sopenharmony_ci Position::new(6, 1, 7), 1946c67d6573Sopenharmony_ci Position::new(16, 1, 17) 1947c67d6573Sopenharmony_ci ), 1948c67d6573Sopenharmony_ci } 1949c67d6573Sopenharmony_ci ); 1950c67d6573Sopenharmony_ci assert_eq!( 1951c67d6573Sopenharmony_ci 
t_err("(?i-u)[[:^lower:]]"), 1952c67d6573Sopenharmony_ci TestError { 1953c67d6573Sopenharmony_ci kind: hir::ErrorKind::InvalidUtf8, 1954c67d6573Sopenharmony_ci span: Span::new( 1955c67d6573Sopenharmony_ci Position::new(7, 1, 8), 1956c67d6573Sopenharmony_ci Position::new(17, 1, 18) 1957c67d6573Sopenharmony_ci ), 1958c67d6573Sopenharmony_ci } 1959c67d6573Sopenharmony_ci ); 1960c67d6573Sopenharmony_ci } 1961c67d6573Sopenharmony_ci

    /// Several POSIX ASCII classes written inside one bracketed class must
    /// be unioned, both in Unicode mode and in byte (`(?-u)`) mode.
    ///
    /// See: https://github.com/rust-lang/regex/issues/680
    #[test]
    fn class_ascii_multiple() {
        // Unicode mode: expected value is alnum plus U+0080..=U+10FFFF.
        let unicode_expected = hir_union(
            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
            hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
        );
        assert_eq!(t("[[:alnum:][:^ascii:]]"), unicode_expected);

        // Byte mode: expected value is alnum plus the bytes 0x80..=0xFF.
        let bytes_expected = hir_union(
            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
            hir_bclass(&[(0x80, 0xFF)]),
        );
        assert_eq!(t_bytes("(?-u)[[:alnum:][:^ascii:]]"), bytes_expected);
    }

    /// Translation of the Perl classes `\d`, `\s`, `\w` and their negations,
    /// with and without the `i` flag, in Unicode and ASCII-only mode.
    #[test]
    #[cfg(feature = "unicode-perl")]
    fn class_perl() {
        let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
        let space = || hir_uclass_query(ClassQuery::Binary("space"));

        // Unicode
        assert_eq!(t(r"\d"), digit());
        assert_eq!(t(r"\s"), space());
        assert_eq!(t(r"\w"), hir_uclass_perl_word());
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\d"), digit());
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\s"), space());
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());

        // Unicode, negated
        assert_eq!(t(r"\D"), hir_negate(digit()));
        assert_eq!(t(r"\S"), hir_negate(space()));
        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\S"), hir_negate(space()));
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));

        // ASCII only: the expected byte class is the same with and without
        // the `i` flag.
        let ascii_only = [
            (r"(?-u)\d", ast::ClassAsciiKind::Digit),
            (r"(?-u)\s", ast::ClassAsciiKind::Space),
            (r"(?-u)\w", ast::ClassAsciiKind::Word),
            (r"(?i-u)\d", ast::ClassAsciiKind::Digit),
            (r"(?i-u)\s", ast::ClassAsciiKind::Space),
            (r"(?i-u)\w", ast::ClassAsciiKind::Word),
        ];
        for &(pattern, ref kind) in &ascii_only {
            assert_eq!(t(pattern), hir_bclass_from_char(ascii_class(kind)));
        }

        // ASCII only, negated
        let ascii_only_negated = [
            (r"(?-u)\D", ast::ClassAsciiKind::Digit),
            (r"(?-u)\S", ast::ClassAsciiKind::Space),
            (r"(?-u)\W", ast::ClassAsciiKind::Word),
            (r"(?i-u)\D", ast::ClassAsciiKind::Digit),
            (r"(?i-u)\S", ast::ClassAsciiKind::Space),
            (r"(?i-u)\W", ast::ClassAsciiKind::Word),
        ];
        for &(pattern, ref kind) in &ascii_only_negated {
            assert_eq!(
                t(pattern),
                hir_negate(hir_bclass_from_char(ascii_class(kind)))
            );
        }
    }

    /// Without the `unicode-perl` feature, `\w` is rejected with an error
    /// spanning the whole pattern.
    #[test]
    #[cfg(not(feature = "unicode-perl"))]
    fn class_perl_word_disabled() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
        };
        assert_eq!(t_err(r"\w"), expected);
    }

    /// Without both `unicode-perl` and `unicode-bool`, `\s` is rejected with
    /// an error spanning the whole pattern.
    #[test]
    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
    fn class_perl_space_disabled() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
        };
        assert_eq!(t_err(r"\s"), expected);
    }

    /// Without both `unicode-perl` and `unicode-gencat`, `\d` is rejected
    /// with an error spanning the whole pattern.
    #[test]
    #[cfg(all(
        not(feature = "unicode-perl"),
        not(feature = "unicode-gencat")
    ))]
    fn class_perl_digit_disabled() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3)),
        };
        assert_eq!(t_err(r"\d"), expected);
    }

    /// General-category classes: the spellings that must resolve to the
    /// same class, negated forms, the special `any`/`assigned`/`ascii`
    /// names, and the errors reported for disallowed or unknown properties.
    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn class_unicode_gencat() {
        let separator = || hir_uclass_query(ClassQuery::Binary("Z"));
        let other = || hir_uclass_query(ClassQuery::Binary("Other"));
        // Every expected span in this test is on line 1, where the column
        // is the offset plus one.
        let failure = |kind: hir::ErrorKind, start: usize, end: usize| {
            TestError {
                kind,
                span: Span::new(
                    Position::new(start, 1, start + 1),
                    Position::new(end, 1, end + 1),
                ),
            }
        };

        // All of these spellings name the same class.
        let separator_spellings = [
            r"\pZ",
            r"\pz",
            r"\p{Separator}",
            r"\p{se PaRa ToR}",
            r"\p{gc:Separator}",
            r"\p{gc=Separator}",
        ];
        for &pattern in &separator_spellings {
            assert_eq!(t(pattern), separator());
        }
        for &pattern in &[r"\p{Other}", r"\pC"] {
            assert_eq!(t(pattern), other());
        }

        // NOTE(review): `\P` combined with `!=` reads like a double
        // negation, yet the last pattern is expected to be negated like
        // the other two -- confirm this is the translator's intended
        // behavior.
        for &pattern in &[r"\PZ", r"\P{separator}", r"\P{gc!=separator}"] {
            assert_eq!(t(pattern), hir_negate(separator()));
        }

        let special_names = [
            (r"\p{any}", "Any"),
            (r"\p{assigned}", "Assigned"),
            (r"\p{ascii}", "ASCII"),
            (r"\p{gc:any}", "Any"),
            (r"\p{gc:assigned}", "Assigned"),
            (r"\p{gc:ascii}", "ASCII"),
        ];
        for &(pattern, name) in &special_names {
            assert_eq!(t(pattern), hir_uclass_query(ClassQuery::Binary(name)));
        }

        assert_eq!(
            t_err(r"(?-u)\pZ"),
            failure(hir::ErrorKind::UnicodeNotAllowed, 5, 8)
        );
        assert_eq!(
            t_err(r"(?-u)\p{Separator}"),
            failure(hir::ErrorKind::UnicodeNotAllowed, 5, 18)
        );
        assert_eq!(
            t_err(r"\pE"),
            failure(hir::ErrorKind::UnicodePropertyNotFound, 0, 3)
        );
        assert_eq!(
            t_err(r"\p{Foo}"),
            failure(hir::ErrorKind::UnicodePropertyNotFound, 0, 7)
        );
        assert_eq!(
            t_err(r"\p{gc:Foo}"),
            failure(hir::ErrorKind::UnicodePropertyValueNotFound, 0, 10)
        );
    }

    /// Without `unicode-gencat`, general-category names are reported as
    /// unknown properties.
    #[test]
    #[cfg(not(feature = "unicode-gencat"))]
    fn class_unicode_gencat_disabled() {
        let not_found = |end: usize| TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };

        assert_eq!(t_err(r"\p{Separator}"), not_found(13));

        assert_eq!(t_err(r"\p{Any}"), not_found(7));
    }

    /// Script classes: plain, case-folded, negated, and the error reported
    /// for an unknown script value.
    #[test]
    #[cfg(feature = "unicode-script")]
    fn class_unicode_script() {
        let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));
        let value_not_found = |end: usize| TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };

        assert_eq!(t(r"\p{Greek}"), greek());
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

        assert_eq!(t_err(r"\p{sc:Foo}"), value_not_found(10));
        assert_eq!(t_err(r"\p{scx:Foo}"), value_not_found(11));
    }

    /// Without `unicode-script`, script names are reported as unknown
    /// properties.
    #[test]
    #[cfg(not(feature = "unicode-script"))]
    fn class_unicode_script_disabled() {
        let not_found = |end: usize| TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
        };

        assert_eq!(t_err(r"\p{Greek}"), not_found(9));

        assert_eq!(t_err(r"\p{scx:Greek}"), not_found(13));
    }

    /// An unknown `age` value is reported as a missing property value.
    #[test]
    #[cfg(feature = "unicode-age")]
    fn class_unicode_age() {
        let expected = TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
        };
        assert_eq!(t_err(r"\p{age:Foo}"), expected);
    }

    /// Negating `any` is rejected as an empty class.
    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn class_unicode_any_empty() {
        let expected = TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
        };
        assert_eq!(t_err(r"\P{any}"), expected);
    }
2374c67d6573Sopenharmony_ci 2375c67d6573Sopenharmony_ci #[test] 2376c67d6573Sopenharmony_ci #[cfg(not(feature = "unicode-age"))] 2377c67d6573Sopenharmony_ci fn class_unicode_age_disabled() { 2378c67d6573Sopenharmony_ci assert_eq!( 2379c67d6573Sopenharmony_ci t_err(r"\p{age:3.0}"), 2380c67d6573Sopenharmony_ci TestError { 2381c67d6573Sopenharmony_ci kind: hir::ErrorKind::UnicodePropertyNotFound, 2382c67d6573Sopenharmony_ci span: Span::new( 2383c67d6573Sopenharmony_ci Position::new(0, 1, 1), 2384c67d6573Sopenharmony_ci Position::new(11, 1, 12) 2385c67d6573Sopenharmony_ci ), 2386c67d6573Sopenharmony_ci } 2387c67d6573Sopenharmony_ci ); 2388c67d6573Sopenharmony_ci } 2389c67d6573Sopenharmony_ci 2390c67d6573Sopenharmony_ci #[test] 2391c67d6573Sopenharmony_ci fn class_bracketed() { 2392c67d6573Sopenharmony_ci assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')])); 2393c67d6573Sopenharmony_ci assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')]))); 2394c67d6573Sopenharmony_ci assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')])); 2395c67d6573Sopenharmony_ci assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')])); 2396c67d6573Sopenharmony_ci assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')])); 2397c67d6573Sopenharmony_ci assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')])); 2398c67d6573Sopenharmony_ci assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')])); 2399c67d6573Sopenharmony_ci assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')])); 2400c67d6573Sopenharmony_ci #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] 2401c67d6573Sopenharmony_ci assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit"))); 2402c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2403c67d6573Sopenharmony_ci assert_eq!( 2404c67d6573Sopenharmony_ci t(r"[\pZ]"), 2405c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2406c67d6573Sopenharmony_ci ); 2407c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2408c67d6573Sopenharmony_ci assert_eq!( 
2409c67d6573Sopenharmony_ci t(r"[\p{separator}]"), 2410c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2411c67d6573Sopenharmony_ci ); 2412c67d6573Sopenharmony_ci #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] 2413c67d6573Sopenharmony_ci assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit"))); 2414c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2415c67d6573Sopenharmony_ci assert_eq!( 2416c67d6573Sopenharmony_ci t(r"[^\PZ]"), 2417c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2418c67d6573Sopenharmony_ci ); 2419c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2420c67d6573Sopenharmony_ci assert_eq!( 2421c67d6573Sopenharmony_ci t(r"[^\P{separator}]"), 2422c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2423c67d6573Sopenharmony_ci ); 2424c67d6573Sopenharmony_ci #[cfg(all( 2425c67d6573Sopenharmony_ci feature = "unicode-case", 2426c67d6573Sopenharmony_ci any(feature = "unicode-perl", feature = "unicode-gencat") 2427c67d6573Sopenharmony_ci ))] 2428c67d6573Sopenharmony_ci assert_eq!( 2429c67d6573Sopenharmony_ci t(r"(?i)[^\D]"), 2430c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("digit")) 2431c67d6573Sopenharmony_ci ); 2432c67d6573Sopenharmony_ci #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] 2433c67d6573Sopenharmony_ci assert_eq!( 2434c67d6573Sopenharmony_ci t(r"(?i)[^\P{greek}]"), 2435c67d6573Sopenharmony_ci hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek"))) 2436c67d6573Sopenharmony_ci ); 2437c67d6573Sopenharmony_ci 2438c67d6573Sopenharmony_ci assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')])); 2439c67d6573Sopenharmony_ci assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')])); 2440c67d6573Sopenharmony_ci assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')])); 2441c67d6573Sopenharmony_ci 2442c67d6573Sopenharmony_ci #[cfg(feature = "unicode-case")] 2443c67d6573Sopenharmony_ci 
assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')])); 2444c67d6573Sopenharmony_ci #[cfg(feature = "unicode-case")] 2445c67d6573Sopenharmony_ci assert_eq!( 2446c67d6573Sopenharmony_ci t("(?i)[k]"), 2447c67d6573Sopenharmony_ci hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),]) 2448c67d6573Sopenharmony_ci ); 2449c67d6573Sopenharmony_ci #[cfg(feature = "unicode-case")] 2450c67d6573Sopenharmony_ci assert_eq!( 2451c67d6573Sopenharmony_ci t("(?i)[β]"), 2452c67d6573Sopenharmony_ci hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),]) 2453c67d6573Sopenharmony_ci ); 2454c67d6573Sopenharmony_ci assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),])); 2455c67d6573Sopenharmony_ci 2456c67d6573Sopenharmony_ci assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')]))); 2457c67d6573Sopenharmony_ci assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')]))); 2458c67d6573Sopenharmony_ci assert_eq!( 2459c67d6573Sopenharmony_ci t_bytes("(?-u)[^a]"), 2460c67d6573Sopenharmony_ci hir_negate(hir_bclass(&[(b'a', b'a')])) 2461c67d6573Sopenharmony_ci ); 2462c67d6573Sopenharmony_ci #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] 2463c67d6573Sopenharmony_ci assert_eq!( 2464c67d6573Sopenharmony_ci t(r"[^\d]"), 2465c67d6573Sopenharmony_ci hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) 2466c67d6573Sopenharmony_ci ); 2467c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2468c67d6573Sopenharmony_ci assert_eq!( 2469c67d6573Sopenharmony_ci t(r"[^\pZ]"), 2470c67d6573Sopenharmony_ci hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) 2471c67d6573Sopenharmony_ci ); 2472c67d6573Sopenharmony_ci #[cfg(feature = "unicode-gencat")] 2473c67d6573Sopenharmony_ci assert_eq!( 2474c67d6573Sopenharmony_ci t(r"[^\p{separator}]"), 2475c67d6573Sopenharmony_ci hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) 2476c67d6573Sopenharmony_ci ); 2477c67d6573Sopenharmony_ci #[cfg(all(feature = "unicode-case", feature = 
"unicode-script"))] 2478c67d6573Sopenharmony_ci assert_eq!( 2479c67d6573Sopenharmony_ci t(r"(?i)[^\p{greek}]"), 2480c67d6573Sopenharmony_ci hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( 2481c67d6573Sopenharmony_ci "greek" 2482c67d6573Sopenharmony_ci )))) 2483c67d6573Sopenharmony_ci ); 2484c67d6573Sopenharmony_ci #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] 2485c67d6573Sopenharmony_ci assert_eq!( 2486c67d6573Sopenharmony_ci t(r"(?i)[\P{greek}]"), 2487c67d6573Sopenharmony_ci hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( 2488c67d6573Sopenharmony_ci "greek" 2489c67d6573Sopenharmony_ci )))) 2490c67d6573Sopenharmony_ci ); 2491c67d6573Sopenharmony_ci 2492c67d6573Sopenharmony_ci // Test some weird cases. 2493c67d6573Sopenharmony_ci assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')])); 2494c67d6573Sopenharmony_ci 2495c67d6573Sopenharmony_ci assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')])); 2496c67d6573Sopenharmony_ci assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')])); 2497c67d6573Sopenharmony_ci assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')])); 2498c67d6573Sopenharmony_ci assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')])); 2499c67d6573Sopenharmony_ci assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')])); 2500c67d6573Sopenharmony_ci 2501c67d6573Sopenharmony_ci assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')])); 2502c67d6573Sopenharmony_ci assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')])); 2503c67d6573Sopenharmony_ci assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')])); 2504c67d6573Sopenharmony_ci assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')])); 2505c67d6573Sopenharmony_ci assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')])); 2506c67d6573Sopenharmony_ci 2507c67d6573Sopenharmony_ci assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')])); 2508c67d6573Sopenharmony_ci assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')])); 2509c67d6573Sopenharmony_ci assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')])); 
2510c67d6573Sopenharmony_ci assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')])); 2511c67d6573Sopenharmony_ci assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')])); 2512c67d6573Sopenharmony_ci 2513c67d6573Sopenharmony_ci assert_eq!( 2514c67d6573Sopenharmony_ci t_err("(?-u)[^a]"), 2515c67d6573Sopenharmony_ci TestError { 2516c67d6573Sopenharmony_ci kind: hir::ErrorKind::InvalidUtf8, 2517c67d6573Sopenharmony_ci span: Span::new( 2518c67d6573Sopenharmony_ci Position::new(5, 1, 6), 2519c67d6573Sopenharmony_ci Position::new(9, 1, 10) 2520c67d6573Sopenharmony_ci ), 2521c67d6573Sopenharmony_ci } 2522c67d6573Sopenharmony_ci ); 2523c67d6573Sopenharmony_ci #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] 2524c67d6573Sopenharmony_ci assert_eq!( 2525c67d6573Sopenharmony_ci t_err(r"[^\s\S]"), 2526c67d6573Sopenharmony_ci TestError { 2527c67d6573Sopenharmony_ci kind: hir::ErrorKind::EmptyClassNotAllowed, 2528c67d6573Sopenharmony_ci span: Span::new( 2529c67d6573Sopenharmony_ci Position::new(0, 1, 1), 2530c67d6573Sopenharmony_ci Position::new(7, 1, 8) 2531c67d6573Sopenharmony_ci ), 2532c67d6573Sopenharmony_ci } 2533c67d6573Sopenharmony_ci ); 2534c67d6573Sopenharmony_ci #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] 2535c67d6573Sopenharmony_ci assert_eq!( 2536c67d6573Sopenharmony_ci t_err(r"(?-u)[^\s\S]"), 2537c67d6573Sopenharmony_ci TestError { 2538c67d6573Sopenharmony_ci kind: hir::ErrorKind::EmptyClassNotAllowed, 2539c67d6573Sopenharmony_ci span: Span::new( 2540c67d6573Sopenharmony_ci Position::new(5, 1, 6), 2541c67d6573Sopenharmony_ci Position::new(12, 1, 13) 2542c67d6573Sopenharmony_ci ), 2543c67d6573Sopenharmony_ci } 2544c67d6573Sopenharmony_ci ); 2545c67d6573Sopenharmony_ci } 2546c67d6573Sopenharmony_ci 2547c67d6573Sopenharmony_ci #[test] 2548c67d6573Sopenharmony_ci fn class_bracketed_union() { 2549c67d6573Sopenharmony_ci assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); 2550c67d6573Sopenharmony_ci #[cfg(feature = 
"unicode-gencat")] 2551c67d6573Sopenharmony_ci assert_eq!( 2552c67d6573Sopenharmony_ci t(r"[a\pZb]"), 2553c67d6573Sopenharmony_ci hir_union( 2554c67d6573Sopenharmony_ci hir_uclass(&[('a', 'b')]), 2555c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2556c67d6573Sopenharmony_ci ) 2557c67d6573Sopenharmony_ci ); 2558c67d6573Sopenharmony_ci #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))] 2559c67d6573Sopenharmony_ci assert_eq!( 2560c67d6573Sopenharmony_ci t(r"[\pZ\p{Greek}]"), 2561c67d6573Sopenharmony_ci hir_union( 2562c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("greek")), 2563c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2564c67d6573Sopenharmony_ci ) 2565c67d6573Sopenharmony_ci ); 2566c67d6573Sopenharmony_ci #[cfg(all( 2567c67d6573Sopenharmony_ci feature = "unicode-age", 2568c67d6573Sopenharmony_ci feature = "unicode-gencat", 2569c67d6573Sopenharmony_ci feature = "unicode-script" 2570c67d6573Sopenharmony_ci ))] 2571c67d6573Sopenharmony_ci assert_eq!( 2572c67d6573Sopenharmony_ci t(r"[\p{age:3.0}\pZ\p{Greek}]"), 2573c67d6573Sopenharmony_ci hir_union( 2574c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::ByValue { 2575c67d6573Sopenharmony_ci property_name: "age", 2576c67d6573Sopenharmony_ci property_value: "3.0", 2577c67d6573Sopenharmony_ci }), 2578c67d6573Sopenharmony_ci hir_union( 2579c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("greek")), 2580c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2581c67d6573Sopenharmony_ci ) 2582c67d6573Sopenharmony_ci ) 2583c67d6573Sopenharmony_ci ); 2584c67d6573Sopenharmony_ci #[cfg(all( 2585c67d6573Sopenharmony_ci feature = "unicode-age", 2586c67d6573Sopenharmony_ci feature = "unicode-gencat", 2587c67d6573Sopenharmony_ci feature = "unicode-script" 2588c67d6573Sopenharmony_ci ))] 2589c67d6573Sopenharmony_ci assert_eq!( 2590c67d6573Sopenharmony_ci t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"), 
2591c67d6573Sopenharmony_ci hir_union( 2592c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::ByValue { 2593c67d6573Sopenharmony_ci property_name: "age", 2594c67d6573Sopenharmony_ci property_value: "3.0", 2595c67d6573Sopenharmony_ci }), 2596c67d6573Sopenharmony_ci hir_union( 2597c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("cyrillic")), 2598c67d6573Sopenharmony_ci hir_union( 2599c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("greek")), 2600c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2601c67d6573Sopenharmony_ci ) 2602c67d6573Sopenharmony_ci ) 2603c67d6573Sopenharmony_ci ) 2604c67d6573Sopenharmony_ci ); 2605c67d6573Sopenharmony_ci 2606c67d6573Sopenharmony_ci #[cfg(all( 2607c67d6573Sopenharmony_ci feature = "unicode-age", 2608c67d6573Sopenharmony_ci feature = "unicode-case", 2609c67d6573Sopenharmony_ci feature = "unicode-gencat", 2610c67d6573Sopenharmony_ci feature = "unicode-script" 2611c67d6573Sopenharmony_ci ))] 2612c67d6573Sopenharmony_ci assert_eq!( 2613c67d6573Sopenharmony_ci t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"), 2614c67d6573Sopenharmony_ci hir_case_fold(hir_union( 2615c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::ByValue { 2616c67d6573Sopenharmony_ci property_name: "age", 2617c67d6573Sopenharmony_ci property_value: "3.0", 2618c67d6573Sopenharmony_ci }), 2619c67d6573Sopenharmony_ci hir_union( 2620c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("greek")), 2621c67d6573Sopenharmony_ci hir_uclass_query(ClassQuery::Binary("separator")) 2622c67d6573Sopenharmony_ci ) 2623c67d6573Sopenharmony_ci )) 2624c67d6573Sopenharmony_ci ); 2625c67d6573Sopenharmony_ci #[cfg(all( 2626c67d6573Sopenharmony_ci feature = "unicode-age", 2627c67d6573Sopenharmony_ci feature = "unicode-gencat", 2628c67d6573Sopenharmony_ci feature = "unicode-script" 2629c67d6573Sopenharmony_ci ))] 2630c67d6573Sopenharmony_ci assert_eq!( 2631c67d6573Sopenharmony_ci t(r"[^\p{age:3.0}\pZ\p{Greek}]"), 2632c67d6573Sopenharmony_ci 
hir_negate(hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            ))
        );
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-case",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(hir_case_fold(hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            )))
        );
    }

    // Bracketed classes that contain a nested (negated) bracketed class,
    // including the error reported when the overall result is empty.
    #[test]
    fn class_bracketed_nested() {
        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));

        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[a[^c]]"),
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[a-b[^c]]"),
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
        );

        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[^a-b[^c]]"),
            hir_uclass(&[('C', 'C'), ('c', 'c')])
        );

        // A negated class that ends up empty is rejected; the error span
        // covers the whole bracketed class.
        assert_eq!(
            t_err(r"[^a-c[^c]]"),
            TestError {
                kind: hir::ErrorKind::EmptyClassNotAllowed,
                span: Span::new(
                    Position::new(0, 1, 1),
                    Position::new(10, 1, 11)
                ),
            }
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t_err(r"(?i)[^a-c[^c]]"),
            TestError {
                kind: hir::ErrorKind::EmptyClassNotAllowed,
                span: Span::new(
                    Position::new(4, 1, 5),
                    Position::new(14, 1, 15)
                ),
            }
        );
    }

    // Class intersection (`&&`) in Unicode and byte modes, with and without
    // case folding, plus escaping and precedence corner cases.
    #[test]
    fn class_bracketed_intersect() {
        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));

        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[abc&&b-c]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[abc&&[b-c]]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[[abc]&&[b-c]]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[a-z&&b-y&&c-x]"),
            hir_case_fold(hir_uclass(&[('c', 'x')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[c-da-b&&a-d]"),
            hir_case_fold(hir_uclass(&[('a', 'd')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[a-d&&c-da-b]"),
            hir_case_fold(hir_uclass(&[('a', 'd')]))
        );

        assert_eq!(
            t("(?i-u)[abc&&b-c]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[abc&&[b-c]]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[[abc]&&[b-c]]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[a-z&&b-y&&c-x]"),
            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
        );
        assert_eq!(
            t("(?i-u)[c-da-b&&a-d]"),
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
        );
        assert_eq!(
            t("(?i-u)[a-d&&c-da-b]"),
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
        );

        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
        // `^` is also allowed to be unescaped after `&&`.
        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
        // `]` needs to be escaped after `&&` since it's not at start of class.
        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
        // Test precedence.
        assert_eq!(
            t(r"[a-w&&[^c-g]z]"),
            hir_uclass(&[('a', 'b'), ('h', 'w')])
        );
    }

    // Interaction between class negation and intersection, in Unicode mode
    // (`t`) and in byte mode (`t_bytes` with `(?-u)`).
    #[test]
    fn class_bracketed_intersect_negate() {
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^\w&&\d]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
        );
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^[\w&&\d]]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
        );
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^[^\w&&\d]]"),
            hir_uclass_query(ClassQuery::Binary("digit"))
        );
        #[cfg(feature = "unicode-perl")]
        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t_bytes(r"(?-u)[^\w&&\d]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Digit
            )))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
            hir_negate(hir_bclass(&[(b'a', b'c')]))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[\w&&\d]]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Digit
            )))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[^\w&&\d]]"),
            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
        );
        assert_eq!(
            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Word
            )))
        );
    }

    // Class difference (`--`) in Unicode and byte modes.
    #[test]
    fn class_bracketed_difference() {
        #[cfg(feature = "unicode-gencat")]
        assert_eq!(
            t(r"[\pL--[:ascii:]]"),
            hir_difference(
                hir_uclass_query(ClassQuery::Binary("letter")),
                hir_uclass(&[('\0', '\x7F')])
            )
        );

        assert_eq!(
            t(r"(?-u)[[:alpha:]--[:lower:]]"),
            hir_bclass(&[(b'A', b'Z')])
        );
    }

    // Class symmetric difference (`~~`) in Unicode and byte modes.
    #[test]
    fn class_bracketed_symmetric_difference() {
        #[cfg(feature = "unicode-script")]
        assert_eq!(
            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
            hir_uclass(&[
                ('\u{0342}', '\u{0342}'),
                ('\u{0345}', '\u{0345}'),
                ('\u{1DC0}', '\u{1DC1}'),
            ])
        );
        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

        assert_eq!(
            t(r"(?-u)[a-g~~c-j]"),
            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
        );
    }

    // With the `x` flag, whitespace and `#` comments inside escapes and
    // counted repetitions are skipped. The line breaks inside the raw strings
    // below are significant: each one terminates a `#` comment.
    #[test]
    fn ignore_whitespace() {
        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
        assert_eq!(
            t(r"(?x)\x # comment
{ # comment
    53 # comment
} #comment"),
            hir_lit("S")
        );

        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
        assert_eq!(
            t(r"(?x)\x # comment
        53 # comment"),
            hir_lit("S")
        );
        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

        #[cfg(feature = "unicode-gencat")]
        assert_eq!(
            t(r"(?x)\p # comment
{ # comment
    Separator # comment
} # comment"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );

        assert_eq!(
            t(r"(?x)a # comment
{ # comment
    5 # comment
    , # comment
    10 # comment
} # comment"),
            hir_range(
                true,
                hir::RepetitionRange::Bounded(5, 10),
                hir_lit("a")
            )
        );

        // An escaped space is kept as a literal even in `x` mode.
        assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
    }

    // `Hir::is_always_utf8` on expressions translated with `t_bytes`.
    #[test]
    fn analysis_is_always_utf8() {
        // Positive examples.
        assert!(t_bytes(r"a").is_always_utf8());
        assert!(t_bytes(r"ab").is_always_utf8());
        assert!(t_bytes(r"(?-u)a").is_always_utf8());
        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
        assert!(t_bytes(r"\xFF").is_always_utf8());
        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
        assert!(t_bytes(r"[^a]").is_always_utf8());
        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
        assert!(t_bytes(r"\b").is_always_utf8());
        assert!(t_bytes(r"\B").is_always_utf8());
        assert!(t_bytes(r"(?-u)\b").is_always_utf8());

        // Negative examples.
        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
    }

    // `Hir::is_all_assertions` on anchors, word boundaries and combinations
    // of them.
    #[test]
    fn analysis_is_all_assertions() {
        // Positive examples.
        assert!(t(r"\b").is_all_assertions());
        assert!(t(r"\B").is_all_assertions());
        assert!(t(r"^").is_all_assertions());
        assert!(t(r"$").is_all_assertions());
        assert!(t(r"\A").is_all_assertions());
        assert!(t(r"\z").is_all_assertions());
        assert!(t(r"$^\z\A\b\B").is_all_assertions());
        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
        assert!(t(r"^$|$^").is_all_assertions());
        assert!(t(r"((\b)+())*^").is_all_assertions());

        // Negative examples.
        assert!(!t(r"^a").is_all_assertions());
    }

    // `Hir::is_anchored_{start,end}` and `Hir::is_line_anchored_{start,end}`.
    // Each group of four checks the start/end predicates in text mode and
    // then in line mode.
    #[test]
    fn analysis_is_anchored() {
        // Positive examples.
        assert!(t(r"^").is_anchored_start());
        assert!(t(r"$").is_anchored_end());
        assert!(t(r"^").is_line_anchored_start());
        assert!(t(r"$").is_line_anchored_end());

        assert!(t(r"^^").is_anchored_start());
        assert!(t(r"$$").is_anchored_end());
        assert!(t(r"^^").is_line_anchored_start());
        assert!(t(r"$$").is_line_anchored_end());

        assert!(t(r"^$").is_anchored_start());
        assert!(t(r"^$").is_anchored_end());
        assert!(t(r"^$").is_line_anchored_start());
        assert!(t(r"^$").is_line_anchored_end());

        assert!(t(r"^foo").is_anchored_start());
        assert!(t(r"foo$").is_anchored_end());
        assert!(t(r"^foo").is_line_anchored_start());
        assert!(t(r"foo$").is_line_anchored_end());

        assert!(t(r"^foo|^bar").is_anchored_start());
        assert!(t(r"foo$|bar$").is_anchored_end());
        assert!(t(r"^foo|^bar").is_line_anchored_start());
        assert!(t(r"foo$|bar$").is_line_anchored_end());

        assert!(t(r"^(foo|bar)").is_anchored_start());
        assert!(t(r"(foo|bar)$").is_anchored_end());
        assert!(t(r"^(foo|bar)").is_line_anchored_start());
        assert!(t(r"(foo|bar)$").is_line_anchored_end());

        assert!(t(r"^+").is_anchored_start());
        assert!(t(r"$+").is_anchored_end());
        assert!(t(r"^+").is_line_anchored_start());
        assert!(t(r"$+").is_line_anchored_end());
        assert!(t(r"^++").is_anchored_start());
        assert!(t(r"$++").is_anchored_end());
        assert!(t(r"^++").is_line_anchored_start());
        assert!(t(r"$++").is_line_anchored_end());
        assert!(t(r"(^)+").is_anchored_start());
        assert!(t(r"($)+").is_anchored_end());
        assert!(t(r"(^)+").is_line_anchored_start());
        assert!(t(r"($)+").is_line_anchored_end());

        // `$^` is expected to satisfy all four predicates.
        assert!(t(r"$^").is_anchored_start());
        assert!(t(r"$^").is_anchored_end());
        assert!(t(r"$^").is_line_anchored_start());
        assert!(t(r"$^").is_line_anchored_end());
        assert!(t(r"$^|^$").is_anchored_start());
        assert!(t(r"$^|^$").is_anchored_end());
        assert!(t(r"$^|^$").is_line_anchored_start());
        assert!(t(r"$^|^$").is_line_anchored_end());

        assert!(t(r"\b^").is_anchored_start());
        assert!(t(r"$\b").is_anchored_end());
        assert!(t(r"\b^").is_line_anchored_start());
        assert!(t(r"$\b").is_line_anchored_end());
        assert!(t(r"^(?m:^)").is_anchored_start());
        assert!(t(r"(?m:$)$").is_anchored_end());
        assert!(t(r"^(?m:^)").is_line_anchored_start());
        assert!(t(r"(?m:$)$").is_line_anchored_end());
        assert!(t(r"(?m:^)^").is_anchored_start());
        assert!(t(r"$(?m:$)").is_anchored_end());
        assert!(t(r"(?m:^)^").is_line_anchored_start());
        assert!(t(r"$(?m:$)").is_line_anchored_end());

        // Negative examples.
        assert!(!t(r"(?m)^").is_anchored_start());
        assert!(!t(r"(?m)$").is_anchored_end());
        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
        assert!(!t(r"$^|(?m:^$)").is_anchored_end());

        assert!(!t(r"a^").is_anchored_start());
        assert!(!t(r"$a").is_anchored_start());
        assert!(!t(r"a^").is_line_anchored_start());
        assert!(!t(r"$a").is_line_anchored_start());

        assert!(!t(r"a^").is_anchored_end());
        assert!(!t(r"$a").is_anchored_end());
        assert!(!t(r"a^").is_line_anchored_end());
        assert!(!t(r"$a").is_line_anchored_end());

        assert!(!t(r"^foo|bar").is_anchored_start());
        assert!(!t(r"foo|bar$").is_anchored_end());
        assert!(!t(r"^foo|bar").is_line_anchored_start());
        assert!(!t(r"foo|bar$").is_line_anchored_end());

        assert!(!t(r"^*").is_anchored_start());
        assert!(!t(r"$*").is_anchored_end());
        assert!(!t(r"^*").is_line_anchored_start());
        assert!(!t(r"$*").is_line_anchored_end());
        assert!(!t(r"^*+").is_anchored_start());
        assert!(!t(r"$*+").is_anchored_end());
        assert!(!t(r"^*+").is_line_anchored_start());
        assert!(!t(r"$*+").is_line_anchored_end());
        assert!(!t(r"^+*").is_anchored_start());
        assert!(!t(r"$+*").is_anchored_end());
        assert!(!t(r"^+*").is_line_anchored_start());
        assert!(!t(r"$+*").is_line_anchored_end());
        assert!(!t(r"(^)*").is_anchored_start());
        assert!(!t(r"($)*").is_anchored_end());
        assert!(!t(r"(^)*").is_line_anchored_start());
        assert!(!t(r"($)*").is_line_anchored_end());
    }

    // `Hir::is_line_anchored_{start,end}` for patterns that use multi-line
    // (`m`) anchors.
    #[test]
    fn analysis_is_line_anchored() {
        assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
        assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());

        assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
        assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());

        assert!(t(r"(?m)^").is_line_anchored_start());
        assert!(t(r"(?m)$").is_line_anchored_end());

        assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
        assert!(t(r"(?m:^$)|$^").is_line_anchored_end());

        assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
        assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
    }

#[test]
    fn analysis_is_any_anchored() {
        // Patterns expected to report an anchor at the start / at the end.
        for &pattern in [r"^", r"\A"].iter() {
            assert!(t(pattern).is_any_anchored_start());
        }
        for &pattern in [r"$", r"\z"].iter() {
            assert!(t(pattern).is_any_anchored_end());
        }

        // Multi-line anchors and anchors for the opposite end are expected
        // not to be reported.
        for &pattern in [r"(?m)^", r"$"].iter() {
            assert!(!t(pattern).is_any_anchored_start());
        }
        for &pattern in [r"(?m)$", r"^"].iter() {
            assert!(!t(pattern).is_any_anchored_end());
        }
    }

    #[test]
    fn analysis_is_match_empty() {
        // Patterns expected to report that they can match the empty string.
        let can_match_empty = [
            r"",
            r"()",
            r"()*",
            r"()+",
            r"()?",
            r"a*",
            r"a?",
            r"a{0}",
            r"a{0,}",
            r"a{0,1}",
            r"a{0,10}",
            r"a*|b",
            r"b|a*",
            r"a|",
            r"|a",
            r"a||b",
            r"a*a?(abcd)*",
            r"^",
            r"$",
            r"(?m)^",
            r"(?m)$",
            r"\A",
            r"\z",
            r"\B",
            r"\b",
            r"(?-u)\b",
        ];
        for &pattern in can_match_empty.iter() {
            assert!(t(pattern).is_match_empty());
        }
        // Needs Unicode general category tables, so it is feature gated.
        #[cfg(feature = "unicode-gencat")]
        assert!(t(r"\pL*").is_match_empty());
        // This pattern is translated with `t_bytes` rather than `t`.
        assert!(t_bytes(r"(?-u)\B").is_match_empty());

        // Patterns expected to require at least one character.
        let never_match_empty = [
            r"a+",
            r"a{1}",
            r"a{1,}",
            r"a{1,2}",
            r"a{1,10}",
            r"b|a",
            r"a*a+(abcd)*",
        ];
        for &pattern in never_match_empty.iter() {
            assert!(!t(pattern).is_match_empty());
        }
    }

    #[test]
    fn analysis_is_literal() {
        // Patterns expected to be reported as a literal.
        let literals = [r"a", r"ab", r"abc", r"(?m)abc"];
        for &pattern in literals.iter() {
            assert!(t(pattern).is_literal());
        }

        // Patterns expected not to be reported as a literal.
        let non_literals = [
            r"",
            r"^",
            r"a|b",
            r"(a)",
            r"a+",
            r"foo(a)",
            r"(a)foo",
            r"[a]",
        ];
        for &pattern in non_literals.iter() {
            assert!(!t(pattern).is_literal());
        }
    }

    #[test]
    fn analysis_is_alternation_literal() {
        // Literals and alternations made only of literals.
        let accepted = [
            r"a",
            r"ab",
            r"abc",
            r"(?m)abc",
            r"a|b",
            r"a|b|c",
            r"foo|bar",
            r"foo|bar|baz",
        ];
        for &pattern in accepted.iter() {
            assert!(t(pattern).is_alternation_literal());
        }

        // Anything containing a group, class, repetition, anchor, or nothing
        // at all is expected to be rejected.
        let rejected = [
            r"",
            r"^",
            r"(a)",
            r"a+",
            r"foo(a)",
            r"(a)foo",
            r"[a]",
            r"[a]|b",
            r"a|[b]",
            r"(a)|b",
            r"a|(b)",
        ];
        for &pattern in rejected.iter() {
            assert!(!t(pattern).is_alternation_literal());
        }
    }
}