1/*!
2Defines a translator that converts an `Ast` to an `Hir`.
3*/
4
5use std::cell::{Cell, RefCell};
6use std::result;
7
8use crate::ast::{self, Ast, Span, Visitor};
9use crate::hir::{self, Error, ErrorKind, Hir};
10use crate::unicode::{self, ClassQuery};
11
12type Result<T> = result::Result<T, Error>;
13
/// A builder for constructing an AST->HIR translator.
///
/// Configure the builder through its setter methods and then call `build`
/// to obtain a `Translator` that carries a copy of that configuration.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8. Copied verbatim into the `Translator` by `build`.
    allow_invalid_utf8: bool,
    /// The flag settings that a built translator starts out with.
    flags: Flags,
}
20
21impl Default for TranslatorBuilder {
22    fn default() -> TranslatorBuilder {
23        TranslatorBuilder::new()
24    }
25}
26
27impl TranslatorBuilder {
28    /// Create a new translator builder with a default c onfiguration.
29    pub fn new() -> TranslatorBuilder {
30        TranslatorBuilder {
31            allow_invalid_utf8: false,
32            flags: Flags::default(),
33        }
34    }
35
36    /// Build a translator using the current configuration.
37    pub fn build(&self) -> Translator {
38        Translator {
39            stack: RefCell::new(vec![]),
40            flags: Cell::new(self.flags),
41            allow_invalid_utf8: self.allow_invalid_utf8,
42        }
43    }
44
45    /// When enabled, translation will permit the construction of a regular
46    /// expression that may match invalid UTF-8.
47    ///
48    /// When disabled (the default), the translator is guaranteed to produce
49    /// an expression that will only ever match valid UTF-8 (otherwise, the
50    /// translator will return an error).
51    ///
52    /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53    /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54    /// the parser to return an error. Namely, a negated ASCII word boundary
55    /// can result in matching positions that aren't valid UTF-8 boundaries.
56    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57        self.allow_invalid_utf8 = yes;
58        self
59    }
60
61    /// Enable or disable the case insensitive flag (`i`) by default.
62    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63        self.flags.case_insensitive = if yes { Some(true) } else { None };
64        self
65    }
66
67    /// Enable or disable the multi-line matching flag (`m`) by default.
68    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69        self.flags.multi_line = if yes { Some(true) } else { None };
70        self
71    }
72
73    /// Enable or disable the "dot matches any character" flag (`s`) by
74    /// default.
75    pub fn dot_matches_new_line(
76        &mut self,
77        yes: bool,
78    ) -> &mut TranslatorBuilder {
79        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80        self
81    }
82
83    /// Enable or disable the "swap greed" flag (`U`) by default.
84    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85        self.flags.swap_greed = if yes { Some(true) } else { None };
86        self
87    }
88
89    /// Enable or disable the Unicode flag (`u`) by default.
90    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91        self.flags.unicode = if yes { None } else { Some(false) };
92        self
93    }
94}
95
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` because frames are pushed and popped through a
    /// shared reference to the translator.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Wrapped in a `Cell` because the active flags are replaced through a
    /// shared reference while an Ast is being translated.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113
114impl Translator {
115    /// Create a new translator using the default configuration.
116    pub fn new() -> Translator {
117        TranslatorBuilder::new().build()
118    }
119
120    /// Translate the given abstract syntax tree (AST) into a high level
121    /// intermediate representation (HIR).
122    ///
123    /// If there was a problem doing the translation, then an HIR-specific
124    /// error is returned.
125    ///
126    /// The original pattern string used to produce the `Ast` *must* also be
127    /// provided. The translator does not use the pattern string during any
128    /// correct translation, but is used for error reporting.
129    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130        ast::visit(ast, TranslatorI::new(self, pattern))
131    }
132}
133
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Frames are stored on the translator's heap-allocated `stack`.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame. The frame carries no
    /// data; it only marks where the concatenation's expressions begin.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame. The frame carries
    /// no data; it only marks where the alternation's expressions begin.
    Alternation,
}
185
186impl HirFrame {
187    /// Assert that the current stack frame is an Hir expression and return it.
188    fn unwrap_expr(self) -> Hir {
189        match self {
190            HirFrame::Expr(expr) => expr,
191            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192        }
193    }
194
195    /// Assert that the current stack frame is a Unicode class expression and
196    /// return it.
197    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198        match self {
199            HirFrame::ClassUnicode(cls) => cls,
200            _ => panic!(
201                "tried to unwrap Unicode class \
202                 from HirFrame, got: {:?}",
203                self
204            ),
205        }
206    }
207
208    /// Assert that the current stack frame is a byte class expression and
209    /// return it.
210    fn unwrap_class_bytes(self) -> hir::ClassBytes {
211        match self {
212            HirFrame::ClassBytes(cls) => cls,
213            _ => panic!(
214                "tried to unwrap byte class \
215                 from HirFrame, got: {:?}",
216                self
217            ),
218        }
219    }
220
221    /// Assert that the current stack frame is a group indicator and return
222    /// its corresponding flags (the flags that were active at the time the
223    /// group was entered).
224    fn unwrap_group(self) -> Flags {
225        match self {
226            HirFrame::Group { old_flags } => old_flags,
227            _ => {
228                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229            }
230        }
231    }
232}
233
234impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235    type Output = Hir;
236    type Err = Error;
237
238    fn finish(self) -> Result<Hir> {
239        // ... otherwise, we should have exactly one HIR on the stack.
240        assert_eq!(self.trans().stack.borrow().len(), 1);
241        Ok(self.pop().unwrap().unwrap_expr())
242    }
243
244    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245        match *ast {
246            Ast::Class(ast::Class::Bracketed(_)) => {
247                if self.flags().unicode() {
248                    let cls = hir::ClassUnicode::empty();
249                    self.push(HirFrame::ClassUnicode(cls));
250                } else {
251                    let cls = hir::ClassBytes::empty();
252                    self.push(HirFrame::ClassBytes(cls));
253                }
254            }
255            Ast::Group(ref x) => {
256                let old_flags = x
257                    .flags()
258                    .map(|ast| self.set_flags(ast))
259                    .unwrap_or_else(|| self.flags());
260                self.push(HirFrame::Group { old_flags });
261            }
262            Ast::Concat(ref x) if x.asts.is_empty() => {}
263            Ast::Concat(_) => {
264                self.push(HirFrame::Concat);
265            }
266            Ast::Alternation(ref x) if x.asts.is_empty() => {}
267            Ast::Alternation(_) => {
268                self.push(HirFrame::Alternation);
269            }
270            _ => {}
271        }
272        Ok(())
273    }
274
275    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276        match *ast {
277            Ast::Empty(_) => {
278                self.push(HirFrame::Expr(Hir::empty()));
279            }
280            Ast::Flags(ref x) => {
281                self.set_flags(&x.flags);
282                // Flags in the AST are generally considered directives and
283                // not actual sub-expressions. However, they can be used in
284                // the concrete syntax like `((?i))`, and we need some kind of
285                // indication of an expression there, and Empty is the correct
286                // choice.
287                //
288                // There can also be things like `(?i)+`, but we rule those out
289                // in the parser. In the future, we might allow them for
290                // consistency sake.
291                self.push(HirFrame::Expr(Hir::empty()));
292            }
293            Ast::Literal(ref x) => {
294                self.push(HirFrame::Expr(self.hir_literal(x)?));
295            }
296            Ast::Dot(span) => {
297                self.push(HirFrame::Expr(self.hir_dot(span)?));
298            }
299            Ast::Assertion(ref x) => {
300                self.push(HirFrame::Expr(self.hir_assertion(x)?));
301            }
302            Ast::Class(ast::Class::Perl(ref x)) => {
303                if self.flags().unicode() {
304                    let cls = self.hir_perl_unicode_class(x)?;
305                    let hcls = hir::Class::Unicode(cls);
306                    self.push(HirFrame::Expr(Hir::class(hcls)));
307                } else {
308                    let cls = self.hir_perl_byte_class(x);
309                    let hcls = hir::Class::Bytes(cls);
310                    self.push(HirFrame::Expr(Hir::class(hcls)));
311                }
312            }
313            Ast::Class(ast::Class::Unicode(ref x)) => {
314                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315                self.push(HirFrame::Expr(Hir::class(cls)));
316            }
317            Ast::Class(ast::Class::Bracketed(ref ast)) => {
318                if self.flags().unicode() {
319                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
320                    self.unicode_fold_and_negate(
321                        &ast.span,
322                        ast.negated,
323                        &mut cls,
324                    )?;
325                    if cls.ranges().is_empty() {
326                        return Err(self.error(
327                            ast.span,
328                            ErrorKind::EmptyClassNotAllowed,
329                        ));
330                    }
331                    let expr = Hir::class(hir::Class::Unicode(cls));
332                    self.push(HirFrame::Expr(expr));
333                } else {
334                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
335                    self.bytes_fold_and_negate(
336                        &ast.span,
337                        ast.negated,
338                        &mut cls,
339                    )?;
340                    if cls.ranges().is_empty() {
341                        return Err(self.error(
342                            ast.span,
343                            ErrorKind::EmptyClassNotAllowed,
344                        ));
345                    }
346
347                    let expr = Hir::class(hir::Class::Bytes(cls));
348                    self.push(HirFrame::Expr(expr));
349                }
350            }
351            Ast::Repetition(ref x) => {
352                let expr = self.pop().unwrap().unwrap_expr();
353                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354            }
355            Ast::Group(ref x) => {
356                let expr = self.pop().unwrap().unwrap_expr();
357                let old_flags = self.pop().unwrap().unwrap_group();
358                self.trans().flags.set(old_flags);
359                self.push(HirFrame::Expr(self.hir_group(x, expr)));
360            }
361            Ast::Concat(_) => {
362                let mut exprs = vec![];
363                while let Some(HirFrame::Expr(expr)) = self.pop() {
364                    if !expr.kind().is_empty() {
365                        exprs.push(expr);
366                    }
367                }
368                exprs.reverse();
369                self.push(HirFrame::Expr(Hir::concat(exprs)));
370            }
371            Ast::Alternation(_) => {
372                let mut exprs = vec![];
373                while let Some(HirFrame::Expr(expr)) = self.pop() {
374                    exprs.push(expr);
375                }
376                exprs.reverse();
377                self.push(HirFrame::Expr(Hir::alternation(exprs)));
378            }
379        }
380        Ok(())
381    }
382
383    fn visit_class_set_item_pre(
384        &mut self,
385        ast: &ast::ClassSetItem,
386    ) -> Result<()> {
387        match *ast {
388            ast::ClassSetItem::Bracketed(_) => {
389                if self.flags().unicode() {
390                    let cls = hir::ClassUnicode::empty();
391                    self.push(HirFrame::ClassUnicode(cls));
392                } else {
393                    let cls = hir::ClassBytes::empty();
394                    self.push(HirFrame::ClassBytes(cls));
395                }
396            }
397            // We needn't handle the Union case here since the visitor will
398            // do it for us.
399            _ => {}
400        }
401        Ok(())
402    }
403
404    fn visit_class_set_item_post(
405        &mut self,
406        ast: &ast::ClassSetItem,
407    ) -> Result<()> {
408        match *ast {
409            ast::ClassSetItem::Empty(_) => {}
410            ast::ClassSetItem::Literal(ref x) => {
411                if self.flags().unicode() {
412                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
413                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414                    self.push(HirFrame::ClassUnicode(cls));
415                } else {
416                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
417                    let byte = self.class_literal_byte(x)?;
418                    cls.push(hir::ClassBytesRange::new(byte, byte));
419                    self.push(HirFrame::ClassBytes(cls));
420                }
421            }
422            ast::ClassSetItem::Range(ref x) => {
423                if self.flags().unicode() {
424                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
425                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426                    self.push(HirFrame::ClassUnicode(cls));
427                } else {
428                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
429                    let start = self.class_literal_byte(&x.start)?;
430                    let end = self.class_literal_byte(&x.end)?;
431                    cls.push(hir::ClassBytesRange::new(start, end));
432                    self.push(HirFrame::ClassBytes(cls));
433                }
434            }
435            ast::ClassSetItem::Ascii(ref x) => {
436                if self.flags().unicode() {
437                    let xcls = self.hir_ascii_unicode_class(x)?;
438                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
439                    cls.union(&xcls);
440                    self.push(HirFrame::ClassUnicode(cls));
441                } else {
442                    let xcls = self.hir_ascii_byte_class(x)?;
443                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
444                    cls.union(&xcls);
445                    self.push(HirFrame::ClassBytes(cls));
446                }
447            }
448            ast::ClassSetItem::Unicode(ref x) => {
449                let xcls = self.hir_unicode_class(x)?;
450                let mut cls = self.pop().unwrap().unwrap_class_unicode();
451                cls.union(&xcls);
452                self.push(HirFrame::ClassUnicode(cls));
453            }
454            ast::ClassSetItem::Perl(ref x) => {
455                if self.flags().unicode() {
456                    let xcls = self.hir_perl_unicode_class(x)?;
457                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
458                    cls.union(&xcls);
459                    self.push(HirFrame::ClassUnicode(cls));
460                } else {
461                    let xcls = self.hir_perl_byte_class(x);
462                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
463                    cls.union(&xcls);
464                    self.push(HirFrame::ClassBytes(cls));
465                }
466            }
467            ast::ClassSetItem::Bracketed(ref ast) => {
468                if self.flags().unicode() {
469                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
470                    self.unicode_fold_and_negate(
471                        &ast.span,
472                        ast.negated,
473                        &mut cls1,
474                    )?;
475
476                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
477                    cls2.union(&cls1);
478                    self.push(HirFrame::ClassUnicode(cls2));
479                } else {
480                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
481                    self.bytes_fold_and_negate(
482                        &ast.span,
483                        ast.negated,
484                        &mut cls1,
485                    )?;
486
487                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
488                    cls2.union(&cls1);
489                    self.push(HirFrame::ClassBytes(cls2));
490                }
491            }
492            // This is handled automatically by the visitor.
493            ast::ClassSetItem::Union(_) => {}
494        }
495        Ok(())
496    }
497
498    fn visit_class_set_binary_op_pre(
499        &mut self,
500        _op: &ast::ClassSetBinaryOp,
501    ) -> Result<()> {
502        if self.flags().unicode() {
503            let cls = hir::ClassUnicode::empty();
504            self.push(HirFrame::ClassUnicode(cls));
505        } else {
506            let cls = hir::ClassBytes::empty();
507            self.push(HirFrame::ClassBytes(cls));
508        }
509        Ok(())
510    }
511
512    fn visit_class_set_binary_op_in(
513        &mut self,
514        _op: &ast::ClassSetBinaryOp,
515    ) -> Result<()> {
516        if self.flags().unicode() {
517            let cls = hir::ClassUnicode::empty();
518            self.push(HirFrame::ClassUnicode(cls));
519        } else {
520            let cls = hir::ClassBytes::empty();
521            self.push(HirFrame::ClassBytes(cls));
522        }
523        Ok(())
524    }
525
526    fn visit_class_set_binary_op_post(
527        &mut self,
528        op: &ast::ClassSetBinaryOp,
529    ) -> Result<()> {
530        use crate::ast::ClassSetBinaryOpKind::*;
531
532        if self.flags().unicode() {
533            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
534            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
535            let mut cls = self.pop().unwrap().unwrap_class_unicode();
536            if self.flags().case_insensitive() {
537                rhs.try_case_fold_simple().map_err(|_| {
538                    self.error(
539                        op.rhs.span().clone(),
540                        ErrorKind::UnicodeCaseUnavailable,
541                    )
542                })?;
543                lhs.try_case_fold_simple().map_err(|_| {
544                    self.error(
545                        op.lhs.span().clone(),
546                        ErrorKind::UnicodeCaseUnavailable,
547                    )
548                })?;
549            }
550            match op.kind {
551                Intersection => lhs.intersect(&rhs),
552                Difference => lhs.difference(&rhs),
553                SymmetricDifference => lhs.symmetric_difference(&rhs),
554            }
555            cls.union(&lhs);
556            self.push(HirFrame::ClassUnicode(cls));
557        } else {
558            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
559            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
560            let mut cls = self.pop().unwrap().unwrap_class_bytes();
561            if self.flags().case_insensitive() {
562                rhs.case_fold_simple();
563                lhs.case_fold_simple();
564            }
565            match op.kind {
566                Intersection => lhs.intersect(&rhs),
567                Difference => lhs.difference(&rhs),
568                SymmetricDifference => lhs.symmetric_difference(&rhs),
569            }
570            cls.union(&lhs);
571            self.push(HirFrame::ClassBytes(cls));
572        }
573        Ok(())
574    }
575}
576
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and flags are used during translation.
    trans: &'t Translator,
    /// The pattern string supplied alongside the Ast. It is only copied into
    /// errors created during translation.
    pattern: &'p str,
}
588
589impl<'t, 'p> TranslatorI<'t, 'p> {
590    /// Build a new internal translator.
591    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
592        TranslatorI { trans, pattern }
593    }
594
595    /// Return a reference to the underlying translator.
596    fn trans(&self) -> &Translator {
597        &self.trans
598    }
599
600    /// Push the given frame on to the call stack.
601    fn push(&self, frame: HirFrame) {
602        self.trans().stack.borrow_mut().push(frame);
603    }
604
605    /// Pop the top of the call stack. If the call stack is empty, return None.
606    fn pop(&self) -> Option<HirFrame> {
607        self.trans().stack.borrow_mut().pop()
608    }
609
610    /// Create a new error with the given span and error type.
611    fn error(&self, span: Span, kind: ErrorKind) -> Error {
612        Error { kind, pattern: self.pattern.to_string(), span }
613    }
614
615    /// Return a copy of the active flags.
616    fn flags(&self) -> Flags {
617        self.trans().flags.get()
618    }
619
620    /// Set the flags of this translator from the flags set in the given AST.
621    /// Then, return the old flags.
622    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
623        let old_flags = self.flags();
624        let mut new_flags = Flags::from_ast(ast_flags);
625        new_flags.merge(&old_flags);
626        self.trans().flags.set(new_flags);
627        old_flags
628    }
629
630    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
631        let ch = match self.literal_to_char(lit)? {
632            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
633            hir::Literal::Unicode(ch) => ch,
634        };
635        if self.flags().case_insensitive() {
636            self.hir_from_char_case_insensitive(lit.span, ch)
637        } else {
638            self.hir_from_char(lit.span, ch)
639        }
640    }
641
642    /// Convert an Ast literal to its scalar representation.
643    ///
644    /// When Unicode mode is enabled, then this always succeeds and returns a
645    /// `char` (Unicode scalar value).
646    ///
647    /// When Unicode mode is disabled, then a raw byte is returned. If that
648    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
649    /// an error.
650    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
651        if self.flags().unicode() {
652            return Ok(hir::Literal::Unicode(lit.c));
653        }
654        let byte = match lit.byte() {
655            None => return Ok(hir::Literal::Unicode(lit.c)),
656            Some(byte) => byte,
657        };
658        if byte <= 0x7F {
659            return Ok(hir::Literal::Unicode(byte as char));
660        }
661        if !self.trans().allow_invalid_utf8 {
662            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
663        }
664        Ok(hir::Literal::Byte(byte))
665    }
666
667    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
668        if !self.flags().unicode() && c.len_utf8() > 1 {
669            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
670        }
671        Ok(Hir::literal(hir::Literal::Unicode(c)))
672    }
673
674    fn hir_from_char_case_insensitive(
675        &self,
676        span: Span,
677        c: char,
678    ) -> Result<Hir> {
679        if self.flags().unicode() {
680            // If case folding won't do anything, then don't bother trying.
681            let map =
682                unicode::contains_simple_case_mapping(c, c).map_err(|_| {
683                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
684                })?;
685            if !map {
686                return self.hir_from_char(span, c);
687            }
688            let mut cls =
689                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
690                    c, c,
691                )]);
692            cls.try_case_fold_simple().map_err(|_| {
693                self.error(span, ErrorKind::UnicodeCaseUnavailable)
694            })?;
695            Ok(Hir::class(hir::Class::Unicode(cls)))
696        } else {
697            if c.len_utf8() > 1 {
698                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
699            }
700            // If case folding won't do anything, then don't bother trying.
701            match c {
702                'A'..='Z' | 'a'..='z' => {}
703                _ => return self.hir_from_char(span, c),
704            }
705            let mut cls =
706                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
707                    c as u8, c as u8,
708                )]);
709            cls.case_fold_simple();
710            Ok(Hir::class(hir::Class::Bytes(cls)))
711        }
712    }
713
714    fn hir_dot(&self, span: Span) -> Result<Hir> {
715        let unicode = self.flags().unicode();
716        if !unicode && !self.trans().allow_invalid_utf8 {
717            return Err(self.error(span, ErrorKind::InvalidUtf8));
718        }
719        Ok(if self.flags().dot_matches_new_line() {
720            Hir::any(!unicode)
721        } else {
722            Hir::dot(!unicode)
723        })
724    }
725
726    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
727        let unicode = self.flags().unicode();
728        let multi_line = self.flags().multi_line();
729        Ok(match asst.kind {
730            ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
731                hir::Anchor::StartLine
732            } else {
733                hir::Anchor::StartText
734            }),
735            ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
736                hir::Anchor::EndLine
737            } else {
738                hir::Anchor::EndText
739            }),
740            ast::AssertionKind::StartText => {
741                Hir::anchor(hir::Anchor::StartText)
742            }
743            ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
744            ast::AssertionKind::WordBoundary => {
745                Hir::word_boundary(if unicode {
746                    hir::WordBoundary::Unicode
747                } else {
748                    hir::WordBoundary::Ascii
749                })
750            }
751            ast::AssertionKind::NotWordBoundary => {
752                Hir::word_boundary(if unicode {
753                    hir::WordBoundary::UnicodeNegate
754                } else {
755                    // It is possible for negated ASCII word boundaries to
756                    // match at invalid UTF-8 boundaries, even when searching
757                    // valid UTF-8.
758                    if !self.trans().allow_invalid_utf8 {
759                        return Err(
760                            self.error(asst.span, ErrorKind::InvalidUtf8)
761                        );
762                    }
763                    hir::WordBoundary::AsciiNegate
764                })
765            }
766        })
767    }
768
769    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
770        let kind = match group.kind {
771            ast::GroupKind::CaptureIndex(idx) => {
772                hir::GroupKind::CaptureIndex(idx)
773            }
774            ast::GroupKind::CaptureName(ref capname) => {
775                hir::GroupKind::CaptureName {
776                    name: capname.name.clone(),
777                    index: capname.index,
778                }
779            }
780            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
781        };
782        Hir::group(hir::Group { kind, hir: Box::new(expr) })
783    }
784
785    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
786        let kind = match rep.op.kind {
787            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
788            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
789            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
790            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
791                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
792            }
793            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
794                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
795            }
796            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
797                m,
798                n,
799            )) => {
800                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
801            }
802        };
803        let greedy =
804            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
805        Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) })
806    }
807
808    fn hir_unicode_class(
809        &self,
810        ast_class: &ast::ClassUnicode,
811    ) -> Result<hir::ClassUnicode> {
812        use crate::ast::ClassUnicodeKind::*;
813
814        if !self.flags().unicode() {
815            return Err(
816                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
817            );
818        }
819        let query = match ast_class.kind {
820            OneLetter(name) => ClassQuery::OneLetter(name),
821            Named(ref name) => ClassQuery::Binary(name),
822            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
823                property_name: name,
824                property_value: value,
825            },
826        };
827        let mut result = self.convert_unicode_class_error(
828            &ast_class.span,
829            unicode::class(query),
830        );
831        if let Ok(ref mut class) = result {
832            self.unicode_fold_and_negate(
833                &ast_class.span,
834                ast_class.negated,
835                class,
836            )?;
837            if class.ranges().is_empty() {
838                let err = self
839                    .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
840                return Err(err);
841            }
842        }
843        result
844    }
845
846    fn hir_ascii_unicode_class(
847        &self,
848        ast: &ast::ClassAscii,
849    ) -> Result<hir::ClassUnicode> {
850        let mut cls = hir::ClassUnicode::new(
851            ascii_class(&ast.kind)
852                .iter()
853                .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)),
854        );
855        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
856        Ok(cls)
857    }
858
859    fn hir_ascii_byte_class(
860        &self,
861        ast: &ast::ClassAscii,
862    ) -> Result<hir::ClassBytes> {
863        let mut cls = hir::ClassBytes::new(
864            ascii_class(&ast.kind)
865                .iter()
866                .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)),
867        );
868        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
869        Ok(cls)
870    }
871
872    fn hir_perl_unicode_class(
873        &self,
874        ast_class: &ast::ClassPerl,
875    ) -> Result<hir::ClassUnicode> {
876        use crate::ast::ClassPerlKind::*;
877
878        assert!(self.flags().unicode());
879        let result = match ast_class.kind {
880            Digit => unicode::perl_digit(),
881            Space => unicode::perl_space(),
882            Word => unicode::perl_word(),
883        };
884        let mut class =
885            self.convert_unicode_class_error(&ast_class.span, result)?;
886        // We needn't apply case folding here because the Perl Unicode classes
887        // are already closed under Unicode simple case folding.
888        if ast_class.negated {
889            class.negate();
890        }
891        Ok(class)
892    }
893
894    fn hir_perl_byte_class(
895        &self,
896        ast_class: &ast::ClassPerl,
897    ) -> hir::ClassBytes {
898        use crate::ast::ClassPerlKind::*;
899
900        assert!(!self.flags().unicode());
901        let mut class = match ast_class.kind {
902            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
903            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
904            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
905        };
906        // We needn't apply case folding here because the Perl ASCII classes
907        // are already closed (under ASCII case folding).
908        if ast_class.negated {
909            class.negate();
910        }
911        class
912    }
913
914    /// Converts the given Unicode specific error to an HIR translation error.
915    ///
916    /// The span given should approximate the position at which an error would
917    /// occur.
918    fn convert_unicode_class_error(
919        &self,
920        span: &Span,
921        result: unicode::Result<hir::ClassUnicode>,
922    ) -> Result<hir::ClassUnicode> {
923        result.map_err(|err| {
924            let sp = span.clone();
925            match err {
926                unicode::Error::PropertyNotFound => {
927                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
928                }
929                unicode::Error::PropertyValueNotFound => {
930                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
931                }
932                unicode::Error::PerlClassNotFound => {
933                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
934                }
935            }
936        })
937    }
938
939    fn unicode_fold_and_negate(
940        &self,
941        span: &Span,
942        negated: bool,
943        class: &mut hir::ClassUnicode,
944    ) -> Result<()> {
945        // Note that we must apply case folding before negation!
946        // Consider `(?i)[^x]`. If we applied negation field, then
947        // the result would be the character class that matched any
948        // Unicode scalar value.
949        if self.flags().case_insensitive() {
950            class.try_case_fold_simple().map_err(|_| {
951                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
952            })?;
953        }
954        if negated {
955            class.negate();
956        }
957        Ok(())
958    }
959
960    fn bytes_fold_and_negate(
961        &self,
962        span: &Span,
963        negated: bool,
964        class: &mut hir::ClassBytes,
965    ) -> Result<()> {
966        // Note that we must apply case folding before negation!
967        // Consider `(?i)[^x]`. If we applied negation first, then
968        // the result would be the character class that matched any
969        // Unicode scalar value.
970        if self.flags().case_insensitive() {
971            class.case_fold_simple();
972        }
973        if negated {
974            class.negate();
975        }
976        if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
977            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
978        }
979        Ok(())
980    }
981
982    /// Return a scalar byte value suitable for use as a literal in a byte
983    /// character class.
984    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
985        match self.literal_to_char(ast)? {
986            hir::Literal::Byte(byte) => Ok(byte),
987            hir::Literal::Unicode(ch) => {
988                if ch <= 0x7F as char {
989                    Ok(ch as u8)
990                } else {
991                    // We can't feasibly support Unicode in
992                    // byte oriented classes. Byte classes don't
993                    // do Unicode case folding.
994                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
995                }
996            }
997        }
998    }
999}
1000
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled.
///
/// These states are encoded as `None`, `Some(false)` and `Some(true)`
/// respectively. The accessor methods of the same names collapse an absent
/// flag to its default.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the case insensitive flag; treated as disabled when absent.
    case_insensitive: Option<bool>,
    /// State of the multi-line flag; treated as disabled when absent.
    multi_line: Option<bool>,
    /// State of the "dot matches new line" flag; treated as disabled when
    /// absent.
    dot_matches_new_line: Option<bool>,
    /// State of the swap-greed flag; treated as disabled when absent.
    swap_greed: Option<bool>,
    /// State of the Unicode flag; treated as enabled when absent.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1016
1017impl Flags {
1018    fn from_ast(ast: &ast::Flags) -> Flags {
1019        let mut flags = Flags::default();
1020        let mut enable = true;
1021        for item in &ast.items {
1022            match item.kind {
1023                ast::FlagsItemKind::Negation => {
1024                    enable = false;
1025                }
1026                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1027                    flags.case_insensitive = Some(enable);
1028                }
1029                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1030                    flags.multi_line = Some(enable);
1031                }
1032                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1033                    flags.dot_matches_new_line = Some(enable);
1034                }
1035                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1036                    flags.swap_greed = Some(enable);
1037                }
1038                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1039                    flags.unicode = Some(enable);
1040                }
1041                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1042            }
1043        }
1044        flags
1045    }
1046
1047    fn merge(&mut self, previous: &Flags) {
1048        if self.case_insensitive.is_none() {
1049            self.case_insensitive = previous.case_insensitive;
1050        }
1051        if self.multi_line.is_none() {
1052            self.multi_line = previous.multi_line;
1053        }
1054        if self.dot_matches_new_line.is_none() {
1055            self.dot_matches_new_line = previous.dot_matches_new_line;
1056        }
1057        if self.swap_greed.is_none() {
1058            self.swap_greed = previous.swap_greed;
1059        }
1060        if self.unicode.is_none() {
1061            self.unicode = previous.unicode;
1062        }
1063    }
1064
1065    fn case_insensitive(&self) -> bool {
1066        self.case_insensitive.unwrap_or(false)
1067    }
1068
1069    fn multi_line(&self) -> bool {
1070        self.multi_line.unwrap_or(false)
1071    }
1072
1073    fn dot_matches_new_line(&self) -> bool {
1074        self.dot_matches_new_line.unwrap_or(false)
1075    }
1076
1077    fn swap_greed(&self) -> bool {
1078        self.swap_greed.unwrap_or(false)
1079    }
1080
1081    fn unicode(&self) -> bool {
1082        self.unicode.unwrap_or(true)
1083    }
1084}
1085
1086fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1087    let ranges: Vec<_> = ascii_class(kind)
1088        .iter()
1089        .cloned()
1090        .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1091        .collect();
1092    hir::ClassBytes::new(ranges)
1093}
1094
/// Returns the inclusive character ranges that make up the given ASCII
/// class.
///
/// Every range lies within `\x00-\x7F`, and the ranges of each class are
/// listed in ascending order without overlap.
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
    use crate::ast::ClassAsciiKind::*;
    match *kind {
        // Digits and letters.
        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
        // Letters only.
        Alpha => &[('A', 'Z'), ('a', 'z')],
        // The entire ASCII range.
        Ascii => &[('\x00', '\x7F')],
        // Horizontal tab and space.
        Blank => &[('\t', '\t'), (' ', ' ')],
        // Everything below space, plus `\x7F`.
        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
        Digit => &[('0', '9')],
        // Everything from `!` through `~`, i.e. `Print` without the space.
        Graph => &[('!', '~')],
        Lower => &[('a', 'z')],
        // Everything from space through `~`.
        Print => &[(' ', '~')],
        // The four gaps that `Graph` leaves around digits and letters.
        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
        // Tab, line feed, `\x0B`, `\x0C`, carriage return and space.
        Space => &[
            ('\t', '\t'),
            ('\n', '\n'),
            ('\x0B', '\x0B'),
            ('\x0C', '\x0C'),
            ('\r', '\r'),
            (' ', ' '),
        ],
        Upper => &[('A', 'Z')],
        // `Alnum` plus the underscore.
        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
        // Digits plus `A-F` in either case.
        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
    }
}
1121
1122#[cfg(test)]
1123mod tests {
1124    use crate::ast::parse::ParserBuilder;
1125    use crate::ast::{self, Ast, Position, Span};
1126    use crate::hir::{self, Hir, HirKind};
1127    use crate::unicode::{self, ClassQuery};
1128
1129    use super::{ascii_class, TranslatorBuilder};
1130
    // We create these errors to compare with real hir::Errors in the tests.
    // We define equality between TestError and hir::Error to disregard the
    // pattern string in hir::Error, which is annoying to provide in tests.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the real error is expected to carry.
        span: Span,
        // The kind the real error is expected to have.
        kind: hir::ErrorKind,
    }
1139
1140    impl PartialEq<hir::Error> for TestError {
1141        fn eq(&self, other: &hir::Error) -> bool {
1142            self.span == other.span && self.kind == other.kind
1143        }
1144    }
1145
1146    impl PartialEq<TestError> for hir::Error {
1147        fn eq(&self, other: &TestError) -> bool {
1148            self.span == other.span && self.kind == other.kind
1149        }
1150    }
1151
1152    fn parse(pattern: &str) -> Ast {
1153        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1154    }
1155
1156    fn t(pattern: &str) -> Hir {
1157        TranslatorBuilder::new()
1158            .allow_invalid_utf8(false)
1159            .build()
1160            .translate(pattern, &parse(pattern))
1161            .unwrap()
1162    }
1163
1164    fn t_err(pattern: &str) -> hir::Error {
1165        TranslatorBuilder::new()
1166            .allow_invalid_utf8(false)
1167            .build()
1168            .translate(pattern, &parse(pattern))
1169            .unwrap_err()
1170    }
1171
1172    fn t_bytes(pattern: &str) -> Hir {
1173        TranslatorBuilder::new()
1174            .allow_invalid_utf8(true)
1175            .build()
1176            .translate(pattern, &parse(pattern))
1177            .unwrap()
1178    }
1179
1180    fn hir_lit(s: &str) -> Hir {
1181        match s.len() {
1182            0 => Hir::empty(),
1183            _ => {
1184                let lits = s
1185                    .chars()
1186                    .map(hir::Literal::Unicode)
1187                    .map(Hir::literal)
1188                    .collect();
1189                Hir::concat(lits)
1190            }
1191        }
1192    }
1193
1194    fn hir_blit(s: &[u8]) -> Hir {
1195        match s.len() {
1196            0 => Hir::empty(),
1197            1 => Hir::literal(hir::Literal::Byte(s[0])),
1198            _ => {
1199                let lits = s
1200                    .iter()
1201                    .cloned()
1202                    .map(hir::Literal::Byte)
1203                    .map(Hir::literal)
1204                    .collect();
1205                Hir::concat(lits)
1206            }
1207        }
1208    }
1209
1210    fn hir_group(i: u32, expr: Hir) -> Hir {
1211        Hir::group(hir::Group {
1212            kind: hir::GroupKind::CaptureIndex(i),
1213            hir: Box::new(expr),
1214        })
1215    }
1216
1217    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1218        Hir::group(hir::Group {
1219            kind: hir::GroupKind::CaptureName {
1220                name: name.to_string(),
1221                index: i,
1222            },
1223            hir: Box::new(expr),
1224        })
1225    }
1226
1227    fn hir_group_nocap(expr: Hir) -> Hir {
1228        Hir::group(hir::Group {
1229            kind: hir::GroupKind::NonCapturing,
1230            hir: Box::new(expr),
1231        })
1232    }
1233
1234    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1235        Hir::repetition(hir::Repetition {
1236            kind: hir::RepetitionKind::ZeroOrOne,
1237            greedy,
1238            hir: Box::new(expr),
1239        })
1240    }
1241
1242    fn hir_star(greedy: bool, expr: Hir) -> Hir {
1243        Hir::repetition(hir::Repetition {
1244            kind: hir::RepetitionKind::ZeroOrMore,
1245            greedy,
1246            hir: Box::new(expr),
1247        })
1248    }
1249
1250    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1251        Hir::repetition(hir::Repetition {
1252            kind: hir::RepetitionKind::OneOrMore,
1253            greedy,
1254            hir: Box::new(expr),
1255        })
1256    }
1257
1258    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1259        Hir::repetition(hir::Repetition {
1260            kind: hir::RepetitionKind::Range(range),
1261            greedy,
1262            hir: Box::new(expr),
1263        })
1264    }
1265
    // Builds the expected HIR for an alternation of `alts`. This only
    // forwards to `Hir::alternation`; it exists so that expectations read
    // like the other `hir_*` helpers.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
        Hir::alternation(alts)
    }
1269
    // Builds the expected HIR for a concatenation of `exprs`. This only
    // forwards to `Hir::concat`.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
        Hir::concat(exprs)
    }
1273
1274    #[allow(dead_code)]
1275    fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1276        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1277    }
1278
1279    #[allow(dead_code)]
1280    fn hir_uclass_perl_word() -> Hir {
1281        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1282    }
1283
1284    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1285        let ranges: Vec<hir::ClassUnicodeRange> = ranges
1286            .iter()
1287            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1288            .collect();
1289        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1290    }
1291
1292    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1293        let ranges: Vec<hir::ClassBytesRange> = ranges
1294            .iter()
1295            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1296            .collect();
1297        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1298    }
1299
1300    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1301        let ranges: Vec<hir::ClassBytesRange> = ranges
1302            .iter()
1303            .map(|&(s, e)| {
1304                assert!(s as u32 <= 0x7F);
1305                assert!(e as u32 <= 0x7F);
1306                hir::ClassBytesRange::new(s as u8, e as u8)
1307            })
1308            .collect();
1309        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1310    }
1311
1312    fn hir_case_fold(expr: Hir) -> Hir {
1313        match expr.into_kind() {
1314            HirKind::Class(mut cls) => {
1315                cls.case_fold_simple();
1316                Hir::class(cls)
1317            }
1318            _ => panic!("cannot case fold non-class Hir expr"),
1319        }
1320    }
1321
1322    fn hir_negate(expr: Hir) -> Hir {
1323        match expr.into_kind() {
1324            HirKind::Class(mut cls) => {
1325                cls.negate();
1326                Hir::class(cls)
1327            }
1328            _ => panic!("cannot negate non-class Hir expr"),
1329        }
1330    }
1331
1332    #[allow(dead_code)]
1333    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1334        use crate::hir::Class::{Bytes, Unicode};
1335
1336        match (expr1.into_kind(), expr2.into_kind()) {
1337            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1338                c1.union(&c2);
1339                Hir::class(hir::Class::Unicode(c1))
1340            }
1341            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1342                c1.union(&c2);
1343                Hir::class(hir::Class::Bytes(c1))
1344            }
1345            _ => panic!("cannot union non-class Hir exprs"),
1346        }
1347    }
1348
1349    #[allow(dead_code)]
1350    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1351        use crate::hir::Class::{Bytes, Unicode};
1352
1353        match (expr1.into_kind(), expr2.into_kind()) {
1354            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1355                c1.difference(&c2);
1356                Hir::class(hir::Class::Unicode(c1))
1357            }
1358            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1359                c1.difference(&c2);
1360                Hir::class(hir::Class::Bytes(c1))
1361            }
1362            _ => panic!("cannot difference non-class Hir exprs"),
1363        }
1364    }
1365
    // Builds the expected HIR for the given anchor. This only forwards to
    // `Hir::anchor`.
    fn hir_anchor(anchor: hir::Anchor) -> Hir {
        Hir::anchor(anchor)
    }
1369
    // Builds the expected HIR for the given word boundary assertion. This
    // only forwards to `Hir::word_boundary`.
    fn hir_word(wb: hir::WordBoundary) -> Hir {
        Hir::word_boundary(wb)
    }
1373
1374    #[test]
1375    fn empty() {
1376        assert_eq!(t(""), Hir::empty());
1377        assert_eq!(t("(?i)"), Hir::empty());
1378        assert_eq!(t("()"), hir_group(1, Hir::empty()));
1379        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1380        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
1381        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
1382        assert_eq!(
1383            t("()|()"),
1384            hir_alt(vec![
1385                hir_group(1, Hir::empty()),
1386                hir_group(2, Hir::empty()),
1387            ])
1388        );
1389        assert_eq!(
1390            t("(|b)"),
1391            hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
1392        );
1393        assert_eq!(
1394            t("(a|)"),
1395            hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
1396        );
1397        assert_eq!(
1398            t("(a||c)"),
1399            hir_group(
1400                1,
1401                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
1402            )
1403        );
1404        assert_eq!(
1405            t("(||)"),
1406            hir_group(
1407                1,
1408                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
1409            )
1410        );
1411    }
1412
1413    #[test]
1414    fn literal() {
1415        assert_eq!(t("a"), hir_lit("a"));
1416        assert_eq!(t("(?-u)a"), hir_lit("a"));
1417        assert_eq!(t("☃"), hir_lit("☃"));
1418        assert_eq!(t("abcd"), hir_lit("abcd"));
1419
1420        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
1421        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
1422        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
1423        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
1424
1425        assert_eq!(
1426            t_err("(?-u)☃"),
1427            TestError {
1428                kind: hir::ErrorKind::UnicodeNotAllowed,
1429                span: Span::new(
1430                    Position::new(5, 1, 6),
1431                    Position::new(8, 1, 7)
1432                ),
1433            }
1434        );
1435        assert_eq!(
1436            t_err(r"(?-u)\xFF"),
1437            TestError {
1438                kind: hir::ErrorKind::InvalidUtf8,
1439                span: Span::new(
1440                    Position::new(5, 1, 6),
1441                    Position::new(9, 1, 10)
1442                ),
1443            }
1444        );
1445    }
1446
    // Case insensitive literals are expected to become classes holding each
    // case variant, while caseless characters stay plain literals.
    #[test]
    fn literal_case_insensitive() {
        // Unicode mode. These assertions are only compiled when the
        // `unicode-case` feature is enabled.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)"),
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
        );
        // The flag only affects what follows it, until it is switched off.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])
        );
        // `@` has no case variants and stays a literal.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
                hir_lit("@"),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
            ])
        );
        // A non-ASCII letter with three case variants.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
        );

        // With Unicode disabled, the expected classes are byte classes.
        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?-u)a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i-u)ab@c"),
            hir_cat(vec![
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
                hir_lit("@"),
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
            ])
        );

        // The same with invalid UTF-8 allowed; `\xFF` is expected to stay a
        // single byte literal.
        assert_eq!(
            t_bytes("(?i-u)a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes("(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes(r"(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

        // A non-ASCII character is an error once Unicode is disabled.
        assert_eq!(
            t_err("(?i-u)β"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(8, 1, 8),
                ),
            }
        );
    }
1526
1527    #[test]
1528    fn dot() {
1529        assert_eq!(
1530            t("."),
1531            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
1532        );
1533        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
1534        assert_eq!(
1535            t_bytes("(?-u)."),
1536            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
1537        );
1538        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1539
1540        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1541        assert_eq!(
1542            t_err("(?-u)."),
1543            TestError {
1544                kind: hir::ErrorKind::InvalidUtf8,
1545                span: Span::new(
1546                    Position::new(5, 1, 6),
1547                    Position::new(6, 1, 7)
1548                ),
1549            }
1550        );
1551        assert_eq!(
1552            t_err("(?s-u)."),
1553            TestError {
1554                kind: hir::ErrorKind::InvalidUtf8,
1555                span: Span::new(
1556                    Position::new(6, 1, 7),
1557                    Position::new(7, 1, 8)
1558                ),
1559            }
1560        );
1561    }
1562
1563    #[test]
1564    fn assertions() {
1565        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
1566        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
1567        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
1568        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
1569        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
1570        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
1571        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
1572        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
1573
1574        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
1575        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
1576        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
1577        assert_eq!(
1578            t_bytes(r"(?-u)\B"),
1579            hir_word(hir::WordBoundary::AsciiNegate)
1580        );
1581
1582        assert_eq!(
1583            t_err(r"(?-u)\B"),
1584            TestError {
1585                kind: hir::ErrorKind::InvalidUtf8,
1586                span: Span::new(
1587                    Position::new(5, 1, 6),
1588                    Position::new(7, 1, 8)
1589                ),
1590            }
1591        );
1592    }
1593
    // Capturing, named and non-capturing groups must translate to the
    // matching HIR group kinds with the expected capture indices.
    #[test]
    fn group() {
        // Unnamed capture groups.
        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
        assert_eq!(
            t("(a)(b)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ])
        );
        assert_eq!(
            t("(a)|(b)"),
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ])
        );
        // Named capture groups.
        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
        assert_eq!(
            t("(?P<foo>a)(?P<bar>b)"),
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ])
        );
        // Non-capturing groups do not take up a capture index.
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
        assert_eq!(
            t("(?:a)(b)"),
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ])
        );
        assert_eq!(
            t("(a)(?:b)(c)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ])
        );
        // Named and unnamed groups share one index sequence.
        assert_eq!(
            t("(a)(?P<foo>b)(c)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ])
        );
        // Groups that contain nothing, or nothing but a flag directive, are
        // expected to wrap the empty expression.
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
        assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
        assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
        assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
    }
1650
    // Flags must apply to exactly the part of the pattern they scope over:
    // the inside of a flagged group, or everything after a flag directive up
    // to the end of the enclosing group.
    #[test]
    fn flags() {
        // Flags on a group do not leak out of it.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i-u:a)β"),
            hir_cat(vec![
                hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
                hir_lit("β"),
            ])
        );
        // A flag directive inside a group ends with that group.
        assert_eq!(
            t("(?:(?i-u)a)b"),
            hir_cat(vec![
                hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
                hir_lit("b"),
            ])
        );
        assert_eq!(
            t("((?i-u)a)b"),
            hir_cat(vec![
                hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
                hir_lit("b"),
            ])
        );
        // A group can switch off a flag that is on outside of it.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)(?-i:a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])
        );
        // Several flags in one directive.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?im)a^"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_anchor(hir::Anchor::StartLine),
            ])
        );
        // A later directive can keep one flag and switch off another.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?im)a^(?i-m)a^"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_anchor(hir::Anchor::StartLine),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_anchor(hir::Anchor::StartText),
            ])
        );
        // The swap-greed flag inverts the greediness of each repetition.
        assert_eq!(
            t("(?U)a*a*?(?-U)a*a*?"),
            hir_cat(vec![
                hir_star(false, hir_lit("a")),
                hir_star(true, hir_lit("a")),
                hir_star(true, hir_lit("a")),
                hir_star(false, hir_lit("a")),
            ])
        );
        // A directive in the middle of a group only affects the rest of
        // that group.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?:a(?i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![
                    hir_lit("a"),
                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
                ])),
                hir_lit("a"),
            ])
        );
        // The flags in effect before a group are back in effect after it.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)(?:a(?-i)a)a"),
            hir_cat(vec![
                hir_group_nocap(hir_cat(vec![
                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
                    hir_lit("a"),
                ])),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])
        );
    }
1740
1741    #[test]
1742    fn escape() {
1743        assert_eq!(
1744            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1745            hir_lit(r"\.+*?()|[]{}^$#")
1746        );
1747    }
1748
1749    #[test]
1750    fn repetition() {
1751        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1752        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1753        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1754        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1755        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1756        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1757
1758        assert_eq!(
1759            t("a{1}"),
1760            hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1761        );
1762        assert_eq!(
1763            t("a{1,}"),
1764            hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1765        );
1766        assert_eq!(
1767            t("a{1,2}"),
1768            hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1769        );
1770        assert_eq!(
1771            t("a{1}?"),
1772            hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1773        );
1774        assert_eq!(
1775            t("a{1,}?"),
1776            hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1777        );
1778        assert_eq!(
1779            t("a{1,2}?"),
1780            hir_range(
1781                false,
1782                hir::RepetitionRange::Bounded(1, 2),
1783                hir_lit("a"),
1784            )
1785        );
1786
1787        assert_eq!(
1788            t("ab?"),
1789            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1790        );
1791        assert_eq!(
1792            t("(ab)?"),
1793            hir_quest(
1794                true,
1795                hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1796            )
1797        );
1798        assert_eq!(
1799            t("a|b?"),
1800            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1801        );
1802    }
1803
1804    #[test]
1805    fn cat_alt() {
1806        assert_eq!(
1807            t("(ab)"),
1808            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1809        );
1810        assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1811        assert_eq!(
1812            t("a|b|c"),
1813            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1814        );
1815        assert_eq!(
1816            t("ab|bc|cd"),
1817            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1818        );
1819        assert_eq!(
1820            t("(a|b)"),
1821            hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1822        );
1823        assert_eq!(
1824            t("(a|b|c)"),
1825            hir_group(
1826                1,
1827                hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1828            )
1829        );
1830        assert_eq!(
1831            t("(ab|bc|cd)"),
1832            hir_group(
1833                1,
1834                hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1835            )
1836        );
1837        assert_eq!(
1838            t("(ab|(bc|(cd)))"),
1839            hir_group(
1840                1,
1841                hir_alt(vec![
1842                    hir_lit("ab"),
1843                    hir_group(
1844                        2,
1845                        hir_alt(vec![
1846                            hir_lit("bc"),
1847                            hir_group(3, hir_lit("cd")),
1848                        ])
1849                    ),
1850                ])
1851            )
1852        );
1853    }
1854
1855    #[test]
1856    fn class_ascii() {
1857        assert_eq!(
1858            t("[[:alnum:]]"),
1859            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1860        );
1861        assert_eq!(
1862            t("[[:alpha:]]"),
1863            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1864        );
1865        assert_eq!(
1866            t("[[:ascii:]]"),
1867            hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1868        );
1869        assert_eq!(
1870            t("[[:blank:]]"),
1871            hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1872        );
1873        assert_eq!(
1874            t("[[:cntrl:]]"),
1875            hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1876        );
1877        assert_eq!(
1878            t("[[:digit:]]"),
1879            hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1880        );
1881        assert_eq!(
1882            t("[[:graph:]]"),
1883            hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1884        );
1885        assert_eq!(
1886            t("[[:lower:]]"),
1887            hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1888        );
1889        assert_eq!(
1890            t("[[:print:]]"),
1891            hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1892        );
1893        assert_eq!(
1894            t("[[:punct:]]"),
1895            hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1896        );
1897        assert_eq!(
1898            t("[[:space:]]"),
1899            hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1900        );
1901        assert_eq!(
1902            t("[[:upper:]]"),
1903            hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1904        );
1905        assert_eq!(
1906            t("[[:word:]]"),
1907            hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1908        );
1909        assert_eq!(
1910            t("[[:xdigit:]]"),
1911            hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1912        );
1913
1914        assert_eq!(
1915            t("[[:^lower:]]"),
1916            hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1917        );
1918        #[cfg(feature = "unicode-case")]
1919        assert_eq!(
1920            t("(?i)[[:lower:]]"),
1921            hir_uclass(&[
1922                ('A', 'Z'),
1923                ('a', 'z'),
1924                ('\u{17F}', '\u{17F}'),
1925                ('\u{212A}', '\u{212A}'),
1926            ])
1927        );
1928
1929        assert_eq!(
1930            t("(?-u)[[:lower:]]"),
1931            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1932        );
1933        assert_eq!(
1934            t("(?i-u)[[:lower:]]"),
1935            hir_case_fold(hir_bclass_from_char(ascii_class(
1936                &ast::ClassAsciiKind::Lower
1937            )))
1938        );
1939
1940        assert_eq!(
1941            t_err("(?-u)[[:^lower:]]"),
1942            TestError {
1943                kind: hir::ErrorKind::InvalidUtf8,
1944                span: Span::new(
1945                    Position::new(6, 1, 7),
1946                    Position::new(16, 1, 17)
1947                ),
1948            }
1949        );
1950        assert_eq!(
1951            t_err("(?i-u)[[:^lower:]]"),
1952            TestError {
1953                kind: hir::ErrorKind::InvalidUtf8,
1954                span: Span::new(
1955                    Position::new(7, 1, 8),
1956                    Position::new(17, 1, 18)
1957                ),
1958            }
1959        );
1960    }
1961
1962    #[test]
1963    fn class_ascii_multiple() {
1964        // See: https://github.com/rust-lang/regex/issues/680
1965        assert_eq!(
1966            t("[[:alnum:][:^ascii:]]"),
1967            hir_union(
1968                hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
1969                hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
1970            ),
1971        );
1972        assert_eq!(
1973            t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
1974            hir_union(
1975                hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
1976                hir_bclass(&[(0x80, 0xFF)]),
1977            ),
1978        );
1979    }
1980
1981    #[test]
1982    #[cfg(feature = "unicode-perl")]
1983    fn class_perl() {
1984        // Unicode
1985        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1986        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1987        assert_eq!(t(r"\w"), hir_uclass_perl_word());
1988        #[cfg(feature = "unicode-case")]
1989        assert_eq!(
1990            t(r"(?i)\d"),
1991            hir_uclass_query(ClassQuery::Binary("digit"))
1992        );
1993        #[cfg(feature = "unicode-case")]
1994        assert_eq!(
1995            t(r"(?i)\s"),
1996            hir_uclass_query(ClassQuery::Binary("space"))
1997        );
1998        #[cfg(feature = "unicode-case")]
1999        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
2000
2001        // Unicode, negated
2002        assert_eq!(
2003            t(r"\D"),
2004            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2005        );
2006        assert_eq!(
2007            t(r"\S"),
2008            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2009        );
2010        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
2011        #[cfg(feature = "unicode-case")]
2012        assert_eq!(
2013            t(r"(?i)\D"),
2014            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2015        );
2016        #[cfg(feature = "unicode-case")]
2017        assert_eq!(
2018            t(r"(?i)\S"),
2019            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2020        );
2021        #[cfg(feature = "unicode-case")]
2022        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2023
2024        // ASCII only
2025        assert_eq!(
2026            t(r"(?-u)\d"),
2027            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2028        );
2029        assert_eq!(
2030            t(r"(?-u)\s"),
2031            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2032        );
2033        assert_eq!(
2034            t(r"(?-u)\w"),
2035            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2036        );
2037        assert_eq!(
2038            t(r"(?i-u)\d"),
2039            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2040        );
2041        assert_eq!(
2042            t(r"(?i-u)\s"),
2043            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2044        );
2045        assert_eq!(
2046            t(r"(?i-u)\w"),
2047            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2048        );
2049
2050        // ASCII only, negated
2051        assert_eq!(
2052            t(r"(?-u)\D"),
2053            hir_negate(hir_bclass_from_char(ascii_class(
2054                &ast::ClassAsciiKind::Digit
2055            )))
2056        );
2057        assert_eq!(
2058            t(r"(?-u)\S"),
2059            hir_negate(hir_bclass_from_char(ascii_class(
2060                &ast::ClassAsciiKind::Space
2061            )))
2062        );
2063        assert_eq!(
2064            t(r"(?-u)\W"),
2065            hir_negate(hir_bclass_from_char(ascii_class(
2066                &ast::ClassAsciiKind::Word
2067            )))
2068        );
2069        assert_eq!(
2070            t(r"(?i-u)\D"),
2071            hir_negate(hir_bclass_from_char(ascii_class(
2072                &ast::ClassAsciiKind::Digit
2073            )))
2074        );
2075        assert_eq!(
2076            t(r"(?i-u)\S"),
2077            hir_negate(hir_bclass_from_char(ascii_class(
2078                &ast::ClassAsciiKind::Space
2079            )))
2080        );
2081        assert_eq!(
2082            t(r"(?i-u)\W"),
2083            hir_negate(hir_bclass_from_char(ascii_class(
2084                &ast::ClassAsciiKind::Word
2085            )))
2086        );
2087    }
2088
2089    #[test]
2090    #[cfg(not(feature = "unicode-perl"))]
2091    fn class_perl_word_disabled() {
2092        assert_eq!(
2093            t_err(r"\w"),
2094            TestError {
2095                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2096                span: Span::new(
2097                    Position::new(0, 1, 1),
2098                    Position::new(2, 1, 3)
2099                ),
2100            }
2101        );
2102    }
2103
2104    #[test]
2105    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2106    fn class_perl_space_disabled() {
2107        assert_eq!(
2108            t_err(r"\s"),
2109            TestError {
2110                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2111                span: Span::new(
2112                    Position::new(0, 1, 1),
2113                    Position::new(2, 1, 3)
2114                ),
2115            }
2116        );
2117    }
2118
2119    #[test]
2120    #[cfg(all(
2121        not(feature = "unicode-perl"),
2122        not(feature = "unicode-gencat")
2123    ))]
2124    fn class_perl_digit_disabled() {
2125        assert_eq!(
2126            t_err(r"\d"),
2127            TestError {
2128                kind: hir::ErrorKind::UnicodePerlClassNotFound,
2129                span: Span::new(
2130                    Position::new(0, 1, 1),
2131                    Position::new(2, 1, 3)
2132                ),
2133            }
2134        );
2135    }
2136
2137    #[test]
2138    #[cfg(feature = "unicode-gencat")]
2139    fn class_unicode_gencat() {
2140        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2141        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2142        assert_eq!(
2143            t(r"\p{Separator}"),
2144            hir_uclass_query(ClassQuery::Binary("Z"))
2145        );
2146        assert_eq!(
2147            t(r"\p{se      PaRa ToR}"),
2148            hir_uclass_query(ClassQuery::Binary("Z"))
2149        );
2150        assert_eq!(
2151            t(r"\p{gc:Separator}"),
2152            hir_uclass_query(ClassQuery::Binary("Z"))
2153        );
2154        assert_eq!(
2155            t(r"\p{gc=Separator}"),
2156            hir_uclass_query(ClassQuery::Binary("Z"))
2157        );
2158        assert_eq!(
2159            t(r"\p{Other}"),
2160            hir_uclass_query(ClassQuery::Binary("Other"))
2161        );
2162        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2163
2164        assert_eq!(
2165            t(r"\PZ"),
2166            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2167        );
2168        assert_eq!(
2169            t(r"\P{separator}"),
2170            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2171        );
2172        assert_eq!(
2173            t(r"\P{gc!=separator}"),
2174            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2175        );
2176
2177        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2178        assert_eq!(
2179            t(r"\p{assigned}"),
2180            hir_uclass_query(ClassQuery::Binary("Assigned"))
2181        );
2182        assert_eq!(
2183            t(r"\p{ascii}"),
2184            hir_uclass_query(ClassQuery::Binary("ASCII"))
2185        );
2186        assert_eq!(
2187            t(r"\p{gc:any}"),
2188            hir_uclass_query(ClassQuery::Binary("Any"))
2189        );
2190        assert_eq!(
2191            t(r"\p{gc:assigned}"),
2192            hir_uclass_query(ClassQuery::Binary("Assigned"))
2193        );
2194        assert_eq!(
2195            t(r"\p{gc:ascii}"),
2196            hir_uclass_query(ClassQuery::Binary("ASCII"))
2197        );
2198
2199        assert_eq!(
2200            t_err(r"(?-u)\pZ"),
2201            TestError {
2202                kind: hir::ErrorKind::UnicodeNotAllowed,
2203                span: Span::new(
2204                    Position::new(5, 1, 6),
2205                    Position::new(8, 1, 9)
2206                ),
2207            }
2208        );
2209        assert_eq!(
2210            t_err(r"(?-u)\p{Separator}"),
2211            TestError {
2212                kind: hir::ErrorKind::UnicodeNotAllowed,
2213                span: Span::new(
2214                    Position::new(5, 1, 6),
2215                    Position::new(18, 1, 19)
2216                ),
2217            }
2218        );
2219        assert_eq!(
2220            t_err(r"\pE"),
2221            TestError {
2222                kind: hir::ErrorKind::UnicodePropertyNotFound,
2223                span: Span::new(
2224                    Position::new(0, 1, 1),
2225                    Position::new(3, 1, 4)
2226                ),
2227            }
2228        );
2229        assert_eq!(
2230            t_err(r"\p{Foo}"),
2231            TestError {
2232                kind: hir::ErrorKind::UnicodePropertyNotFound,
2233                span: Span::new(
2234                    Position::new(0, 1, 1),
2235                    Position::new(7, 1, 8)
2236                ),
2237            }
2238        );
2239        assert_eq!(
2240            t_err(r"\p{gc:Foo}"),
2241            TestError {
2242                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2243                span: Span::new(
2244                    Position::new(0, 1, 1),
2245                    Position::new(10, 1, 11)
2246                ),
2247            }
2248        );
2249    }
2250
2251    #[test]
2252    #[cfg(not(feature = "unicode-gencat"))]
2253    fn class_unicode_gencat_disabled() {
2254        assert_eq!(
2255            t_err(r"\p{Separator}"),
2256            TestError {
2257                kind: hir::ErrorKind::UnicodePropertyNotFound,
2258                span: Span::new(
2259                    Position::new(0, 1, 1),
2260                    Position::new(13, 1, 14)
2261                ),
2262            }
2263        );
2264
2265        assert_eq!(
2266            t_err(r"\p{Any}"),
2267            TestError {
2268                kind: hir::ErrorKind::UnicodePropertyNotFound,
2269                span: Span::new(
2270                    Position::new(0, 1, 1),
2271                    Position::new(7, 1, 8)
2272                ),
2273            }
2274        );
2275    }
2276
2277    #[test]
2278    #[cfg(feature = "unicode-script")]
2279    fn class_unicode_script() {
2280        assert_eq!(
2281            t(r"\p{Greek}"),
2282            hir_uclass_query(ClassQuery::Binary("Greek"))
2283        );
2284        #[cfg(feature = "unicode-case")]
2285        assert_eq!(
2286            t(r"(?i)\p{Greek}"),
2287            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2288        );
2289        #[cfg(feature = "unicode-case")]
2290        assert_eq!(
2291            t(r"(?i)\P{Greek}"),
2292            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2293                "Greek"
2294            ))))
2295        );
2296
2297        assert_eq!(
2298            t_err(r"\p{sc:Foo}"),
2299            TestError {
2300                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2301                span: Span::new(
2302                    Position::new(0, 1, 1),
2303                    Position::new(10, 1, 11)
2304                ),
2305            }
2306        );
2307        assert_eq!(
2308            t_err(r"\p{scx:Foo}"),
2309            TestError {
2310                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2311                span: Span::new(
2312                    Position::new(0, 1, 1),
2313                    Position::new(11, 1, 12)
2314                ),
2315            }
2316        );
2317    }
2318
2319    #[test]
2320    #[cfg(not(feature = "unicode-script"))]
2321    fn class_unicode_script_disabled() {
2322        assert_eq!(
2323            t_err(r"\p{Greek}"),
2324            TestError {
2325                kind: hir::ErrorKind::UnicodePropertyNotFound,
2326                span: Span::new(
2327                    Position::new(0, 1, 1),
2328                    Position::new(9, 1, 10)
2329                ),
2330            }
2331        );
2332
2333        assert_eq!(
2334            t_err(r"\p{scx:Greek}"),
2335            TestError {
2336                kind: hir::ErrorKind::UnicodePropertyNotFound,
2337                span: Span::new(
2338                    Position::new(0, 1, 1),
2339                    Position::new(13, 1, 14)
2340                ),
2341            }
2342        );
2343    }
2344
2345    #[test]
2346    #[cfg(feature = "unicode-age")]
2347    fn class_unicode_age() {
2348        assert_eq!(
2349            t_err(r"\p{age:Foo}"),
2350            TestError {
2351                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2352                span: Span::new(
2353                    Position::new(0, 1, 1),
2354                    Position::new(11, 1, 12)
2355                ),
2356            }
2357        );
2358    }
2359
2360    #[test]
2361    #[cfg(feature = "unicode-gencat")]
2362    fn class_unicode_any_empty() {
2363        assert_eq!(
2364            t_err(r"\P{any}"),
2365            TestError {
2366                kind: hir::ErrorKind::EmptyClassNotAllowed,
2367                span: Span::new(
2368                    Position::new(0, 1, 1),
2369                    Position::new(7, 1, 8)
2370                ),
2371            }
2372        );
2373    }
2374
2375    #[test]
2376    #[cfg(not(feature = "unicode-age"))]
2377    fn class_unicode_age_disabled() {
2378        assert_eq!(
2379            t_err(r"\p{age:3.0}"),
2380            TestError {
2381                kind: hir::ErrorKind::UnicodePropertyNotFound,
2382                span: Span::new(
2383                    Position::new(0, 1, 1),
2384                    Position::new(11, 1, 12)
2385                ),
2386            }
2387        );
2388    }
2389
2390    #[test]
2391    fn class_bracketed() {
2392        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2393        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2394        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2395        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2396        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2397        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2398        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2399        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2400        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2401        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2402        #[cfg(feature = "unicode-gencat")]
2403        assert_eq!(
2404            t(r"[\pZ]"),
2405            hir_uclass_query(ClassQuery::Binary("separator"))
2406        );
2407        #[cfg(feature = "unicode-gencat")]
2408        assert_eq!(
2409            t(r"[\p{separator}]"),
2410            hir_uclass_query(ClassQuery::Binary("separator"))
2411        );
2412        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2413        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2414        #[cfg(feature = "unicode-gencat")]
2415        assert_eq!(
2416            t(r"[^\PZ]"),
2417            hir_uclass_query(ClassQuery::Binary("separator"))
2418        );
2419        #[cfg(feature = "unicode-gencat")]
2420        assert_eq!(
2421            t(r"[^\P{separator}]"),
2422            hir_uclass_query(ClassQuery::Binary("separator"))
2423        );
2424        #[cfg(all(
2425            feature = "unicode-case",
2426            any(feature = "unicode-perl", feature = "unicode-gencat")
2427        ))]
2428        assert_eq!(
2429            t(r"(?i)[^\D]"),
2430            hir_uclass_query(ClassQuery::Binary("digit"))
2431        );
2432        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2433        assert_eq!(
2434            t(r"(?i)[^\P{greek}]"),
2435            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2436        );
2437
2438        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2439        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2440        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2441
2442        #[cfg(feature = "unicode-case")]
2443        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2444        #[cfg(feature = "unicode-case")]
2445        assert_eq!(
2446            t("(?i)[k]"),
2447            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2448        );
2449        #[cfg(feature = "unicode-case")]
2450        assert_eq!(
2451            t("(?i)[β]"),
2452            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2453        );
2454        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2455
2456        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2457        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2458        assert_eq!(
2459            t_bytes("(?-u)[^a]"),
2460            hir_negate(hir_bclass(&[(b'a', b'a')]))
2461        );
2462        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2463        assert_eq!(
2464            t(r"[^\d]"),
2465            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2466        );
2467        #[cfg(feature = "unicode-gencat")]
2468        assert_eq!(
2469            t(r"[^\pZ]"),
2470            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2471        );
2472        #[cfg(feature = "unicode-gencat")]
2473        assert_eq!(
2474            t(r"[^\p{separator}]"),
2475            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2476        );
2477        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2478        assert_eq!(
2479            t(r"(?i)[^\p{greek}]"),
2480            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2481                "greek"
2482            ))))
2483        );
2484        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2485        assert_eq!(
2486            t(r"(?i)[\P{greek}]"),
2487            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2488                "greek"
2489            ))))
2490        );
2491
2492        // Test some weird cases.
2493        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2494
2495        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2496        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2497        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2498        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2499        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2500
2501        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2502        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2503        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2504        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2505        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2506
2507        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2508        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2509        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2510        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2511        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2512
2513        assert_eq!(
2514            t_err("(?-u)[^a]"),
2515            TestError {
2516                kind: hir::ErrorKind::InvalidUtf8,
2517                span: Span::new(
2518                    Position::new(5, 1, 6),
2519                    Position::new(9, 1, 10)
2520                ),
2521            }
2522        );
2523        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2524        assert_eq!(
2525            t_err(r"[^\s\S]"),
2526            TestError {
2527                kind: hir::ErrorKind::EmptyClassNotAllowed,
2528                span: Span::new(
2529                    Position::new(0, 1, 1),
2530                    Position::new(7, 1, 8)
2531                ),
2532            }
2533        );
2534        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2535        assert_eq!(
2536            t_err(r"(?-u)[^\s\S]"),
2537            TestError {
2538                kind: hir::ErrorKind::EmptyClassNotAllowed,
2539                span: Span::new(
2540                    Position::new(5, 1, 6),
2541                    Position::new(12, 1, 13)
2542                ),
2543            }
2544        );
2545    }
2546
2547    #[test]
2548    fn class_bracketed_union() {
2549        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2550        #[cfg(feature = "unicode-gencat")]
2551        assert_eq!(
2552            t(r"[a\pZb]"),
2553            hir_union(
2554                hir_uclass(&[('a', 'b')]),
2555                hir_uclass_query(ClassQuery::Binary("separator"))
2556            )
2557        );
2558        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2559        assert_eq!(
2560            t(r"[\pZ\p{Greek}]"),
2561            hir_union(
2562                hir_uclass_query(ClassQuery::Binary("greek")),
2563                hir_uclass_query(ClassQuery::Binary("separator"))
2564            )
2565        );
2566        #[cfg(all(
2567            feature = "unicode-age",
2568            feature = "unicode-gencat",
2569            feature = "unicode-script"
2570        ))]
2571        assert_eq!(
2572            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2573            hir_union(
2574                hir_uclass_query(ClassQuery::ByValue {
2575                    property_name: "age",
2576                    property_value: "3.0",
2577                }),
2578                hir_union(
2579                    hir_uclass_query(ClassQuery::Binary("greek")),
2580                    hir_uclass_query(ClassQuery::Binary("separator"))
2581                )
2582            )
2583        );
2584        #[cfg(all(
2585            feature = "unicode-age",
2586            feature = "unicode-gencat",
2587            feature = "unicode-script"
2588        ))]
2589        assert_eq!(
2590            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2591            hir_union(
2592                hir_uclass_query(ClassQuery::ByValue {
2593                    property_name: "age",
2594                    property_value: "3.0",
2595                }),
2596                hir_union(
2597                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
2598                    hir_union(
2599                        hir_uclass_query(ClassQuery::Binary("greek")),
2600                        hir_uclass_query(ClassQuery::Binary("separator"))
2601                    )
2602                )
2603            )
2604        );
2605
2606        #[cfg(all(
2607            feature = "unicode-age",
2608            feature = "unicode-case",
2609            feature = "unicode-gencat",
2610            feature = "unicode-script"
2611        ))]
2612        assert_eq!(
2613            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2614            hir_case_fold(hir_union(
2615                hir_uclass_query(ClassQuery::ByValue {
2616                    property_name: "age",
2617                    property_value: "3.0",
2618                }),
2619                hir_union(
2620                    hir_uclass_query(ClassQuery::Binary("greek")),
2621                    hir_uclass_query(ClassQuery::Binary("separator"))
2622                )
2623            ))
2624        );
2625        #[cfg(all(
2626            feature = "unicode-age",
2627            feature = "unicode-gencat",
2628            feature = "unicode-script"
2629        ))]
2630        assert_eq!(
2631            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2632            hir_negate(hir_union(
2633                hir_uclass_query(ClassQuery::ByValue {
2634                    property_name: "age",
2635                    property_value: "3.0",
2636                }),
2637                hir_union(
2638                    hir_uclass_query(ClassQuery::Binary("greek")),
2639                    hir_uclass_query(ClassQuery::Binary("separator"))
2640                )
2641            ))
2642        );
2643        #[cfg(all(
2644            feature = "unicode-age",
2645            feature = "unicode-case",
2646            feature = "unicode-gencat",
2647            feature = "unicode-script"
2648        ))]
2649        assert_eq!(
2650            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2651            hir_negate(hir_case_fold(hir_union(
2652                hir_uclass_query(ClassQuery::ByValue {
2653                    property_name: "age",
2654                    property_value: "3.0",
2655                }),
2656                hir_union(
2657                    hir_uclass_query(ClassQuery::Binary("greek")),
2658                    hir_uclass_query(ClassQuery::Binary("separator"))
2659                )
2660            )))
2661        );
2662    }
2663
2664    #[test]
2665    fn class_bracketed_nested() {
2666        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2667        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2668        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2669
2670        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2671        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2672
2673        #[cfg(feature = "unicode-case")]
2674        assert_eq!(
2675            t(r"(?i)[a[^c]]"),
2676            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2677        );
2678        #[cfg(feature = "unicode-case")]
2679        assert_eq!(
2680            t(r"(?i)[a-b[^c]]"),
2681            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2682        );
2683
2684        #[cfg(feature = "unicode-case")]
2685        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2686        #[cfg(feature = "unicode-case")]
2687        assert_eq!(
2688            t(r"(?i)[^a-b[^c]]"),
2689            hir_uclass(&[('C', 'C'), ('c', 'c')])
2690        );
2691
2692        assert_eq!(
2693            t_err(r"[^a-c[^c]]"),
2694            TestError {
2695                kind: hir::ErrorKind::EmptyClassNotAllowed,
2696                span: Span::new(
2697                    Position::new(0, 1, 1),
2698                    Position::new(10, 1, 11)
2699                ),
2700            }
2701        );
2702        #[cfg(feature = "unicode-case")]
2703        assert_eq!(
2704            t_err(r"(?i)[^a-c[^c]]"),
2705            TestError {
2706                kind: hir::ErrorKind::EmptyClassNotAllowed,
2707                span: Span::new(
2708                    Position::new(4, 1, 5),
2709                    Position::new(14, 1, 15)
2710                ),
2711            }
2712        );
2713    }
2714
2715    #[test]
2716    fn class_bracketed_intersect() {
2717        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2718        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2719        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2720        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2721        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2722        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2723        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2724        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2725        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2726
2727        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2728        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2729        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2730        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2731        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2732        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2733
2734        #[cfg(feature = "unicode-case")]
2735        assert_eq!(
2736            t("(?i)[abc&&b-c]"),
2737            hir_case_fold(hir_uclass(&[('b', 'c')]))
2738        );
2739        #[cfg(feature = "unicode-case")]
2740        assert_eq!(
2741            t("(?i)[abc&&[b-c]]"),
2742            hir_case_fold(hir_uclass(&[('b', 'c')]))
2743        );
2744        #[cfg(feature = "unicode-case")]
2745        assert_eq!(
2746            t("(?i)[[abc]&&[b-c]]"),
2747            hir_case_fold(hir_uclass(&[('b', 'c')]))
2748        );
2749        #[cfg(feature = "unicode-case")]
2750        assert_eq!(
2751            t("(?i)[a-z&&b-y&&c-x]"),
2752            hir_case_fold(hir_uclass(&[('c', 'x')]))
2753        );
2754        #[cfg(feature = "unicode-case")]
2755        assert_eq!(
2756            t("(?i)[c-da-b&&a-d]"),
2757            hir_case_fold(hir_uclass(&[('a', 'd')]))
2758        );
2759        #[cfg(feature = "unicode-case")]
2760        assert_eq!(
2761            t("(?i)[a-d&&c-da-b]"),
2762            hir_case_fold(hir_uclass(&[('a', 'd')]))
2763        );
2764
2765        assert_eq!(
2766            t("(?i-u)[abc&&b-c]"),
2767            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2768        );
2769        assert_eq!(
2770            t("(?i-u)[abc&&[b-c]]"),
2771            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2772        );
2773        assert_eq!(
2774            t("(?i-u)[[abc]&&[b-c]]"),
2775            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2776        );
2777        assert_eq!(
2778            t("(?i-u)[a-z&&b-y&&c-x]"),
2779            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2780        );
2781        assert_eq!(
2782            t("(?i-u)[c-da-b&&a-d]"),
2783            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2784        );
2785        assert_eq!(
2786            t("(?i-u)[a-d&&c-da-b]"),
2787            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2788        );
2789
2790        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2791        // `^` is also allowed to be unescaped after `&&`.
2792        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2793        // `]` needs to be escaped after `&&` since it's not at start of class.
2794        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2795        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2796        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2797        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2798        // Test precedence.
2799        assert_eq!(
2800            t(r"[a-w&&[^c-g]z]"),
2801            hir_uclass(&[('a', 'b'), ('h', 'w')])
2802        );
2803    }
2804
2805    #[test]
2806    fn class_bracketed_intersect_negate() {
2807        #[cfg(feature = "unicode-perl")]
2808        assert_eq!(
2809            t(r"[^\w&&\d]"),
2810            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2811        );
2812        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2813        #[cfg(feature = "unicode-perl")]
2814        assert_eq!(
2815            t(r"[^[\w&&\d]]"),
2816            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2817        );
2818        #[cfg(feature = "unicode-perl")]
2819        assert_eq!(
2820            t(r"[^[^\w&&\d]]"),
2821            hir_uclass_query(ClassQuery::Binary("digit"))
2822        );
2823        #[cfg(feature = "unicode-perl")]
2824        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2825
2826        #[cfg(feature = "unicode-perl")]
2827        assert_eq!(
2828            t_bytes(r"(?-u)[^\w&&\d]"),
2829            hir_negate(hir_bclass_from_char(ascii_class(
2830                &ast::ClassAsciiKind::Digit
2831            )))
2832        );
2833        assert_eq!(
2834            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2835            hir_negate(hir_bclass(&[(b'a', b'c')]))
2836        );
2837        assert_eq!(
2838            t_bytes(r"(?-u)[^[\w&&\d]]"),
2839            hir_negate(hir_bclass_from_char(ascii_class(
2840                &ast::ClassAsciiKind::Digit
2841            )))
2842        );
2843        assert_eq!(
2844            t_bytes(r"(?-u)[^[^\w&&\d]]"),
2845            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2846        );
2847        assert_eq!(
2848            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2849            hir_negate(hir_bclass_from_char(ascii_class(
2850                &ast::ClassAsciiKind::Word
2851            )))
2852        );
2853    }
2854
2855    #[test]
2856    fn class_bracketed_difference() {
2857        #[cfg(feature = "unicode-gencat")]
2858        assert_eq!(
2859            t(r"[\pL--[:ascii:]]"),
2860            hir_difference(
2861                hir_uclass_query(ClassQuery::Binary("letter")),
2862                hir_uclass(&[('\0', '\x7F')])
2863            )
2864        );
2865
2866        assert_eq!(
2867            t(r"(?-u)[[:alpha:]--[:lower:]]"),
2868            hir_bclass(&[(b'A', b'Z')])
2869        );
2870    }
2871
2872    #[test]
2873    fn class_bracketed_symmetric_difference() {
2874        #[cfg(feature = "unicode-script")]
2875        assert_eq!(
2876            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2877            hir_uclass(&[
2878                ('\u{0342}', '\u{0342}'),
2879                ('\u{0345}', '\u{0345}'),
2880                ('\u{1DC0}', '\u{1DC1}'),
2881            ])
2882        );
2883        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2884
2885        assert_eq!(
2886            t(r"(?-u)[a-g~~c-j]"),
2887            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2888        );
2889    }
2890
2891    #[test]
2892    fn ignore_whitespace() {
2893        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
2894        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2895        assert_eq!(
2896            t(r"(?x)\x # comment
2897{ # comment
2898    53 # comment
2899} #comment"),
2900            hir_lit("S")
2901        );
2902
2903        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2904        assert_eq!(
2905            t(r"(?x)\x # comment
2906        53 # comment"),
2907            hir_lit("S")
2908        );
2909        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2910
2911        #[cfg(feature = "unicode-gencat")]
2912        assert_eq!(
2913            t(r"(?x)\p # comment
2914{ # comment
2915    Separator # comment
2916} # comment"),
2917            hir_uclass_query(ClassQuery::Binary("separator"))
2918        );
2919
2920        assert_eq!(
2921            t(r"(?x)a # comment
2922{ # comment
2923    5 # comment
2924    , # comment
2925    10 # comment
2926} # comment"),
2927            hir_range(
2928                true,
2929                hir::RepetitionRange::Bounded(5, 10),
2930                hir_lit("a")
2931            )
2932        );
2933
2934        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2935    }
2936
2937    #[test]
2938    fn analysis_is_always_utf8() {
2939        // Positive examples.
2940        assert!(t_bytes(r"a").is_always_utf8());
2941        assert!(t_bytes(r"ab").is_always_utf8());
2942        assert!(t_bytes(r"(?-u)a").is_always_utf8());
2943        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2944        assert!(t_bytes(r"\xFF").is_always_utf8());
2945        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2946        assert!(t_bytes(r"[^a]").is_always_utf8());
2947        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2948        assert!(t_bytes(r"\b").is_always_utf8());
2949        assert!(t_bytes(r"\B").is_always_utf8());
2950        assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2951
2952        // Negative examples.
2953        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2954        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2955        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2956        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2957        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2958    }
2959
2960    #[test]
2961    fn analysis_is_all_assertions() {
2962        // Positive examples.
2963        assert!(t(r"\b").is_all_assertions());
2964        assert!(t(r"\B").is_all_assertions());
2965        assert!(t(r"^").is_all_assertions());
2966        assert!(t(r"$").is_all_assertions());
2967        assert!(t(r"\A").is_all_assertions());
2968        assert!(t(r"\z").is_all_assertions());
2969        assert!(t(r"$^\z\A\b\B").is_all_assertions());
2970        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2971        assert!(t(r"^$|$^").is_all_assertions());
2972        assert!(t(r"((\b)+())*^").is_all_assertions());
2973
2974        // Negative examples.
2975        assert!(!t(r"^a").is_all_assertions());
2976    }
2977
2978    #[test]
2979    fn analysis_is_anchored() {
2980        // Positive examples.
2981        assert!(t(r"^").is_anchored_start());
2982        assert!(t(r"$").is_anchored_end());
2983        assert!(t(r"^").is_line_anchored_start());
2984        assert!(t(r"$").is_line_anchored_end());
2985
2986        assert!(t(r"^^").is_anchored_start());
2987        assert!(t(r"$$").is_anchored_end());
2988        assert!(t(r"^^").is_line_anchored_start());
2989        assert!(t(r"$$").is_line_anchored_end());
2990
2991        assert!(t(r"^$").is_anchored_start());
2992        assert!(t(r"^$").is_anchored_end());
2993        assert!(t(r"^$").is_line_anchored_start());
2994        assert!(t(r"^$").is_line_anchored_end());
2995
2996        assert!(t(r"^foo").is_anchored_start());
2997        assert!(t(r"foo$").is_anchored_end());
2998        assert!(t(r"^foo").is_line_anchored_start());
2999        assert!(t(r"foo$").is_line_anchored_end());
3000
3001        assert!(t(r"^foo|^bar").is_anchored_start());
3002        assert!(t(r"foo$|bar$").is_anchored_end());
3003        assert!(t(r"^foo|^bar").is_line_anchored_start());
3004        assert!(t(r"foo$|bar$").is_line_anchored_end());
3005
3006        assert!(t(r"^(foo|bar)").is_anchored_start());
3007        assert!(t(r"(foo|bar)$").is_anchored_end());
3008        assert!(t(r"^(foo|bar)").is_line_anchored_start());
3009        assert!(t(r"(foo|bar)$").is_line_anchored_end());
3010
3011        assert!(t(r"^+").is_anchored_start());
3012        assert!(t(r"$+").is_anchored_end());
3013        assert!(t(r"^+").is_line_anchored_start());
3014        assert!(t(r"$+").is_line_anchored_end());
3015        assert!(t(r"^++").is_anchored_start());
3016        assert!(t(r"$++").is_anchored_end());
3017        assert!(t(r"^++").is_line_anchored_start());
3018        assert!(t(r"$++").is_line_anchored_end());
3019        assert!(t(r"(^)+").is_anchored_start());
3020        assert!(t(r"($)+").is_anchored_end());
3021        assert!(t(r"(^)+").is_line_anchored_start());
3022        assert!(t(r"($)+").is_line_anchored_end());
3023
3024        assert!(t(r"$^").is_anchored_start());
3025        assert!(t(r"$^").is_anchored_start());
3026        assert!(t(r"$^").is_line_anchored_end());
3027        assert!(t(r"$^").is_line_anchored_end());
3028        assert!(t(r"$^|^$").is_anchored_start());
3029        assert!(t(r"$^|^$").is_anchored_end());
3030        assert!(t(r"$^|^$").is_line_anchored_start());
3031        assert!(t(r"$^|^$").is_line_anchored_end());
3032
3033        assert!(t(r"\b^").is_anchored_start());
3034        assert!(t(r"$\b").is_anchored_end());
3035        assert!(t(r"\b^").is_line_anchored_start());
3036        assert!(t(r"$\b").is_line_anchored_end());
3037        assert!(t(r"^(?m:^)").is_anchored_start());
3038        assert!(t(r"(?m:$)$").is_anchored_end());
3039        assert!(t(r"^(?m:^)").is_line_anchored_start());
3040        assert!(t(r"(?m:$)$").is_line_anchored_end());
3041        assert!(t(r"(?m:^)^").is_anchored_start());
3042        assert!(t(r"$(?m:$)").is_anchored_end());
3043        assert!(t(r"(?m:^)^").is_line_anchored_start());
3044        assert!(t(r"$(?m:$)").is_line_anchored_end());
3045
3046        // Negative examples.
3047        assert!(!t(r"(?m)^").is_anchored_start());
3048        assert!(!t(r"(?m)$").is_anchored_end());
3049        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
3050        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
3051        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
3052        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
3053
3054        assert!(!t(r"a^").is_anchored_start());
3055        assert!(!t(r"$a").is_anchored_start());
3056        assert!(!t(r"a^").is_line_anchored_start());
3057        assert!(!t(r"$a").is_line_anchored_start());
3058
3059        assert!(!t(r"a^").is_anchored_end());
3060        assert!(!t(r"$a").is_anchored_end());
3061        assert!(!t(r"a^").is_line_anchored_end());
3062        assert!(!t(r"$a").is_line_anchored_end());
3063
3064        assert!(!t(r"^foo|bar").is_anchored_start());
3065        assert!(!t(r"foo|bar$").is_anchored_end());
3066        assert!(!t(r"^foo|bar").is_line_anchored_start());
3067        assert!(!t(r"foo|bar$").is_line_anchored_end());
3068
3069        assert!(!t(r"^*").is_anchored_start());
3070        assert!(!t(r"$*").is_anchored_end());
3071        assert!(!t(r"^*").is_line_anchored_start());
3072        assert!(!t(r"$*").is_line_anchored_end());
3073        assert!(!t(r"^*+").is_anchored_start());
3074        assert!(!t(r"$*+").is_anchored_end());
3075        assert!(!t(r"^*+").is_line_anchored_start());
3076        assert!(!t(r"$*+").is_line_anchored_end());
3077        assert!(!t(r"^+*").is_anchored_start());
3078        assert!(!t(r"$+*").is_anchored_end());
3079        assert!(!t(r"^+*").is_line_anchored_start());
3080        assert!(!t(r"$+*").is_line_anchored_end());
3081        assert!(!t(r"(^)*").is_anchored_start());
3082        assert!(!t(r"($)*").is_anchored_end());
3083        assert!(!t(r"(^)*").is_line_anchored_start());
3084        assert!(!t(r"($)*").is_line_anchored_end());
3085    }
3086
3087    #[test]
3088    fn analysis_is_line_anchored() {
3089        assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3090        assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3091
3092        assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3093        assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3094
3095        assert!(t(r"(?m)^").is_line_anchored_start());
3096        assert!(t(r"(?m)$").is_line_anchored_end());
3097
3098        assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3099        assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3100
3101        assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3102        assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3103    }
3104
3105    #[test]
3106    fn analysis_is_any_anchored() {
3107        // Positive examples.
3108        assert!(t(r"^").is_any_anchored_start());
3109        assert!(t(r"$").is_any_anchored_end());
3110        assert!(t(r"\A").is_any_anchored_start());
3111        assert!(t(r"\z").is_any_anchored_end());
3112
3113        // Negative examples.
3114        assert!(!t(r"(?m)^").is_any_anchored_start());
3115        assert!(!t(r"(?m)$").is_any_anchored_end());
3116        assert!(!t(r"$").is_any_anchored_start());
3117        assert!(!t(r"^").is_any_anchored_end());
3118    }
3119
3120    #[test]
3121    fn analysis_is_match_empty() {
3122        // Positive examples.
3123        assert!(t(r"").is_match_empty());
3124        assert!(t(r"()").is_match_empty());
3125        assert!(t(r"()*").is_match_empty());
3126        assert!(t(r"()+").is_match_empty());
3127        assert!(t(r"()?").is_match_empty());
3128        assert!(t(r"a*").is_match_empty());
3129        assert!(t(r"a?").is_match_empty());
3130        assert!(t(r"a{0}").is_match_empty());
3131        assert!(t(r"a{0,}").is_match_empty());
3132        assert!(t(r"a{0,1}").is_match_empty());
3133        assert!(t(r"a{0,10}").is_match_empty());
3134        #[cfg(feature = "unicode-gencat")]
3135        assert!(t(r"\pL*").is_match_empty());
3136        assert!(t(r"a*|b").is_match_empty());
3137        assert!(t(r"b|a*").is_match_empty());
3138        assert!(t(r"a|").is_match_empty());
3139        assert!(t(r"|a").is_match_empty());
3140        assert!(t(r"a||b").is_match_empty());
3141        assert!(t(r"a*a?(abcd)*").is_match_empty());
3142        assert!(t(r"^").is_match_empty());
3143        assert!(t(r"$").is_match_empty());
3144        assert!(t(r"(?m)^").is_match_empty());
3145        assert!(t(r"(?m)$").is_match_empty());
3146        assert!(t(r"\A").is_match_empty());
3147        assert!(t(r"\z").is_match_empty());
3148        assert!(t(r"\B").is_match_empty());
3149        assert!(t_bytes(r"(?-u)\B").is_match_empty());
3150        assert!(t(r"\b").is_match_empty());
3151        assert!(t(r"(?-u)\b").is_match_empty());
3152
3153        // Negative examples.
3154        assert!(!t(r"a+").is_match_empty());
3155        assert!(!t(r"a{1}").is_match_empty());
3156        assert!(!t(r"a{1,}").is_match_empty());
3157        assert!(!t(r"a{1,2}").is_match_empty());
3158        assert!(!t(r"a{1,10}").is_match_empty());
3159        assert!(!t(r"b|a").is_match_empty());
3160        assert!(!t(r"a*a+(abcd)*").is_match_empty());
3161    }
3162
3163    #[test]
3164    fn analysis_is_literal() {
3165        // Positive examples.
3166        assert!(t(r"a").is_literal());
3167        assert!(t(r"ab").is_literal());
3168        assert!(t(r"abc").is_literal());
3169        assert!(t(r"(?m)abc").is_literal());
3170
3171        // Negative examples.
3172        assert!(!t(r"").is_literal());
3173        assert!(!t(r"^").is_literal());
3174        assert!(!t(r"a|b").is_literal());
3175        assert!(!t(r"(a)").is_literal());
3176        assert!(!t(r"a+").is_literal());
3177        assert!(!t(r"foo(a)").is_literal());
3178        assert!(!t(r"(a)foo").is_literal());
3179        assert!(!t(r"[a]").is_literal());
3180    }
3181
3182    #[test]
3183    fn analysis_is_alternation_literal() {
3184        // Positive examples.
3185        assert!(t(r"a").is_alternation_literal());
3186        assert!(t(r"ab").is_alternation_literal());
3187        assert!(t(r"abc").is_alternation_literal());
3188        assert!(t(r"(?m)abc").is_alternation_literal());
3189        assert!(t(r"a|b").is_alternation_literal());
3190        assert!(t(r"a|b|c").is_alternation_literal());
3191        assert!(t(r"foo|bar").is_alternation_literal());
3192        assert!(t(r"foo|bar|baz").is_alternation_literal());
3193
3194        // Negative examples.
3195        assert!(!t(r"").is_alternation_literal());
3196        assert!(!t(r"^").is_alternation_literal());
3197        assert!(!t(r"(a)").is_alternation_literal());
3198        assert!(!t(r"a+").is_alternation_literal());
3199        assert!(!t(r"foo(a)").is_alternation_literal());
3200        assert!(!t(r"(a)foo").is_alternation_literal());
3201        assert!(!t(r"[a]").is_alternation_literal());
3202        assert!(!t(r"[a]|b").is_alternation_literal());
3203        assert!(!t(r"a|[b]").is_alternation_literal());
3204        assert!(!t(r"(a)|b").is_alternation_literal());
3205        assert!(!t(r"a|(b)").is_alternation_literal());
3206    }
3207}
3208