1c67d6573Sopenharmony_ciuse std::cmp;
2c67d6573Sopenharmony_ciuse std::error;
3c67d6573Sopenharmony_ciuse std::fmt;
4c67d6573Sopenharmony_ciuse std::result;
5c67d6573Sopenharmony_ci
6c67d6573Sopenharmony_ciuse crate::ast;
7c67d6573Sopenharmony_ciuse crate::hir;
8c67d6573Sopenharmony_ci
9c67d6573Sopenharmony_ci/// A type alias for dealing with errors returned by this crate.
10c67d6573Sopenharmony_cipub type Result<T> = result::Result<T, Error>;
11c67d6573Sopenharmony_ci
12c67d6573Sopenharmony_ci/// This error type encompasses any error that can be returned by this crate.
13c67d6573Sopenharmony_ci#[derive(Clone, Debug, Eq, PartialEq)]
14c67d6573Sopenharmony_cipub enum Error {
15c67d6573Sopenharmony_ci    /// An error that occurred while translating concrete syntax into abstract
16c67d6573Sopenharmony_ci    /// syntax (AST).
17c67d6573Sopenharmony_ci    Parse(ast::Error),
18c67d6573Sopenharmony_ci    /// An error that occurred while translating abstract syntax into a high
19c67d6573Sopenharmony_ci    /// level intermediate representation (HIR).
20c67d6573Sopenharmony_ci    Translate(hir::Error),
21c67d6573Sopenharmony_ci    /// Hints that destructuring should not be exhaustive.
22c67d6573Sopenharmony_ci    ///
23c67d6573Sopenharmony_ci    /// This enum may grow additional variants, so this makes sure clients
24c67d6573Sopenharmony_ci    /// don't count on exhaustive matching. (Otherwise, adding a new variant
25c67d6573Sopenharmony_ci    /// could break existing code.)
26c67d6573Sopenharmony_ci    #[doc(hidden)]
27c67d6573Sopenharmony_ci    __Nonexhaustive,
28c67d6573Sopenharmony_ci}
29c67d6573Sopenharmony_ci
30c67d6573Sopenharmony_ciimpl From<ast::Error> for Error {
31c67d6573Sopenharmony_ci    fn from(err: ast::Error) -> Error {
32c67d6573Sopenharmony_ci        Error::Parse(err)
33c67d6573Sopenharmony_ci    }
34c67d6573Sopenharmony_ci}
35c67d6573Sopenharmony_ci
36c67d6573Sopenharmony_ciimpl From<hir::Error> for Error {
37c67d6573Sopenharmony_ci    fn from(err: hir::Error) -> Error {
38c67d6573Sopenharmony_ci        Error::Translate(err)
39c67d6573Sopenharmony_ci    }
40c67d6573Sopenharmony_ci}
41c67d6573Sopenharmony_ci
42c67d6573Sopenharmony_ciimpl error::Error for Error {
43c67d6573Sopenharmony_ci    // TODO: Remove this method entirely on the next breaking semver release.
44c67d6573Sopenharmony_ci    #[allow(deprecated)]
45c67d6573Sopenharmony_ci    fn description(&self) -> &str {
46c67d6573Sopenharmony_ci        match *self {
47c67d6573Sopenharmony_ci            Error::Parse(ref x) => x.description(),
48c67d6573Sopenharmony_ci            Error::Translate(ref x) => x.description(),
49c67d6573Sopenharmony_ci            _ => unreachable!(),
50c67d6573Sopenharmony_ci        }
51c67d6573Sopenharmony_ci    }
52c67d6573Sopenharmony_ci}
53c67d6573Sopenharmony_ci
54c67d6573Sopenharmony_ciimpl fmt::Display for Error {
55c67d6573Sopenharmony_ci    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56c67d6573Sopenharmony_ci        match *self {
57c67d6573Sopenharmony_ci            Error::Parse(ref x) => x.fmt(f),
58c67d6573Sopenharmony_ci            Error::Translate(ref x) => x.fmt(f),
59c67d6573Sopenharmony_ci            _ => unreachable!(),
60c67d6573Sopenharmony_ci        }
61c67d6573Sopenharmony_ci    }
62c67d6573Sopenharmony_ci}
63c67d6573Sopenharmony_ci
64c67d6573Sopenharmony_ci/// A helper type for formatting nice error messages.
65c67d6573Sopenharmony_ci///
66c67d6573Sopenharmony_ci/// This type is responsible for reporting regex parse errors in a nice human
67c67d6573Sopenharmony_ci/// readable format. Most of its complexity is from interspersing notational
68c67d6573Sopenharmony_ci/// markers pointing out the position where an error occurred.
69c67d6573Sopenharmony_ci#[derive(Debug)]
70c67d6573Sopenharmony_cipub struct Formatter<'e, E> {
71c67d6573Sopenharmony_ci    /// The original regex pattern in which the error occurred.
72c67d6573Sopenharmony_ci    pattern: &'e str,
73c67d6573Sopenharmony_ci    /// The error kind. It must impl fmt::Display.
74c67d6573Sopenharmony_ci    err: &'e E,
75c67d6573Sopenharmony_ci    /// The primary span of the error.
76c67d6573Sopenharmony_ci    span: &'e ast::Span,
77c67d6573Sopenharmony_ci    /// An auxiliary and optional span, in case the error needs to point to
78c67d6573Sopenharmony_ci    /// two locations (e.g., when reporting a duplicate capture group name).
79c67d6573Sopenharmony_ci    aux_span: Option<&'e ast::Span>,
80c67d6573Sopenharmony_ci}
81c67d6573Sopenharmony_ci
82c67d6573Sopenharmony_ciimpl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
83c67d6573Sopenharmony_ci    fn from(err: &'e ast::Error) -> Self {
84c67d6573Sopenharmony_ci        Formatter {
85c67d6573Sopenharmony_ci            pattern: err.pattern(),
86c67d6573Sopenharmony_ci            err: err.kind(),
87c67d6573Sopenharmony_ci            span: err.span(),
88c67d6573Sopenharmony_ci            aux_span: err.auxiliary_span(),
89c67d6573Sopenharmony_ci        }
90c67d6573Sopenharmony_ci    }
91c67d6573Sopenharmony_ci}
92c67d6573Sopenharmony_ci
93c67d6573Sopenharmony_ciimpl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
94c67d6573Sopenharmony_ci    fn from(err: &'e hir::Error) -> Self {
95c67d6573Sopenharmony_ci        Formatter {
96c67d6573Sopenharmony_ci            pattern: err.pattern(),
97c67d6573Sopenharmony_ci            err: err.kind(),
98c67d6573Sopenharmony_ci            span: err.span(),
99c67d6573Sopenharmony_ci            aux_span: None,
100c67d6573Sopenharmony_ci        }
101c67d6573Sopenharmony_ci    }
102c67d6573Sopenharmony_ci}
103c67d6573Sopenharmony_ci
104c67d6573Sopenharmony_ciimpl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
105c67d6573Sopenharmony_ci    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106c67d6573Sopenharmony_ci        let spans = Spans::from_formatter(self);
107c67d6573Sopenharmony_ci        if self.pattern.contains('\n') {
108c67d6573Sopenharmony_ci            let divider = repeat_char('~', 79);
109c67d6573Sopenharmony_ci
110c67d6573Sopenharmony_ci            writeln!(f, "regex parse error:")?;
111c67d6573Sopenharmony_ci            writeln!(f, "{}", divider)?;
112c67d6573Sopenharmony_ci            let notated = spans.notate();
113c67d6573Sopenharmony_ci            write!(f, "{}", notated)?;
114c67d6573Sopenharmony_ci            writeln!(f, "{}", divider)?;
115c67d6573Sopenharmony_ci            // If we have error spans that cover multiple lines, then we just
116c67d6573Sopenharmony_ci            // note the line numbers.
117c67d6573Sopenharmony_ci            if !spans.multi_line.is_empty() {
118c67d6573Sopenharmony_ci                let mut notes = vec![];
119c67d6573Sopenharmony_ci                for span in &spans.multi_line {
120c67d6573Sopenharmony_ci                    notes.push(format!(
121c67d6573Sopenharmony_ci                        "on line {} (column {}) through line {} (column {})",
122c67d6573Sopenharmony_ci                        span.start.line,
123c67d6573Sopenharmony_ci                        span.start.column,
124c67d6573Sopenharmony_ci                        span.end.line,
125c67d6573Sopenharmony_ci                        span.end.column - 1
126c67d6573Sopenharmony_ci                    ));
127c67d6573Sopenharmony_ci                }
128c67d6573Sopenharmony_ci                writeln!(f, "{}", notes.join("\n"))?;
129c67d6573Sopenharmony_ci            }
130c67d6573Sopenharmony_ci            write!(f, "error: {}", self.err)?;
131c67d6573Sopenharmony_ci        } else {
132c67d6573Sopenharmony_ci            writeln!(f, "regex parse error:")?;
133c67d6573Sopenharmony_ci            let notated = Spans::from_formatter(self).notate();
134c67d6573Sopenharmony_ci            write!(f, "{}", notated)?;
135c67d6573Sopenharmony_ci            write!(f, "error: {}", self.err)?;
136c67d6573Sopenharmony_ci        }
137c67d6573Sopenharmony_ci        Ok(())
138c67d6573Sopenharmony_ci    }
139c67d6573Sopenharmony_ci}
140c67d6573Sopenharmony_ci
141c67d6573Sopenharmony_ci/// This type represents an arbitrary number of error spans in a way that makes
142c67d6573Sopenharmony_ci/// it convenient to notate the regex pattern. ("Notate" means "point out
143c67d6573Sopenharmony_ci/// exactly where the error occurred in the regex pattern.")
144c67d6573Sopenharmony_ci///
145c67d6573Sopenharmony_ci/// Technically, we can only ever have two spans given our current error
146c67d6573Sopenharmony_ci/// structure. However, after toiling with a specific algorithm for handling
147c67d6573Sopenharmony_ci/// two spans, it became obvious that an algorithm to handle an arbitrary
148c67d6573Sopenharmony_ci/// number of spans was actually much simpler.
149c67d6573Sopenharmony_cistruct Spans<'p> {
150c67d6573Sopenharmony_ci    /// The original regex pattern string.
151c67d6573Sopenharmony_ci    pattern: &'p str,
152c67d6573Sopenharmony_ci    /// The total width that should be used for line numbers. The width is
153c67d6573Sopenharmony_ci    /// used for left padding the line numbers for alignment.
154c67d6573Sopenharmony_ci    ///
155c67d6573Sopenharmony_ci    /// A value of `0` means line numbers should not be displayed. That is,
156c67d6573Sopenharmony_ci    /// the pattern is itself only one line.
157c67d6573Sopenharmony_ci    line_number_width: usize,
158c67d6573Sopenharmony_ci    /// All error spans that occur on a single line. This sequence always has
159c67d6573Sopenharmony_ci    /// length equivalent to the number of lines in `pattern`, where the index
160c67d6573Sopenharmony_ci    /// of the sequence represents a line number, starting at `0`. The spans
161c67d6573Sopenharmony_ci    /// in each line are sorted in ascending order.
162c67d6573Sopenharmony_ci    by_line: Vec<Vec<ast::Span>>,
163c67d6573Sopenharmony_ci    /// All error spans that occur over one or more lines. That is, the start
164c67d6573Sopenharmony_ci    /// and end position of the span have different line numbers. The spans are
165c67d6573Sopenharmony_ci    /// sorted in ascending order.
166c67d6573Sopenharmony_ci    multi_line: Vec<ast::Span>,
167c67d6573Sopenharmony_ci}
168c67d6573Sopenharmony_ci
169c67d6573Sopenharmony_ciimpl<'p> Spans<'p> {
170c67d6573Sopenharmony_ci    /// Build a sequence of spans from a formatter.
171c67d6573Sopenharmony_ci    fn from_formatter<'e, E: fmt::Display>(
172c67d6573Sopenharmony_ci        fmter: &'p Formatter<'e, E>,
173c67d6573Sopenharmony_ci    ) -> Spans<'p> {
174c67d6573Sopenharmony_ci        let mut line_count = fmter.pattern.lines().count();
175c67d6573Sopenharmony_ci        // If the pattern ends with a `\n` literal, then our line count is
176c67d6573Sopenharmony_ci        // off by one, since a span can occur immediately after the last `\n`,
177c67d6573Sopenharmony_ci        // which is consider to be an additional line.
178c67d6573Sopenharmony_ci        if fmter.pattern.ends_with('\n') {
179c67d6573Sopenharmony_ci            line_count += 1;
180c67d6573Sopenharmony_ci        }
181c67d6573Sopenharmony_ci        let line_number_width =
182c67d6573Sopenharmony_ci            if line_count <= 1 { 0 } else { line_count.to_string().len() };
183c67d6573Sopenharmony_ci        let mut spans = Spans {
184c67d6573Sopenharmony_ci            pattern: &fmter.pattern,
185c67d6573Sopenharmony_ci            line_number_width,
186c67d6573Sopenharmony_ci            by_line: vec![vec![]; line_count],
187c67d6573Sopenharmony_ci            multi_line: vec![],
188c67d6573Sopenharmony_ci        };
189c67d6573Sopenharmony_ci        spans.add(fmter.span.clone());
190c67d6573Sopenharmony_ci        if let Some(span) = fmter.aux_span {
191c67d6573Sopenharmony_ci            spans.add(span.clone());
192c67d6573Sopenharmony_ci        }
193c67d6573Sopenharmony_ci        spans
194c67d6573Sopenharmony_ci    }
195c67d6573Sopenharmony_ci
196c67d6573Sopenharmony_ci    /// Add the given span to this sequence, putting it in the right place.
197c67d6573Sopenharmony_ci    fn add(&mut self, span: ast::Span) {
198c67d6573Sopenharmony_ci        // This is grossly inefficient since we sort after each add, but right
199c67d6573Sopenharmony_ci        // now, we only ever add two spans at most.
200c67d6573Sopenharmony_ci        if span.is_one_line() {
201c67d6573Sopenharmony_ci            let i = span.start.line - 1; // because lines are 1-indexed
202c67d6573Sopenharmony_ci            self.by_line[i].push(span);
203c67d6573Sopenharmony_ci            self.by_line[i].sort();
204c67d6573Sopenharmony_ci        } else {
205c67d6573Sopenharmony_ci            self.multi_line.push(span);
206c67d6573Sopenharmony_ci            self.multi_line.sort();
207c67d6573Sopenharmony_ci        }
208c67d6573Sopenharmony_ci    }
209c67d6573Sopenharmony_ci
210c67d6573Sopenharmony_ci    /// Notate the pattern string with carents (`^`) pointing at each span
211c67d6573Sopenharmony_ci    /// location. This only applies to spans that occur within a single line.
212c67d6573Sopenharmony_ci    fn notate(&self) -> String {
213c67d6573Sopenharmony_ci        let mut notated = String::new();
214c67d6573Sopenharmony_ci        for (i, line) in self.pattern.lines().enumerate() {
215c67d6573Sopenharmony_ci            if self.line_number_width > 0 {
216c67d6573Sopenharmony_ci                notated.push_str(&self.left_pad_line_number(i + 1));
217c67d6573Sopenharmony_ci                notated.push_str(": ");
218c67d6573Sopenharmony_ci            } else {
219c67d6573Sopenharmony_ci                notated.push_str("    ");
220c67d6573Sopenharmony_ci            }
221c67d6573Sopenharmony_ci            notated.push_str(line);
222c67d6573Sopenharmony_ci            notated.push('\n');
223c67d6573Sopenharmony_ci            if let Some(notes) = self.notate_line(i) {
224c67d6573Sopenharmony_ci                notated.push_str(&notes);
225c67d6573Sopenharmony_ci                notated.push('\n');
226c67d6573Sopenharmony_ci            }
227c67d6573Sopenharmony_ci        }
228c67d6573Sopenharmony_ci        notated
229c67d6573Sopenharmony_ci    }
230c67d6573Sopenharmony_ci
231c67d6573Sopenharmony_ci    /// Return notes for the line indexed at `i` (zero-based). If there are no
232c67d6573Sopenharmony_ci    /// spans for the given line, then `None` is returned. Otherwise, an
233c67d6573Sopenharmony_ci    /// appropriately space padded string with correctly positioned `^` is
234c67d6573Sopenharmony_ci    /// returned, accounting for line numbers.
235c67d6573Sopenharmony_ci    fn notate_line(&self, i: usize) -> Option<String> {
236c67d6573Sopenharmony_ci        let spans = &self.by_line[i];
237c67d6573Sopenharmony_ci        if spans.is_empty() {
238c67d6573Sopenharmony_ci            return None;
239c67d6573Sopenharmony_ci        }
240c67d6573Sopenharmony_ci        let mut notes = String::new();
241c67d6573Sopenharmony_ci        for _ in 0..self.line_number_padding() {
242c67d6573Sopenharmony_ci            notes.push(' ');
243c67d6573Sopenharmony_ci        }
244c67d6573Sopenharmony_ci        let mut pos = 0;
245c67d6573Sopenharmony_ci        for span in spans {
246c67d6573Sopenharmony_ci            for _ in pos..(span.start.column - 1) {
247c67d6573Sopenharmony_ci                notes.push(' ');
248c67d6573Sopenharmony_ci                pos += 1;
249c67d6573Sopenharmony_ci            }
250c67d6573Sopenharmony_ci            let note_len = span.end.column.saturating_sub(span.start.column);
251c67d6573Sopenharmony_ci            for _ in 0..cmp::max(1, note_len) {
252c67d6573Sopenharmony_ci                notes.push('^');
253c67d6573Sopenharmony_ci                pos += 1;
254c67d6573Sopenharmony_ci            }
255c67d6573Sopenharmony_ci        }
256c67d6573Sopenharmony_ci        Some(notes)
257c67d6573Sopenharmony_ci    }
258c67d6573Sopenharmony_ci
259c67d6573Sopenharmony_ci    /// Left pad the given line number with spaces such that it is aligned with
260c67d6573Sopenharmony_ci    /// other line numbers.
261c67d6573Sopenharmony_ci    fn left_pad_line_number(&self, n: usize) -> String {
262c67d6573Sopenharmony_ci        let n = n.to_string();
263c67d6573Sopenharmony_ci        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
264c67d6573Sopenharmony_ci        let mut result = repeat_char(' ', pad);
265c67d6573Sopenharmony_ci        result.push_str(&n);
266c67d6573Sopenharmony_ci        result
267c67d6573Sopenharmony_ci    }
268c67d6573Sopenharmony_ci
269c67d6573Sopenharmony_ci    /// Return the line number padding beginning at the start of each line of
270c67d6573Sopenharmony_ci    /// the pattern.
271c67d6573Sopenharmony_ci    ///
272c67d6573Sopenharmony_ci    /// If the pattern is only one line, then this returns a fixed padding
273c67d6573Sopenharmony_ci    /// for visual indentation.
274c67d6573Sopenharmony_ci    fn line_number_padding(&self) -> usize {
275c67d6573Sopenharmony_ci        if self.line_number_width == 0 {
276c67d6573Sopenharmony_ci            4
277c67d6573Sopenharmony_ci        } else {
278c67d6573Sopenharmony_ci            2 + self.line_number_width
279c67d6573Sopenharmony_ci        }
280c67d6573Sopenharmony_ci    }
281c67d6573Sopenharmony_ci}
282c67d6573Sopenharmony_ci
/// Returns a string made of `c` repeated `count` times.
///
/// A `count` of `0` yields an empty string.
fn repeat_char(c: char, count: usize) -> String {
    ::std::iter::repeat(c).take(count).collect()
}
286c67d6573Sopenharmony_ci
#[cfg(test)]
mod tests {
    use crate::ast::parse::Parser;

    /// Asserts that parsing `pattern` fails and that the error's `Display`
    /// output equals `expected_msg` with surrounding whitespace trimmed.
    ///
    /// Despite the name, a panic only happens if the pattern unexpectedly
    /// parses or if the messages differ.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}
325