1c67d6573Sopenharmony_ciuse std::cmp; 2c67d6573Sopenharmony_ciuse std::error; 3c67d6573Sopenharmony_ciuse std::fmt; 4c67d6573Sopenharmony_ciuse std::result; 5c67d6573Sopenharmony_ci 6c67d6573Sopenharmony_ciuse crate::ast; 7c67d6573Sopenharmony_ciuse crate::hir; 8c67d6573Sopenharmony_ci 9c67d6573Sopenharmony_ci/// A type alias for dealing with errors returned by this crate. 10c67d6573Sopenharmony_cipub type Result<T> = result::Result<T, Error>; 11c67d6573Sopenharmony_ci 12c67d6573Sopenharmony_ci/// This error type encompasses any error that can be returned by this crate. 13c67d6573Sopenharmony_ci#[derive(Clone, Debug, Eq, PartialEq)] 14c67d6573Sopenharmony_cipub enum Error { 15c67d6573Sopenharmony_ci /// An error that occurred while translating concrete syntax into abstract 16c67d6573Sopenharmony_ci /// syntax (AST). 17c67d6573Sopenharmony_ci Parse(ast::Error), 18c67d6573Sopenharmony_ci /// An error that occurred while translating abstract syntax into a high 19c67d6573Sopenharmony_ci /// level intermediate representation (HIR). 20c67d6573Sopenharmony_ci Translate(hir::Error), 21c67d6573Sopenharmony_ci /// Hints that destructuring should not be exhaustive. 22c67d6573Sopenharmony_ci /// 23c67d6573Sopenharmony_ci /// This enum may grow additional variants, so this makes sure clients 24c67d6573Sopenharmony_ci /// don't count on exhaustive matching. (Otherwise, adding a new variant 25c67d6573Sopenharmony_ci /// could break existing code.) 26c67d6573Sopenharmony_ci #[doc(hidden)] 27c67d6573Sopenharmony_ci __Nonexhaustive, 28c67d6573Sopenharmony_ci} 29c67d6573Sopenharmony_ci 30c67d6573Sopenharmony_ciimpl From<ast::Error> for Error { 31c67d6573Sopenharmony_ci fn from(err: ast::Error) -> Error { 32c67d6573Sopenharmony_ci Error::Parse(err) 33c67d6573Sopenharmony_ci } 34c67d6573Sopenharmony_ci} 35c67d6573Sopenharmony_ci 36c67d6573Sopenharmony_ciimpl From<hir::Error> for Error { 37c67d6573Sopenharmony_ci fn from(err: hir::Error) -> Error { 38c67d6573Sopenharmony_ci Error::Translate(err) 39c67d6573Sopenharmony_ci } 40c67d6573Sopenharmony_ci} 41c67d6573Sopenharmony_ci 42c67d6573Sopenharmony_ciimpl error::Error for Error { 43c67d6573Sopenharmony_ci // TODO: Remove this method entirely on the next breaking semver release. 44c67d6573Sopenharmony_ci #[allow(deprecated)] 45c67d6573Sopenharmony_ci fn description(&self) -> &str { 46c67d6573Sopenharmony_ci match *self { 47c67d6573Sopenharmony_ci Error::Parse(ref x) => x.description(), 48c67d6573Sopenharmony_ci Error::Translate(ref x) => x.description(), 49c67d6573Sopenharmony_ci _ => unreachable!(), 50c67d6573Sopenharmony_ci } 51c67d6573Sopenharmony_ci } 52c67d6573Sopenharmony_ci} 53c67d6573Sopenharmony_ci 54c67d6573Sopenharmony_ciimpl fmt::Display for Error { 55c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 56c67d6573Sopenharmony_ci match *self { 57c67d6573Sopenharmony_ci Error::Parse(ref x) => x.fmt(f), 58c67d6573Sopenharmony_ci Error::Translate(ref x) => x.fmt(f), 59c67d6573Sopenharmony_ci _ => unreachable!(), 60c67d6573Sopenharmony_ci } 61c67d6573Sopenharmony_ci } 62c67d6573Sopenharmony_ci} 63c67d6573Sopenharmony_ci 64c67d6573Sopenharmony_ci/// A helper type for formatting nice error messages. 65c67d6573Sopenharmony_ci/// 66c67d6573Sopenharmony_ci/// This type is responsible for reporting regex parse errors in a nice human 67c67d6573Sopenharmony_ci/// readable format. Most of its complexity is from interspersing notational 68c67d6573Sopenharmony_ci/// markers pointing out the position where an error occurred. 69c67d6573Sopenharmony_ci#[derive(Debug)] 70c67d6573Sopenharmony_cipub struct Formatter<'e, E> { 71c67d6573Sopenharmony_ci /// The original regex pattern in which the error occurred. 72c67d6573Sopenharmony_ci pattern: &'e str, 73c67d6573Sopenharmony_ci /// The error kind. It must impl fmt::Display. 74c67d6573Sopenharmony_ci err: &'e E, 75c67d6573Sopenharmony_ci /// The primary span of the error. 76c67d6573Sopenharmony_ci span: &'e ast::Span, 77c67d6573Sopenharmony_ci /// An auxiliary and optional span, in case the error needs to point to 78c67d6573Sopenharmony_ci /// two locations (e.g., when reporting a duplicate capture group name). 79c67d6573Sopenharmony_ci aux_span: Option<&'e ast::Span>, 80c67d6573Sopenharmony_ci} 81c67d6573Sopenharmony_ci 82c67d6573Sopenharmony_ciimpl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> { 83c67d6573Sopenharmony_ci fn from(err: &'e ast::Error) -> Self { 84c67d6573Sopenharmony_ci Formatter { 85c67d6573Sopenharmony_ci pattern: err.pattern(), 86c67d6573Sopenharmony_ci err: err.kind(), 87c67d6573Sopenharmony_ci span: err.span(), 88c67d6573Sopenharmony_ci aux_span: err.auxiliary_span(), 89c67d6573Sopenharmony_ci } 90c67d6573Sopenharmony_ci } 91c67d6573Sopenharmony_ci} 92c67d6573Sopenharmony_ci 93c67d6573Sopenharmony_ciimpl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { 94c67d6573Sopenharmony_ci fn from(err: &'e hir::Error) -> Self { 95c67d6573Sopenharmony_ci Formatter { 96c67d6573Sopenharmony_ci pattern: err.pattern(), 97c67d6573Sopenharmony_ci err: err.kind(), 98c67d6573Sopenharmony_ci span: err.span(), 99c67d6573Sopenharmony_ci aux_span: None, 100c67d6573Sopenharmony_ci } 101c67d6573Sopenharmony_ci } 102c67d6573Sopenharmony_ci} 103c67d6573Sopenharmony_ci 104c67d6573Sopenharmony_ciimpl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { 105c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 106c67d6573Sopenharmony_ci let spans = Spans::from_formatter(self); 107c67d6573Sopenharmony_ci if self.pattern.contains('\n') { 108c67d6573Sopenharmony_ci let divider = repeat_char('~', 79); 109c67d6573Sopenharmony_ci 110c67d6573Sopenharmony_ci writeln!(f, "regex parse error:")?; 111c67d6573Sopenharmony_ci writeln!(f, "{}", divider)?; 112c67d6573Sopenharmony_ci let notated = spans.notate(); 113c67d6573Sopenharmony_ci write!(f, "{}", notated)?; 114c67d6573Sopenharmony_ci writeln!(f, "{}", divider)?; 115c67d6573Sopenharmony_ci // If we have error spans that cover multiple lines, then we just 116c67d6573Sopenharmony_ci // note the line numbers. 117c67d6573Sopenharmony_ci if !spans.multi_line.is_empty() { 118c67d6573Sopenharmony_ci let mut notes = vec![]; 119c67d6573Sopenharmony_ci for span in &spans.multi_line { 120c67d6573Sopenharmony_ci notes.push(format!( 121c67d6573Sopenharmony_ci "on line {} (column {}) through line {} (column {})", 122c67d6573Sopenharmony_ci span.start.line, 123c67d6573Sopenharmony_ci span.start.column, 124c67d6573Sopenharmony_ci span.end.line, 125c67d6573Sopenharmony_ci span.end.column - 1 126c67d6573Sopenharmony_ci )); 127c67d6573Sopenharmony_ci } 128c67d6573Sopenharmony_ci writeln!(f, "{}", notes.join("\n"))?; 129c67d6573Sopenharmony_ci } 130c67d6573Sopenharmony_ci write!(f, "error: {}", self.err)?; 131c67d6573Sopenharmony_ci } else { 132c67d6573Sopenharmony_ci writeln!(f, "regex parse error:")?; 133c67d6573Sopenharmony_ci let notated = Spans::from_formatter(self).notate(); 134c67d6573Sopenharmony_ci write!(f, "{}", notated)?; 135c67d6573Sopenharmony_ci write!(f, "error: {}", self.err)?; 136c67d6573Sopenharmony_ci } 137c67d6573Sopenharmony_ci Ok(()) 138c67d6573Sopenharmony_ci } 139c67d6573Sopenharmony_ci} 140c67d6573Sopenharmony_ci 141c67d6573Sopenharmony_ci/// This type represents an arbitrary number of error spans in a way that makes 142c67d6573Sopenharmony_ci/// it convenient to notate the regex pattern. ("Notate" means "point out 143c67d6573Sopenharmony_ci/// exactly where the error occurred in the regex pattern.") 144c67d6573Sopenharmony_ci/// 145c67d6573Sopenharmony_ci/// Technically, we can only ever have two spans given our current error 146c67d6573Sopenharmony_ci/// structure. However, after toiling with a specific algorithm for handling 147c67d6573Sopenharmony_ci/// two spans, it became obvious that an algorithm to handle an arbitrary 148c67d6573Sopenharmony_ci/// number of spans was actually much simpler. 149c67d6573Sopenharmony_cistruct Spans<'p> { 150c67d6573Sopenharmony_ci /// The original regex pattern string. 151c67d6573Sopenharmony_ci pattern: &'p str, 152c67d6573Sopenharmony_ci /// The total width that should be used for line numbers. The width is 153c67d6573Sopenharmony_ci /// used for left padding the line numbers for alignment. 154c67d6573Sopenharmony_ci /// 155c67d6573Sopenharmony_ci /// A value of `0` means line numbers should not be displayed. That is, 156c67d6573Sopenharmony_ci /// the pattern is itself only one line. 157c67d6573Sopenharmony_ci line_number_width: usize, 158c67d6573Sopenharmony_ci /// All error spans that occur on a single line. This sequence always has 159c67d6573Sopenharmony_ci /// length equivalent to the number of lines in `pattern`, where the index 160c67d6573Sopenharmony_ci /// of the sequence represents a line number, starting at `0`. The spans 161c67d6573Sopenharmony_ci /// in each line are sorted in ascending order. 162c67d6573Sopenharmony_ci by_line: Vec<Vec<ast::Span>>, 163c67d6573Sopenharmony_ci /// All error spans that occur over one or more lines. That is, the start 164c67d6573Sopenharmony_ci /// and end position of the span have different line numbers. The spans are 165c67d6573Sopenharmony_ci /// sorted in ascending order. 166c67d6573Sopenharmony_ci multi_line: Vec<ast::Span>, 167c67d6573Sopenharmony_ci} 168c67d6573Sopenharmony_ci 169c67d6573Sopenharmony_ciimpl<'p> Spans<'p> { 170c67d6573Sopenharmony_ci /// Build a sequence of spans from a formatter. 171c67d6573Sopenharmony_ci fn from_formatter<'e, E: fmt::Display>( 172c67d6573Sopenharmony_ci fmter: &'p Formatter<'e, E>, 173c67d6573Sopenharmony_ci ) -> Spans<'p> { 174c67d6573Sopenharmony_ci let mut line_count = fmter.pattern.lines().count(); 175c67d6573Sopenharmony_ci // If the pattern ends with a `\n` literal, then our line count is 176c67d6573Sopenharmony_ci // off by one, since a span can occur immediately after the last `\n`, 177c67d6573Sopenharmony_ci // which is consider to be an additional line. 178c67d6573Sopenharmony_ci if fmter.pattern.ends_with('\n') { 179c67d6573Sopenharmony_ci line_count += 1; 180c67d6573Sopenharmony_ci } 181c67d6573Sopenharmony_ci let line_number_width = 182c67d6573Sopenharmony_ci if line_count <= 1 { 0 } else { line_count.to_string().len() }; 183c67d6573Sopenharmony_ci let mut spans = Spans { 184c67d6573Sopenharmony_ci pattern: &fmter.pattern, 185c67d6573Sopenharmony_ci line_number_width, 186c67d6573Sopenharmony_ci by_line: vec![vec![]; line_count], 187c67d6573Sopenharmony_ci multi_line: vec![], 188c67d6573Sopenharmony_ci }; 189c67d6573Sopenharmony_ci spans.add(fmter.span.clone()); 190c67d6573Sopenharmony_ci if let Some(span) = fmter.aux_span { 191c67d6573Sopenharmony_ci spans.add(span.clone()); 192c67d6573Sopenharmony_ci } 193c67d6573Sopenharmony_ci spans 194c67d6573Sopenharmony_ci } 195c67d6573Sopenharmony_ci 196c67d6573Sopenharmony_ci /// Add the given span to this sequence, putting it in the right place. 197c67d6573Sopenharmony_ci fn add(&mut self, span: ast::Span) { 198c67d6573Sopenharmony_ci // This is grossly inefficient since we sort after each add, but right 199c67d6573Sopenharmony_ci // now, we only ever add two spans at most. 200c67d6573Sopenharmony_ci if span.is_one_line() { 201c67d6573Sopenharmony_ci let i = span.start.line - 1; // because lines are 1-indexed 202c67d6573Sopenharmony_ci self.by_line[i].push(span); 203c67d6573Sopenharmony_ci self.by_line[i].sort(); 204c67d6573Sopenharmony_ci } else { 205c67d6573Sopenharmony_ci self.multi_line.push(span); 206c67d6573Sopenharmony_ci self.multi_line.sort(); 207c67d6573Sopenharmony_ci } 208c67d6573Sopenharmony_ci } 209c67d6573Sopenharmony_ci 210c67d6573Sopenharmony_ci /// Notate the pattern string with carents (`^`) pointing at each span 211c67d6573Sopenharmony_ci /// location. This only applies to spans that occur within a single line. 212c67d6573Sopenharmony_ci fn notate(&self) -> String { 213c67d6573Sopenharmony_ci let mut notated = String::new(); 214c67d6573Sopenharmony_ci for (i, line) in self.pattern.lines().enumerate() { 215c67d6573Sopenharmony_ci if self.line_number_width > 0 { 216c67d6573Sopenharmony_ci notated.push_str(&self.left_pad_line_number(i + 1)); 217c67d6573Sopenharmony_ci notated.push_str(": "); 218c67d6573Sopenharmony_ci } else { 219c67d6573Sopenharmony_ci notated.push_str(" "); 220c67d6573Sopenharmony_ci } 221c67d6573Sopenharmony_ci notated.push_str(line); 222c67d6573Sopenharmony_ci notated.push('\n'); 223c67d6573Sopenharmony_ci if let Some(notes) = self.notate_line(i) { 224c67d6573Sopenharmony_ci notated.push_str(¬es); 225c67d6573Sopenharmony_ci notated.push('\n'); 226c67d6573Sopenharmony_ci } 227c67d6573Sopenharmony_ci } 228c67d6573Sopenharmony_ci notated 229c67d6573Sopenharmony_ci } 230c67d6573Sopenharmony_ci 231c67d6573Sopenharmony_ci /// Return notes for the line indexed at `i` (zero-based). If there are no 232c67d6573Sopenharmony_ci /// spans for the given line, then `None` is returned. Otherwise, an 233c67d6573Sopenharmony_ci /// appropriately space padded string with correctly positioned `^` is 234c67d6573Sopenharmony_ci /// returned, accounting for line numbers. 235c67d6573Sopenharmony_ci fn notate_line(&self, i: usize) -> Option<String> { 236c67d6573Sopenharmony_ci let spans = &self.by_line[i]; 237c67d6573Sopenharmony_ci if spans.is_empty() { 238c67d6573Sopenharmony_ci return None; 239c67d6573Sopenharmony_ci } 240c67d6573Sopenharmony_ci let mut notes = String::new(); 241c67d6573Sopenharmony_ci for _ in 0..self.line_number_padding() { 242c67d6573Sopenharmony_ci notes.push(' '); 243c67d6573Sopenharmony_ci } 244c67d6573Sopenharmony_ci let mut pos = 0; 245c67d6573Sopenharmony_ci for span in spans { 246c67d6573Sopenharmony_ci for _ in pos..(span.start.column - 1) { 247c67d6573Sopenharmony_ci notes.push(' '); 248c67d6573Sopenharmony_ci pos += 1; 249c67d6573Sopenharmony_ci } 250c67d6573Sopenharmony_ci let note_len = span.end.column.saturating_sub(span.start.column); 251c67d6573Sopenharmony_ci for _ in 0..cmp::max(1, note_len) { 252c67d6573Sopenharmony_ci notes.push('^'); 253c67d6573Sopenharmony_ci pos += 1; 254c67d6573Sopenharmony_ci } 255c67d6573Sopenharmony_ci } 256c67d6573Sopenharmony_ci Some(notes) 257c67d6573Sopenharmony_ci } 258c67d6573Sopenharmony_ci 259c67d6573Sopenharmony_ci /// Left pad the given line number with spaces such that it is aligned with 260c67d6573Sopenharmony_ci /// other line numbers. 261c67d6573Sopenharmony_ci fn left_pad_line_number(&self, n: usize) -> String { 262c67d6573Sopenharmony_ci let n = n.to_string(); 263c67d6573Sopenharmony_ci let pad = self.line_number_width.checked_sub(n.len()).unwrap(); 264c67d6573Sopenharmony_ci let mut result = repeat_char(' ', pad); 265c67d6573Sopenharmony_ci result.push_str(&n); 266c67d6573Sopenharmony_ci result 267c67d6573Sopenharmony_ci } 268c67d6573Sopenharmony_ci 269c67d6573Sopenharmony_ci /// Return the line number padding beginning at the start of each line of 270c67d6573Sopenharmony_ci /// the pattern. 271c67d6573Sopenharmony_ci /// 272c67d6573Sopenharmony_ci /// If the pattern is only one line, then this returns a fixed padding 273c67d6573Sopenharmony_ci /// for visual indentation. 274c67d6573Sopenharmony_ci fn line_number_padding(&self) -> usize { 275c67d6573Sopenharmony_ci if self.line_number_width == 0 { 276c67d6573Sopenharmony_ci 4 277c67d6573Sopenharmony_ci } else { 278c67d6573Sopenharmony_ci 2 + self.line_number_width 279c67d6573Sopenharmony_ci } 280c67d6573Sopenharmony_ci } 281c67d6573Sopenharmony_ci} 282c67d6573Sopenharmony_ci 283c67d6573Sopenharmony_cifn repeat_char(c: char, count: usize) -> String { 284c67d6573Sopenharmony_ci ::std::iter::repeat(c).take(count).collect() 285c67d6573Sopenharmony_ci} 286c67d6573Sopenharmony_ci 287c67d6573Sopenharmony_ci#[cfg(test)] 288c67d6573Sopenharmony_cimod tests { 289c67d6573Sopenharmony_ci use crate::ast::parse::Parser; 290c67d6573Sopenharmony_ci 291c67d6573Sopenharmony_ci fn assert_panic_message(pattern: &str, expected_msg: &str) { 292c67d6573Sopenharmony_ci let result = Parser::new().parse(pattern); 293c67d6573Sopenharmony_ci match result { 294c67d6573Sopenharmony_ci Ok(_) => { 295c67d6573Sopenharmony_ci panic!("regex should not have parsed"); 296c67d6573Sopenharmony_ci } 297c67d6573Sopenharmony_ci Err(err) => { 298c67d6573Sopenharmony_ci assert_eq!(err.to_string(), expected_msg.trim()); 299c67d6573Sopenharmony_ci } 300c67d6573Sopenharmony_ci } 301c67d6573Sopenharmony_ci } 302c67d6573Sopenharmony_ci 303c67d6573Sopenharmony_ci // See: https://github.com/rust-lang/regex/issues/464 304c67d6573Sopenharmony_ci #[test] 305c67d6573Sopenharmony_ci fn regression_464() { 306c67d6573Sopenharmony_ci let err = Parser::new().parse("a{\n").unwrap_err(); 307c67d6573Sopenharmony_ci // This test checks that the error formatter doesn't panic. 308c67d6573Sopenharmony_ci assert!(!err.to_string().is_empty()); 309c67d6573Sopenharmony_ci } 310c67d6573Sopenharmony_ci 311c67d6573Sopenharmony_ci // See: https://github.com/rust-lang/regex/issues/545 312c67d6573Sopenharmony_ci #[test] 313c67d6573Sopenharmony_ci fn repetition_quantifier_expects_a_valid_decimal() { 314c67d6573Sopenharmony_ci assert_panic_message( 315c67d6573Sopenharmony_ci r"\\u{[^}]*}", 316c67d6573Sopenharmony_ci r#" 317c67d6573Sopenharmony_ciregex parse error: 318c67d6573Sopenharmony_ci \\u{[^}]*} 319c67d6573Sopenharmony_ci ^ 320c67d6573Sopenharmony_cierror: repetition quantifier expects a valid decimal 321c67d6573Sopenharmony_ci"#, 322c67d6573Sopenharmony_ci ); 323c67d6573Sopenharmony_ci } 324c67d6573Sopenharmony_ci} 325