//! Source file support for diagnostic reporting.
//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
//! with the [`term::emit`] function.
//!
//! Simple implementations of this trait are implemented:
//!
//! - [`SimpleFile`]: For single-file use-cases
//! - [`SimpleFiles`]: For multi-file use-cases
//!
//! These data structures provide a pretty minimal API, however,
//! so end-users are encouraged to create their own implementations for their
//! own specific use-cases, such as an implementation that accesses the file
//! system directly (and caches the line start locations), or an implementation
//! using an incremental compilation library like [`salsa`].
//!
//! [`term::emit`]: crate::term::emit
//! [`Diagnostics`]: crate::diagnostic::Diagnostic
//! [`Files`]: Files
//! [`SimpleFile`]: SimpleFile
//! [`SimpleFiles`]: SimpleFiles
//!
//! [`salsa`]: https://crates.io/crates/salsa

use std::ops::Range;

/// An enum representing an error that happened while looking up a file or a piece of content in that file.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge { given: usize, max: usize },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge { given: usize, max: usize },
    /// The file is present and contains the specified line index, but the line does not contain the specified column index.
    ColumnTooLarge { given: usize, max: usize },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary { given: usize },
    /// There was an error while doing IO.
    Io(std::io::Error),
}

impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Error {
        Error::Io(err)
    }
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::FileMissing => write!(f, "file missing"),
            Error::IndexTooLarge { given, max } => {
                write!(f, "invalid index {}, maximum index is {}", given, max)
            }
            Error::LineTooLarge { given, max } => {
                write!(f, "invalid line {}, maximum line is {}", given, max)
            }
            Error::ColumnTooLarge { given, max } => {
                write!(f, "invalid column {}, maximum column {}", given, max)
            }
            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
            Error::Io(err) => write!(f, "{}", err),
        }
    }
}

impl std::error::Error for Error {
    /// Only the [`Error::Io`] variant wraps an underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(err) => Some(err),
            _ => None,
        }
    }
}

/// A minimal interface for accessing source files when rendering diagnostics.
///
/// A lifetime parameter `'a` is provided to allow any of the returned values to be returned by reference.
/// This is to workaround the lack of higher kinded lifetime parameters.
/// This can be ignored if this is not needed, however.
pub trait Files<'a> {
    /// A unique identifier for files in the file provider. This will be used
    /// for rendering `diagnostic::Label`s in the corresponding source files.
    type FileId: 'a + Copy + PartialEq;
    /// The user-facing name of a file, to be displayed in diagnostics.
    type Name: 'a + std::fmt::Display;
    /// The source code of a file.
    type Source: 'a + AsRef<str>;

    /// The user-facing name of a file.
    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;

    /// The source code of a file.
    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;

    /// The index of the line at the given byte index.
    /// If the byte index is past the end of the file, returns the maximum line index in the file.
    /// This means that this function only fails if the file is not present.
    ///
    /// # Note for trait implementors
    ///
    /// This can be implemented efficiently by performing a binary search over
    /// a list of line starts that was computed by calling the [`line_starts`]
    /// function that is exported from the [`files`] module. It might be useful
    /// to pre-compute and cache these line starts.
    ///
    /// [`line_starts`]: crate::files::line_starts
    /// [`files`]: crate::files
    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;

    /// The user-facing line number at the given line index.
    /// It is not necessarily checked that the specified line index
    /// is actually in the file.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the beginning of the file, but
    /// can be useful for implementing something like the
    /// [C preprocessor's `#line` macro][line-macro].
    ///
    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
    // `id` is unused by the default implementation but is part of the
    // signature so that implementors can make the numbering file-specific.
    #[allow(unused_variables)]
    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
        Ok(line_index + 1)
    }

    /// The user-facing column number at the given line index and byte index.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the start of the line.
    /// A default implementation is provided, based on the [`column_index`]
    /// function that is exported from the [`files`] module.
    ///
    /// [`files`]: crate::files
    /// [`column_index`]: crate::files::column_index
    fn column_number(
        &'a self,
        id: Self::FileId,
        line_index: usize,
        byte_index: usize,
    ) -> Result<usize, Error> {
        let source = self.source(id)?;
        let line_range = self.line_range(id, line_index)?;
        let column_index = column_index(source.as_ref(), line_range, byte_index);

        Ok(column_index + 1)
    }

    /// Convenience method for returning line and column number at the given
    /// byte index in the file.
    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
        let line_index = self.line_index(id, byte_index)?;

        Ok(Location {
            line_number: self.line_number(id, line_index)?,
            column_number: self.column_number(id, line_index, byte_index)?,
        })
    }

    /// The byte range of the line with the given index in the source of the file.
    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
}

/// A user-facing location in a source file.
///
/// Returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}

/// The column index at the given byte index in the source file.
/// This is the number of characters to the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// // Line 2 (bytes `2..13`) holds a 4-byte, a 3-byte and a 4-byte character.
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Clamp to the end of the line and to the end of the source so that the
    // char-boundary probe below never looks past `source.len()`.
    let end_index = std::cmp::min(byte_index, std::cmp::min(line_range.end, source.len()));

    // A character is counted once the byte *after* its last byte is a
    // boundary, i.e. only characters that end at or before `end_index`.
    // If `end_index` is before the line start the range is empty.
    (line_range.start..end_index)
        .filter(|byte_index| source.is_char_boundary(byte_index + 1))
        .count()
}

/// Return the starting byte index of each line in the source string.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// The first yielded index is always `0`, so the result is never empty.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    std::iter::once(0).chain(source.match_indices('\n').map(|(i, _)| i + 1))
}

/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte indices in the source code.
    line_starts: Vec<usize>,
}

impl<Name, Source> SimpleFile<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new source file.
    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
        SimpleFile {
            name,
            line_starts: line_starts(source.as_ref()).collect(),
            source,
        }
    }

    /// Return the name of the file.
    pub fn name(&self) -> &Name {
        &self.name
    }

    /// Return the source of the file.
    pub fn source(&self) -> &Source {
        &self.source
    }

    /// Return the starting byte index of the line with the specified line index.
    /// Convenience method that already generates errors if necessary.
    ///
    /// The index one past the last line is accepted as well and maps to the
    /// length of the source, so that it can serve as the end of the last line.
    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
        match self.line_starts.get(line_index) {
            Some(&line_start) => Ok(line_start),
            None if line_index == self.line_starts.len() => Ok(self.source.as_ref().len()),
            None => Err(Error::LineTooLarge {
                given: line_index,
                // `line_starts` always contains at least the leading `0`.
                max: self.line_starts.len() - 1,
            }),
        }
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = ();
    type Name = Name;
    type Source = &'a str;

    fn name(&self, (): ()) -> Result<Name, Error> {
        Ok(self.name.clone())
    }

    fn source(&self, (): ()) -> Result<&str, Error> {
        Ok(self.source.as_ref())
    }

    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
        // `line_starts` begins with `0`, so a failed search never reports
        // insertion point `0` and the subtraction cannot underflow.
        Ok(self
            .line_starts
            .binary_search(&byte_index)
            .unwrap_or_else(|next_line| next_line - 1))
    }

    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
        // Reject out-of-range lines up front. `line_start` also accepts the
        // one-past-the-end index, so without this guard the error would be
        // raised for `line_index + 1` and report the wrong `given` value.
        if line_index >= self.line_starts.len() {
            return Err(Error::LineTooLarge {
                given: line_index,
                max: self.line_starts.len() - 1,
            });
        }

        let line_start = self.line_start(line_index)?;
        let next_line_start = self.line_start(line_index + 1)?;

        Ok(line_start..next_line_start)
    }
}

/// A file database that can store multiple source files.
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
#[derive(Debug, Clone)]
pub struct SimpleFiles<Name, Source> {
    files: Vec<SimpleFile<Name, Source>>,
}

impl<Name, Source> SimpleFiles<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new files database.
    pub fn new() -> SimpleFiles<Name, Source> {
        SimpleFiles { files: Vec::new() }
    }

    /// Add a file to the database, returning the handle that can be used to
    /// refer to it again.
    pub fn add(&mut self, name: Name, source: Source) -> usize {
        let file_id = self.files.len();
        self.files.push(SimpleFile::new(name, source));
        file_id
    }

    /// Get the file corresponding to the given id.
    ///
    /// Returns [`Error::FileMissing`] if no file was added under that id.
    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
        self.files.get(file_id).ok_or(Error::FileMissing)
    }
}

impl<Name, Source> Default for SimpleFiles<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create an empty files database, equivalent to [`SimpleFiles::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = usize;
    type Name = Name;
    type Source = &'a str;

    fn name(&self, file_id: usize) -> Result<Name, Error> {
        Ok(self.get(file_id)?.name().clone())
    }

    fn source(&self, file_id: usize) -> Result<&str, Error> {
        Ok(self.get(file_id)?.source().as_ref())
    }

    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
        self.get(file_id)?.line_index((), byte_index)
    }

    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
        self.get(file_id)?.line_range((), line_index)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(
            file.line_starts,
            [
                0,  // "foo\n"
                4,  // "bar\r\n"
                9,  // ""
                10, // "baz"
            ],
        );
    }

    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let line_sources = (0..4)
            .map(|line| {
                let line_range = file.line_range((), line).unwrap();
                &file.source[line_range]
            })
            .collect::<Vec<_>>();

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}