1e73685ebSopenharmony_ci//! Utilities for translating from codespan types into Language Server Protocol (LSP) types 2e73685ebSopenharmony_ci 3e73685ebSopenharmony_ciuse std::ops::Range; 4e73685ebSopenharmony_ci 5e73685ebSopenharmony_ciuse codespan_reporting::files::{Error, Files}; 6e73685ebSopenharmony_ci 7e73685ebSopenharmony_ci// WARNING: Be extremely careful when adding new imports here, as it could break 8e73685ebSopenharmony_ci// the compatible version range that we claim in our `Cargo.toml`. This could 9e73685ebSopenharmony_ci// potentially break down-stream builds on a `cargo update`. This is an 10e73685ebSopenharmony_ci// absolute no-no, breaking much of what we enjoy about Cargo! 11e73685ebSopenharmony_ciuse lsp_types::{Position as LspPosition, Range as LspRange}; 12e73685ebSopenharmony_ci 13e73685ebSopenharmony_cifn location_to_position( 14e73685ebSopenharmony_ci line_str: &str, 15e73685ebSopenharmony_ci line: usize, 16e73685ebSopenharmony_ci column: usize, 17e73685ebSopenharmony_ci byte_index: usize, 18e73685ebSopenharmony_ci) -> Result<LspPosition, Error> { 19e73685ebSopenharmony_ci if column > line_str.len() { 20e73685ebSopenharmony_ci let max = line_str.len(); 21e73685ebSopenharmony_ci let given = column; 22e73685ebSopenharmony_ci 23e73685ebSopenharmony_ci Err(Error::ColumnTooLarge { given, max }) 24e73685ebSopenharmony_ci } else if !line_str.is_char_boundary(column) { 25e73685ebSopenharmony_ci let given = byte_index; 26e73685ebSopenharmony_ci 27e73685ebSopenharmony_ci Err(Error::InvalidCharBoundary { given }) 28e73685ebSopenharmony_ci } else { 29e73685ebSopenharmony_ci let line_utf16 = line_str[..column].encode_utf16(); 30e73685ebSopenharmony_ci let character = line_utf16.count() as u32; 31e73685ebSopenharmony_ci let line = line as u32; 32e73685ebSopenharmony_ci 33e73685ebSopenharmony_ci Ok(LspPosition { line, character }) 34e73685ebSopenharmony_ci } 35e73685ebSopenharmony_ci} 36e73685ebSopenharmony_ci 37e73685ebSopenharmony_cipub fn byte_index_to_position<'a, F>( 38e73685ebSopenharmony_ci files: &'a F, 39e73685ebSopenharmony_ci file_id: F::FileId, 40e73685ebSopenharmony_ci byte_index: usize, 41e73685ebSopenharmony_ci) -> Result<LspPosition, Error> 42e73685ebSopenharmony_ciwhere 43e73685ebSopenharmony_ci F: Files<'a> + ?Sized, 44e73685ebSopenharmony_ci{ 45e73685ebSopenharmony_ci let source = files.source(file_id)?; 46e73685ebSopenharmony_ci let source = source.as_ref(); 47e73685ebSopenharmony_ci 48e73685ebSopenharmony_ci let line_index = files.line_index(file_id, byte_index)?; 49e73685ebSopenharmony_ci let line_span = files.line_range(file_id, line_index).unwrap(); 50e73685ebSopenharmony_ci 51e73685ebSopenharmony_ci let line_str = source 52e73685ebSopenharmony_ci .get(line_span.clone()) 53e73685ebSopenharmony_ci .ok_or_else(|| Error::IndexTooLarge { 54e73685ebSopenharmony_ci given: if line_span.start >= source.len() { 55e73685ebSopenharmony_ci line_span.start 56e73685ebSopenharmony_ci } else { 57e73685ebSopenharmony_ci line_span.end 58e73685ebSopenharmony_ci }, 59e73685ebSopenharmony_ci max: source.len() - 1, 60e73685ebSopenharmony_ci })?; 61e73685ebSopenharmony_ci let column = byte_index - line_span.start; 62e73685ebSopenharmony_ci 63e73685ebSopenharmony_ci location_to_position(line_str, line_index, column, byte_index) 64e73685ebSopenharmony_ci} 65e73685ebSopenharmony_ci 66e73685ebSopenharmony_cipub fn byte_span_to_range<'a, F>( 67e73685ebSopenharmony_ci files: &'a F, 68e73685ebSopenharmony_ci file_id: F::FileId, 69e73685ebSopenharmony_ci span: Range<usize>, 70e73685ebSopenharmony_ci) -> Result<LspRange, Error> 71e73685ebSopenharmony_ciwhere 72e73685ebSopenharmony_ci F: Files<'a> + ?Sized, 73e73685ebSopenharmony_ci{ 74e73685ebSopenharmony_ci Ok(LspRange { 75e73685ebSopenharmony_ci start: byte_index_to_position(files, file_id, span.start)?, 76e73685ebSopenharmony_ci end: byte_index_to_position(files, file_id, span.end)?, 77e73685ebSopenharmony_ci }) 78e73685ebSopenharmony_ci} 79e73685ebSopenharmony_ci 80e73685ebSopenharmony_cifn character_to_line_offset(line: &str, character: u32) -> Result<usize, Error> { 81e73685ebSopenharmony_ci let line_len = line.len(); 82e73685ebSopenharmony_ci let mut character_offset = 0; 83e73685ebSopenharmony_ci 84e73685ebSopenharmony_ci let mut chars = line.chars(); 85e73685ebSopenharmony_ci while let Some(ch) = chars.next() { 86e73685ebSopenharmony_ci if character_offset == character { 87e73685ebSopenharmony_ci let chars_off = chars.as_str().len(); 88e73685ebSopenharmony_ci let ch_off = ch.len_utf8(); 89e73685ebSopenharmony_ci 90e73685ebSopenharmony_ci return Ok(line_len - chars_off - ch_off); 91e73685ebSopenharmony_ci } 92e73685ebSopenharmony_ci 93e73685ebSopenharmony_ci character_offset += ch.len_utf16() as u32; 94e73685ebSopenharmony_ci } 95e73685ebSopenharmony_ci 96e73685ebSopenharmony_ci // Handle positions after the last character on the line 97e73685ebSopenharmony_ci if character_offset == character { 98e73685ebSopenharmony_ci Ok(line_len) 99e73685ebSopenharmony_ci } else { 100e73685ebSopenharmony_ci Err(Error::ColumnTooLarge { 101e73685ebSopenharmony_ci given: character_offset as usize, 102e73685ebSopenharmony_ci max: line.len(), 103e73685ebSopenharmony_ci }) 104e73685ebSopenharmony_ci } 105e73685ebSopenharmony_ci} 106e73685ebSopenharmony_ci 107e73685ebSopenharmony_cipub fn position_to_byte_index<'a, F>( 108e73685ebSopenharmony_ci files: &'a F, 109e73685ebSopenharmony_ci file_id: F::FileId, 110e73685ebSopenharmony_ci position: &LspPosition, 111e73685ebSopenharmony_ci) -> Result<usize, Error> 112e73685ebSopenharmony_ciwhere 113e73685ebSopenharmony_ci F: Files<'a> + ?Sized, 114e73685ebSopenharmony_ci{ 115e73685ebSopenharmony_ci let source = files.source(file_id)?; 116e73685ebSopenharmony_ci let source = source.as_ref(); 117e73685ebSopenharmony_ci 118e73685ebSopenharmony_ci let line_span = files.line_range(file_id, position.line as usize).unwrap(); 119e73685ebSopenharmony_ci let line_str = source.get(line_span.clone()).unwrap(); 120e73685ebSopenharmony_ci 121e73685ebSopenharmony_ci let byte_offset = character_to_line_offset(line_str, position.character)?; 122e73685ebSopenharmony_ci 123e73685ebSopenharmony_ci Ok(line_span.start + byte_offset) 124e73685ebSopenharmony_ci} 125e73685ebSopenharmony_ci 126e73685ebSopenharmony_cipub fn range_to_byte_span<'a, F>( 127e73685ebSopenharmony_ci files: &'a F, 128e73685ebSopenharmony_ci file_id: F::FileId, 129e73685ebSopenharmony_ci range: &LspRange, 130e73685ebSopenharmony_ci) -> Result<Range<usize>, Error> 131e73685ebSopenharmony_ciwhere 132e73685ebSopenharmony_ci F: Files<'a> + ?Sized, 133e73685ebSopenharmony_ci{ 134e73685ebSopenharmony_ci Ok(position_to_byte_index(files, file_id, &range.start)? 135e73685ebSopenharmony_ci ..position_to_byte_index(files, file_id, &range.end)?) 136e73685ebSopenharmony_ci} 137e73685ebSopenharmony_ci 138e73685ebSopenharmony_ci#[cfg(test)] 139e73685ebSopenharmony_cimod tests { 140e73685ebSopenharmony_ci use codespan_reporting::files::{Location, SimpleFiles}; 141e73685ebSopenharmony_ci 142e73685ebSopenharmony_ci use super::*; 143e73685ebSopenharmony_ci 144e73685ebSopenharmony_ci #[test] 145e73685ebSopenharmony_ci fn position() { 146e73685ebSopenharmony_ci let text = r#" 147e73685ebSopenharmony_cilet test = 2 148e73685ebSopenharmony_cilet test1 = "" 149e73685ebSopenharmony_citest 150e73685ebSopenharmony_ci"#; 151e73685ebSopenharmony_ci let mut files = SimpleFiles::new(); 152e73685ebSopenharmony_ci let file_id = files.add("test", text); 153e73685ebSopenharmony_ci let pos = position_to_byte_index( 154e73685ebSopenharmony_ci &files, 155e73685ebSopenharmony_ci file_id, 156e73685ebSopenharmony_ci &LspPosition { 157e73685ebSopenharmony_ci line: 3, 158e73685ebSopenharmony_ci character: 2, 159e73685ebSopenharmony_ci }, 160e73685ebSopenharmony_ci ) 161e73685ebSopenharmony_ci .unwrap(); 162e73685ebSopenharmony_ci assert_eq!( 163e73685ebSopenharmony_ci Location { 164e73685ebSopenharmony_ci // One-based 165e73685ebSopenharmony_ci line_number: 3 + 1, 166e73685ebSopenharmony_ci column_number: 2 + 1, 167e73685ebSopenharmony_ci }, 168e73685ebSopenharmony_ci files.location(file_id, pos).unwrap() 169e73685ebSopenharmony_ci ); 170e73685ebSopenharmony_ci } 171e73685ebSopenharmony_ci 172e73685ebSopenharmony_ci // The protocol specifies that each `character` in position is a UTF-16 character. 173e73685ebSopenharmony_ci // This means that `å` and `ä` here counts as 1 while `` counts as 2. 174e73685ebSopenharmony_ci const UNICODE: &str = "åä tb"; 175e73685ebSopenharmony_ci 176e73685ebSopenharmony_ci #[test] 177e73685ebSopenharmony_ci fn unicode_get_byte_index() { 178e73685ebSopenharmony_ci let mut files = SimpleFiles::new(); 179e73685ebSopenharmony_ci let file_id = files.add("unicode", UNICODE); 180e73685ebSopenharmony_ci 181e73685ebSopenharmony_ci let result = position_to_byte_index( 182e73685ebSopenharmony_ci &files, 183e73685ebSopenharmony_ci file_id, 184e73685ebSopenharmony_ci &LspPosition { 185e73685ebSopenharmony_ci line: 0, 186e73685ebSopenharmony_ci character: 3, 187e73685ebSopenharmony_ci }, 188e73685ebSopenharmony_ci ); 189e73685ebSopenharmony_ci assert_eq!(result.unwrap(), 5); 190e73685ebSopenharmony_ci 191e73685ebSopenharmony_ci let result = position_to_byte_index( 192e73685ebSopenharmony_ci &files, 193e73685ebSopenharmony_ci file_id, 194e73685ebSopenharmony_ci &LspPosition { 195e73685ebSopenharmony_ci line: 0, 196e73685ebSopenharmony_ci character: 6, 197e73685ebSopenharmony_ci }, 198e73685ebSopenharmony_ci ); 199e73685ebSopenharmony_ci assert_eq!(result.unwrap(), 10); 200e73685ebSopenharmony_ci } 201e73685ebSopenharmony_ci 202e73685ebSopenharmony_ci #[test] 203e73685ebSopenharmony_ci fn unicode_get_position() { 204e73685ebSopenharmony_ci let mut files = SimpleFiles::new(); 205e73685ebSopenharmony_ci let file_id = files.add("unicode", UNICODE.to_string()); 206e73685ebSopenharmony_ci let file_id2 = files.add("unicode newline", "\n".to_string() + UNICODE); 207e73685ebSopenharmony_ci 208e73685ebSopenharmony_ci let result = byte_index_to_position(&files, file_id, 5); 209e73685ebSopenharmony_ci assert_eq!( 210e73685ebSopenharmony_ci result.unwrap(), 211e73685ebSopenharmony_ci LspPosition { 212e73685ebSopenharmony_ci line: 0, 213e73685ebSopenharmony_ci character: 3, 214e73685ebSopenharmony_ci } 215e73685ebSopenharmony_ci ); 216e73685ebSopenharmony_ci 217e73685ebSopenharmony_ci let result = byte_index_to_position(&files, file_id, 10); 218e73685ebSopenharmony_ci assert_eq!( 219e73685ebSopenharmony_ci result.unwrap(), 220e73685ebSopenharmony_ci LspPosition { 221e73685ebSopenharmony_ci line: 0, 222e73685ebSopenharmony_ci character: 6, 223e73685ebSopenharmony_ci } 224e73685ebSopenharmony_ci ); 225e73685ebSopenharmony_ci 226e73685ebSopenharmony_ci let result = byte_index_to_position(&files, file_id2, 11); 227e73685ebSopenharmony_ci assert_eq!( 228e73685ebSopenharmony_ci result.unwrap(), 229e73685ebSopenharmony_ci LspPosition { 230e73685ebSopenharmony_ci line: 1, 231e73685ebSopenharmony_ci character: 6, 232e73685ebSopenharmony_ci } 233e73685ebSopenharmony_ci ); 234e73685ebSopenharmony_ci } 235e73685ebSopenharmony_ci} 236