1e73685ebSopenharmony_ci//! Utilities for translating from codespan types into Language Server Protocol (LSP) types
2e73685ebSopenharmony_ci
3e73685ebSopenharmony_ciuse std::ops::Range;
4e73685ebSopenharmony_ci
5e73685ebSopenharmony_ciuse codespan_reporting::files::{Error, Files};
6e73685ebSopenharmony_ci
7e73685ebSopenharmony_ci// WARNING: Be extremely careful when adding new imports here, as it could break
8e73685ebSopenharmony_ci// the compatible version range that we claim in our `Cargo.toml`. This could
9e73685ebSopenharmony_ci// potentially break down-stream builds on a `cargo update`. This is an
10e73685ebSopenharmony_ci// absolute no-no, breaking much of what we enjoy about Cargo!
11e73685ebSopenharmony_ciuse lsp_types::{Position as LspPosition, Range as LspRange};
12e73685ebSopenharmony_ci
13e73685ebSopenharmony_cifn location_to_position(
14e73685ebSopenharmony_ci    line_str: &str,
15e73685ebSopenharmony_ci    line: usize,
16e73685ebSopenharmony_ci    column: usize,
17e73685ebSopenharmony_ci    byte_index: usize,
18e73685ebSopenharmony_ci) -> Result<LspPosition, Error> {
19e73685ebSopenharmony_ci    if column > line_str.len() {
20e73685ebSopenharmony_ci        let max = line_str.len();
21e73685ebSopenharmony_ci        let given = column;
22e73685ebSopenharmony_ci
23e73685ebSopenharmony_ci        Err(Error::ColumnTooLarge { given, max })
24e73685ebSopenharmony_ci    } else if !line_str.is_char_boundary(column) {
25e73685ebSopenharmony_ci        let given = byte_index;
26e73685ebSopenharmony_ci
27e73685ebSopenharmony_ci        Err(Error::InvalidCharBoundary { given })
28e73685ebSopenharmony_ci    } else {
29e73685ebSopenharmony_ci        let line_utf16 = line_str[..column].encode_utf16();
30e73685ebSopenharmony_ci        let character = line_utf16.count() as u32;
31e73685ebSopenharmony_ci        let line = line as u32;
32e73685ebSopenharmony_ci
33e73685ebSopenharmony_ci        Ok(LspPosition { line, character })
34e73685ebSopenharmony_ci    }
35e73685ebSopenharmony_ci}
36e73685ebSopenharmony_ci
37e73685ebSopenharmony_cipub fn byte_index_to_position<'a, F>(
38e73685ebSopenharmony_ci    files: &'a F,
39e73685ebSopenharmony_ci    file_id: F::FileId,
40e73685ebSopenharmony_ci    byte_index: usize,
41e73685ebSopenharmony_ci) -> Result<LspPosition, Error>
42e73685ebSopenharmony_ciwhere
43e73685ebSopenharmony_ci    F: Files<'a> + ?Sized,
44e73685ebSopenharmony_ci{
45e73685ebSopenharmony_ci    let source = files.source(file_id)?;
46e73685ebSopenharmony_ci    let source = source.as_ref();
47e73685ebSopenharmony_ci
48e73685ebSopenharmony_ci    let line_index = files.line_index(file_id, byte_index)?;
49e73685ebSopenharmony_ci    let line_span = files.line_range(file_id, line_index).unwrap();
50e73685ebSopenharmony_ci
51e73685ebSopenharmony_ci    let line_str = source
52e73685ebSopenharmony_ci        .get(line_span.clone())
53e73685ebSopenharmony_ci        .ok_or_else(|| Error::IndexTooLarge {
54e73685ebSopenharmony_ci            given: if line_span.start >= source.len() {
55e73685ebSopenharmony_ci                line_span.start
56e73685ebSopenharmony_ci            } else {
57e73685ebSopenharmony_ci                line_span.end
58e73685ebSopenharmony_ci            },
59e73685ebSopenharmony_ci            max: source.len() - 1,
60e73685ebSopenharmony_ci        })?;
61e73685ebSopenharmony_ci    let column = byte_index - line_span.start;
62e73685ebSopenharmony_ci
63e73685ebSopenharmony_ci    location_to_position(line_str, line_index, column, byte_index)
64e73685ebSopenharmony_ci}
65e73685ebSopenharmony_ci
66e73685ebSopenharmony_cipub fn byte_span_to_range<'a, F>(
67e73685ebSopenharmony_ci    files: &'a F,
68e73685ebSopenharmony_ci    file_id: F::FileId,
69e73685ebSopenharmony_ci    span: Range<usize>,
70e73685ebSopenharmony_ci) -> Result<LspRange, Error>
71e73685ebSopenharmony_ciwhere
72e73685ebSopenharmony_ci    F: Files<'a> + ?Sized,
73e73685ebSopenharmony_ci{
74e73685ebSopenharmony_ci    Ok(LspRange {
75e73685ebSopenharmony_ci        start: byte_index_to_position(files, file_id, span.start)?,
76e73685ebSopenharmony_ci        end: byte_index_to_position(files, file_id, span.end)?,
77e73685ebSopenharmony_ci    })
78e73685ebSopenharmony_ci}
79e73685ebSopenharmony_ci
80e73685ebSopenharmony_cifn character_to_line_offset(line: &str, character: u32) -> Result<usize, Error> {
81e73685ebSopenharmony_ci    let line_len = line.len();
82e73685ebSopenharmony_ci    let mut character_offset = 0;
83e73685ebSopenharmony_ci
84e73685ebSopenharmony_ci    let mut chars = line.chars();
85e73685ebSopenharmony_ci    while let Some(ch) = chars.next() {
86e73685ebSopenharmony_ci        if character_offset == character {
87e73685ebSopenharmony_ci            let chars_off = chars.as_str().len();
88e73685ebSopenharmony_ci            let ch_off = ch.len_utf8();
89e73685ebSopenharmony_ci
90e73685ebSopenharmony_ci            return Ok(line_len - chars_off - ch_off);
91e73685ebSopenharmony_ci        }
92e73685ebSopenharmony_ci
93e73685ebSopenharmony_ci        character_offset += ch.len_utf16() as u32;
94e73685ebSopenharmony_ci    }
95e73685ebSopenharmony_ci
96e73685ebSopenharmony_ci    // Handle positions after the last character on the line
97e73685ebSopenharmony_ci    if character_offset == character {
98e73685ebSopenharmony_ci        Ok(line_len)
99e73685ebSopenharmony_ci    } else {
100e73685ebSopenharmony_ci        Err(Error::ColumnTooLarge {
101e73685ebSopenharmony_ci            given: character_offset as usize,
102e73685ebSopenharmony_ci            max: line.len(),
103e73685ebSopenharmony_ci        })
104e73685ebSopenharmony_ci    }
105e73685ebSopenharmony_ci}
106e73685ebSopenharmony_ci
107e73685ebSopenharmony_cipub fn position_to_byte_index<'a, F>(
108e73685ebSopenharmony_ci    files: &'a F,
109e73685ebSopenharmony_ci    file_id: F::FileId,
110e73685ebSopenharmony_ci    position: &LspPosition,
111e73685ebSopenharmony_ci) -> Result<usize, Error>
112e73685ebSopenharmony_ciwhere
113e73685ebSopenharmony_ci    F: Files<'a> + ?Sized,
114e73685ebSopenharmony_ci{
115e73685ebSopenharmony_ci    let source = files.source(file_id)?;
116e73685ebSopenharmony_ci    let source = source.as_ref();
117e73685ebSopenharmony_ci
118e73685ebSopenharmony_ci    let line_span = files.line_range(file_id, position.line as usize).unwrap();
119e73685ebSopenharmony_ci    let line_str = source.get(line_span.clone()).unwrap();
120e73685ebSopenharmony_ci
121e73685ebSopenharmony_ci    let byte_offset = character_to_line_offset(line_str, position.character)?;
122e73685ebSopenharmony_ci
123e73685ebSopenharmony_ci    Ok(line_span.start + byte_offset)
124e73685ebSopenharmony_ci}
125e73685ebSopenharmony_ci
126e73685ebSopenharmony_cipub fn range_to_byte_span<'a, F>(
127e73685ebSopenharmony_ci    files: &'a F,
128e73685ebSopenharmony_ci    file_id: F::FileId,
129e73685ebSopenharmony_ci    range: &LspRange,
130e73685ebSopenharmony_ci) -> Result<Range<usize>, Error>
131e73685ebSopenharmony_ciwhere
132e73685ebSopenharmony_ci    F: Files<'a> + ?Sized,
133e73685ebSopenharmony_ci{
134e73685ebSopenharmony_ci    Ok(position_to_byte_index(files, file_id, &range.start)?
135e73685ebSopenharmony_ci        ..position_to_byte_index(files, file_id, &range.end)?)
136e73685ebSopenharmony_ci}
137e73685ebSopenharmony_ci
#[cfg(test)]
mod tests {
    use codespan_reporting::files::{Location, SimpleFiles};

    use super::*;

    // An LSP position converted to a byte index must agree with the location
    // that `SimpleFiles` reports for that byte index.
    #[test]
    fn position() {
        let text = r#"
let test = 2
let test1 = ""
test
"#;
        let mut files = SimpleFiles::new();
        let file_id = files.add("test", text);
        let pos = position_to_byte_index(
            &files,
            file_id,
            &LspPosition {
                line: 3,
                character: 2,
            },
        )
        .unwrap();
        assert_eq!(
            Location {
                // One-based
                line_number: 3 + 1,
                column_number: 2 + 1,
            },
            files.location(file_id, pos).unwrap()
        );
    }

    // The protocol specifies that each `character` in position is a UTF-16 code unit.
    // This means that `å` and `ä` here each count as 1 while `💣` (U+1F4A3) counts as 2.
    // The emoji is written as an escape so the fixture survives lossy re-encoding.
    const UNICODE: &str = "åä t\u{1F4A3}b";

    #[test]
    fn unicode_get_byte_index() {
        let mut files = SimpleFiles::new();
        let file_id = files.add("unicode", UNICODE);

        // Start of `t`: two 2-byte characters and a space precede it.
        let result = position_to_byte_index(
            &files,
            file_id,
            &LspPosition {
                line: 0,
                character: 3,
            },
        );
        assert_eq!(result.unwrap(), 5);

        // Start of `b`: the emoji before it is 2 UTF-16 units but 4 bytes.
        let result = position_to_byte_index(
            &files,
            file_id,
            &LspPosition {
                line: 0,
                character: 6,
            },
        );
        assert_eq!(result.unwrap(), 10);

        // End of the line: 7 UTF-16 units map to the full 11-byte length.
        let result = position_to_byte_index(
            &files,
            file_id,
            &LspPosition {
                line: 0,
                character: 7,
            },
        );
        assert_eq!(result.unwrap(), UNICODE.len());
    }

    #[test]
    fn unicode_get_position() {
        let mut files = SimpleFiles::new();
        let file_id = files.add("unicode", UNICODE.to_string());
        let file_id2 = files.add("unicode newline", "\n".to_string() + UNICODE);

        let result = byte_index_to_position(&files, file_id, 5);
        assert_eq!(
            result.unwrap(),
            LspPosition {
                line: 0,
                character: 3,
            }
        );

        let result = byte_index_to_position(&files, file_id, 10);
        assert_eq!(
            result.unwrap(),
            LspPosition {
                line: 0,
                character: 6,
            }
        );

        // Same text shifted down one line by a leading newline.
        let result = byte_index_to_position(&files, file_id2, 11);
        assert_eq!(
            result.unwrap(),
            LspPosition {
                line: 1,
                character: 6,
            }
        );
    }
}
236