xref: /third_party/rust/crates/syn/src/buffer.rs (revision fad3a1d3)
1//! A stably addressed token buffer supporting efficient traversal based on a
2//! cheaply copyable cursor.
3
4// This module is heavily commented as it contains most of the unsafe code in
5// Syn, and caution should be used when editing it. The public-facing interface
6// is 100% safe but the implementation is fragile internally.
7
8use crate::Lifetime;
9use proc_macro2::extra::DelimSpan;
10use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
11use std::cmp::Ordering;
12use std::marker::PhantomData;
13
14/// Internal type which is used instead of `TokenTree` to represent a token tree
15/// within a `TokenBuffer`.
16enum Entry {
17    // Mimicking types from proc-macro.
18    // Group entries contain the offset to the matching End entry.
19    Group(Group, usize),
20    Ident(Ident),
21    Punct(Punct),
22    Literal(Literal),
23    // End entries contain the offset (negative) to the start of the buffer.
24    End(isize),
25}
26
27/// A buffer that can be efficiently traversed multiple times, unlike
28/// `TokenStream` which requires a deep copy in order to traverse more than
29/// once.
30pub struct TokenBuffer {
31    // NOTE: Do not implement clone on this - while the current design could be
32    // cloned, other designs which could be desirable may not be cloneable.
33    entries: Box<[Entry]>,
34}
35
36impl TokenBuffer {
37    fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
38        for tt in stream {
39            match tt {
40                TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
41                TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
42                TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
43                TokenTree::Group(group) => {
44                    let group_start_index = entries.len();
45                    entries.push(Entry::End(0)); // we replace this below
46                    Self::recursive_new(entries, group.stream());
47                    let group_end_index = entries.len();
48                    entries.push(Entry::End(-(group_end_index as isize)));
49                    let group_end_offset = group_end_index - group_start_index;
50                    entries[group_start_index] = Entry::Group(group, group_end_offset);
51                }
52            }
53        }
54    }
55
56    /// Creates a `TokenBuffer` containing all the tokens from the input
57    /// `proc_macro::TokenStream`.
58    #[cfg(feature = "proc-macro")]
59    #[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))]
60    pub fn new(stream: proc_macro::TokenStream) -> Self {
61        Self::new2(stream.into())
62    }
63
64    /// Creates a `TokenBuffer` containing all the tokens from the input
65    /// `proc_macro2::TokenStream`.
66    pub fn new2(stream: TokenStream) -> Self {
67        let mut entries = Vec::new();
68        Self::recursive_new(&mut entries, stream);
69        entries.push(Entry::End(-(entries.len() as isize)));
70        Self {
71            entries: entries.into_boxed_slice(),
72        }
73    }
74
75    /// Creates a cursor referencing the first token in the buffer and able to
76    /// traverse until the end of the buffer.
77    pub fn begin(&self) -> Cursor {
78        let ptr = self.entries.as_ptr();
79        unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
80    }
81}
82
83/// A cheaply copyable cursor into a `TokenBuffer`.
84///
85/// This cursor holds a shared reference into the immutable data which is used
86/// internally to represent a `TokenStream`, and can be efficiently manipulated
87/// and copied around.
88///
89/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
90/// object and get a cursor to its first token with `begin()`.
91pub struct Cursor<'a> {
92    // The current entry which the `Cursor` is pointing at.
93    ptr: *const Entry,
94    // This is the only `Entry::End` object which this cursor is allowed to
95    // point at. All other `End` objects are skipped over in `Cursor::create`.
96    scope: *const Entry,
97    // Cursor is covariant in 'a. This field ensures that our pointers are still
98    // valid.
99    marker: PhantomData<&'a Entry>,
100}
101
102impl<'a> Cursor<'a> {
103    /// Creates a cursor referencing a static empty TokenStream.
104    pub fn empty() -> Self {
105        // It's safe in this situation for us to put an `Entry` object in global
106        // storage, despite it not actually being safe to send across threads
107        // (`Ident` is a reference into a thread-local table). This is because
108        // this entry never includes a `Ident` object.
109        //
110        // This wrapper struct allows us to break the rules and put a `Sync`
111        // object in global storage.
112        struct UnsafeSyncEntry(Entry);
113        unsafe impl Sync for UnsafeSyncEntry {}
114        static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0));
115
116        Cursor {
117            ptr: &EMPTY_ENTRY.0,
118            scope: &EMPTY_ENTRY.0,
119            marker: PhantomData,
120        }
121    }
122
123    /// This create method intelligently exits non-explicitly-entered
124    /// `None`-delimited scopes when the cursor reaches the end of them,
125    /// allowing for them to be treated transparently.
126    unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
127        // NOTE: If we're looking at a `End`, we want to advance the cursor
128        // past it, unless `ptr == scope`, which means that we're at the edge of
129        // our cursor's scope. We should only have `ptr != scope` at the exit
130        // from None-delimited groups entered with `ignore_none`.
131        while let Entry::End(_) = unsafe { &*ptr } {
132            if ptr == scope {
133                break;
134            }
135            ptr = unsafe { ptr.add(1) };
136        }
137
138        Cursor {
139            ptr,
140            scope,
141            marker: PhantomData,
142        }
143    }
144
145    /// Get the current entry.
146    fn entry(self) -> &'a Entry {
147        unsafe { &*self.ptr }
148    }
149
150    /// Bump the cursor to point at the next token after the current one. This
151    /// is undefined behavior if the cursor is currently looking at an
152    /// `Entry::End`.
153    ///
154    /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
155    /// point at the first token in the group (with the same scope end).
156    unsafe fn bump_ignore_group(self) -> Cursor<'a> {
157        unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
158    }
159
160    /// While the cursor is looking at a `None`-delimited group, move it to look
161    /// at the first token inside instead. If the group is empty, this will move
162    /// the cursor past the `None`-delimited group.
163    ///
164    /// WARNING: This mutates its argument.
165    fn ignore_none(&mut self) {
166        while let Entry::Group(group, _) = self.entry() {
167            if group.delimiter() == Delimiter::None {
168                unsafe { *self = self.bump_ignore_group() };
169            } else {
170                break;
171            }
172        }
173    }
174
175    /// Checks whether the cursor is currently pointing at the end of its valid
176    /// scope.
177    pub fn eof(self) -> bool {
178        // We're at eof if we're at the end of our scope.
179        self.ptr == self.scope
180    }
181
182    /// If the cursor is pointing at a `Group` with the given delimiter, returns
183    /// a cursor into that group and one pointing to the next `TokenTree`.
184    pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
185        // If we're not trying to enter a none-delimited group, we want to
186        // ignore them. We have to make sure to _not_ ignore them when we want
187        // to enter them, of course. For obvious reasons.
188        if delim != Delimiter::None {
189            self.ignore_none();
190        }
191
192        if let Entry::Group(group, end_offset) = self.entry() {
193            if group.delimiter() == delim {
194                let span = group.delim_span();
195                let end_of_group = unsafe { self.ptr.add(*end_offset) };
196                let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
197                let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
198                return Some((inside_of_group, span, after_group));
199            }
200        }
201
202        None
203    }
204
205    pub(crate) fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
206        if let Entry::Group(group, end_offset) = self.entry() {
207            let delimiter = group.delimiter();
208            let span = group.delim_span();
209            let end_of_group = unsafe { self.ptr.add(*end_offset) };
210            let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
211            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
212            return Some((inside_of_group, delimiter, span, after_group));
213        }
214
215        None
216    }
217
218    pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
219        if let Entry::Group(group, end_offset) = self.entry() {
220            let end_of_group = unsafe { self.ptr.add(*end_offset) };
221            let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
222            return Some((group.clone(), after_group));
223        }
224
225        None
226    }
227
228    /// If the cursor is pointing at a `Ident`, returns it along with a cursor
229    /// pointing at the next `TokenTree`.
230    pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
231        self.ignore_none();
232        match self.entry() {
233            Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
234            _ => None,
235        }
236    }
237
238    /// If the cursor is pointing at a `Punct`, returns it along with a cursor
239    /// pointing at the next `TokenTree`.
240    pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
241        self.ignore_none();
242        match self.entry() {
243            Entry::Punct(punct) if punct.as_char() != '\'' => {
244                Some((punct.clone(), unsafe { self.bump_ignore_group() }))
245            }
246            _ => None,
247        }
248    }
249
250    /// If the cursor is pointing at a `Literal`, return it along with a cursor
251    /// pointing at the next `TokenTree`.
252    pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
253        self.ignore_none();
254        match self.entry() {
255            Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
256            _ => None,
257        }
258    }
259
260    /// If the cursor is pointing at a `Lifetime`, returns it along with a
261    /// cursor pointing at the next `TokenTree`.
262    pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
263        self.ignore_none();
264        match self.entry() {
265            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
266                let next = unsafe { self.bump_ignore_group() };
267                let (ident, rest) = next.ident()?;
268                let lifetime = Lifetime {
269                    apostrophe: punct.span(),
270                    ident,
271                };
272                Some((lifetime, rest))
273            }
274            _ => None,
275        }
276    }
277
278    /// Copies all remaining tokens visible from this cursor into a
279    /// `TokenStream`.
280    pub fn token_stream(self) -> TokenStream {
281        let mut tts = Vec::new();
282        let mut cursor = self;
283        while let Some((tt, rest)) = cursor.token_tree() {
284            tts.push(tt);
285            cursor = rest;
286        }
287        tts.into_iter().collect()
288    }
289
290    /// If the cursor is pointing at a `TokenTree`, returns it along with a
291    /// cursor pointing at the next `TokenTree`.
292    ///
293    /// Returns `None` if the cursor has reached the end of its stream.
294    ///
295    /// This method does not treat `None`-delimited groups as transparent, and
296    /// will return a `Group(None, ..)` if the cursor is looking at one.
297    pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
298        let (tree, len) = match self.entry() {
299            Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
300            Entry::Literal(literal) => (literal.clone().into(), 1),
301            Entry::Ident(ident) => (ident.clone().into(), 1),
302            Entry::Punct(punct) => (punct.clone().into(), 1),
303            Entry::End(_) => return None,
304        };
305
306        let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
307        Some((tree, rest))
308    }
309
310    /// Returns the `Span` of the current token, or `Span::call_site()` if this
311    /// cursor points to eof.
312    pub fn span(self) -> Span {
313        match self.entry() {
314            Entry::Group(group, _) => group.span(),
315            Entry::Literal(literal) => literal.span(),
316            Entry::Ident(ident) => ident.span(),
317            Entry::Punct(punct) => punct.span(),
318            Entry::End(_) => Span::call_site(),
319        }
320    }
321
322    /// Returns the `Span` of the token immediately prior to the position of
323    /// this cursor, or of the current token if there is no previous one.
324    #[cfg(any(feature = "full", feature = "derive"))]
325    pub(crate) fn prev_span(mut self) -> Span {
326        if start_of_buffer(self) < self.ptr {
327            self.ptr = unsafe { self.ptr.offset(-1) };
328            if let Entry::End(_) = self.entry() {
329                // Locate the matching Group begin token.
330                let mut depth = 1;
331                loop {
332                    self.ptr = unsafe { self.ptr.offset(-1) };
333                    match self.entry() {
334                        Entry::Group(group, _) => {
335                            depth -= 1;
336                            if depth == 0 {
337                                return group.span();
338                            }
339                        }
340                        Entry::End(_) => depth += 1,
341                        Entry::Literal(_) | Entry::Ident(_) | Entry::Punct(_) => {}
342                    }
343                }
344            }
345        }
346        self.span()
347    }
348
349    /// Skip over the next token without cloning it. Returns `None` if this
350    /// cursor points to eof.
351    ///
352    /// This method treats `'lifetimes` as a single token.
353    pub(crate) fn skip(self) -> Option<Cursor<'a>> {
354        let len = match self.entry() {
355            Entry::End(_) => return None,
356
357            // Treat lifetimes as a single tt for the purposes of 'skip'.
358            Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
359                match unsafe { &*self.ptr.add(1) } {
360                    Entry::Ident(_) => 2,
361                    _ => 1,
362                }
363            }
364
365            Entry::Group(_, end_offset) => *end_offset,
366            _ => 1,
367        };
368
369        Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
370    }
371}
372
373impl<'a> Copy for Cursor<'a> {}
374
375impl<'a> Clone for Cursor<'a> {
376    fn clone(&self) -> Self {
377        *self
378    }
379}
380
381impl<'a> Eq for Cursor<'a> {}
382
383impl<'a> PartialEq for Cursor<'a> {
384    fn eq(&self, other: &Self) -> bool {
385        self.ptr == other.ptr
386    }
387}
388
389impl<'a> PartialOrd for Cursor<'a> {
390    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
391        if same_buffer(*self, *other) {
392            Some(cmp_assuming_same_buffer(*self, *other))
393        } else {
394            None
395        }
396    }
397}
398
399pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
400    a.scope == b.scope
401}
402
403pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
404    start_of_buffer(a) == start_of_buffer(b)
405}
406
407fn start_of_buffer(cursor: Cursor) -> *const Entry {
408    unsafe {
409        match &*cursor.scope {
410            Entry::End(offset) => cursor.scope.offset(*offset),
411            _ => unreachable!(),
412        }
413    }
414}
415
416pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
417    a.ptr.cmp(&b.ptr)
418}
419
420pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
421    match cursor.entry() {
422        Entry::Group(group, _) => group.span_open(),
423        _ => cursor.span(),
424    }
425}
426
427pub(crate) fn close_span_of_group(cursor: Cursor) -> Span {
428    match cursor.entry() {
429        Entry::Group(group, _) => group.span_close(),
430        _ => cursor.span(),
431    }
432}
433