1 //! Extensions to the parsing API with niche applicability.
2 
3 use super::*;
4 use proc_macro2::extra::DelimSpan;
5 
/// Extensions to the `ParseStream` API to support speculative parsing.
///
/// Implementors provide a single operation, [`advance_to`], which moves a
/// parse stream forward to the position reached by one of its forks.
///
/// [`advance_to`]: Speculative::advance_to
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, bypassing the need to involve the downsides associated with
    /// speculative parsing.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens), but
    /// when you can control the grammar, sticking to something that can be
    /// parsed LL(3) and without the LL(*) speculative parsing this makes
    /// possible, displaying reasonable errors becomes much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
160 
161 impl<'a> Speculative for ParseBuffer<'a> {
advance_tonull162     fn advance_to(&self, fork: &Self) {
163         if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
164             panic!("Fork was not derived from the advancing parse stream");
165         }
166 
167         let (self_unexp, self_sp) = inner_unexpected(self);
168         let (fork_unexp, fork_sp) = inner_unexpected(fork);
169         if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
170             match (fork_sp, self_sp) {
171                 // Unexpected set on the fork, but not on `self`, copy it over.
172                 (Some(span), None) => {
173                     self_unexp.set(Unexpected::Some(span));
174                 }
175                 // Unexpected unset. Use chain to propagate errors from fork.
176                 (None, None) => {
177                     fork_unexp.set(Unexpected::Chain(self_unexp));
178 
179                     // Ensure toplevel 'unexpected' tokens from the fork don't
180                     // bubble up the chain by replacing the root `unexpected`
181                     // pointer, only 'unexpected' tokens from existing group
182                     // parsers should bubble.
183                     fork.unexpected
184                         .set(Some(Rc::new(Cell::new(Unexpected::None))));
185                 }
186                 // Unexpected has been set on `self`. No changes needed.
187                 (_, Some(_)) => {}
188             }
189         }
190 
191         // See comment on `cell` in the struct definition.
192         self.cell
193             .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
194     }
195 }
196 
/// Extensions to the `ParseStream` API to support manipulating invisible
/// delimiters the same as if they were visible.
pub trait AnyDelimiter {
    /// Returns the delimiter, the span of the delimiter token, and the nested
    /// contents for further parsing.
    ///
    /// On success the tuple holds, in order: the [`Delimiter`] kind, the
    /// [`DelimSpan`] of the group, and a `ParseBuffer` over the group's
    /// contents.
    ///
    /// # Errors
    ///
    /// The `ParseBuffer` implementation returns the error "expected any
    /// delimiter" when `Cursor::any_group` yields no group at the current
    /// position.
    fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;
}
204 
205 impl<'a> AnyDelimiter for ParseBuffer<'a> {
parse_any_delimiternull206     fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {
207         self.step(|cursor| {
208             if let Some((content, delimiter, span, rest)) = cursor.any_group() {
209                 let scope = crate::buffer::close_span_of_group(*cursor);
210                 let nested = crate::parse::advance_step_cursor(cursor, content);
211                 let unexpected = crate::parse::get_unexpected(self);
212                 let content = crate::parse::new_parse_buffer(scope, nested, unexpected);
213                 Ok(((delimiter, span, content), rest))
214             } else {
215                 Err(cursor.error("expected any delimiter"))
216             }
217         })
218     }
219 }
220