1fad3a1d3Sopenharmony_ci//! Extensions to the parsing API with niche applicability. 2fad3a1d3Sopenharmony_ci 3fad3a1d3Sopenharmony_ciuse super::*; 4fad3a1d3Sopenharmony_ciuse proc_macro2::extra::DelimSpan; 5fad3a1d3Sopenharmony_ci 6fad3a1d3Sopenharmony_ci/// Extensions to the `ParseStream` API to support speculative parsing. 7fad3a1d3Sopenharmony_cipub trait Speculative { 8fad3a1d3Sopenharmony_ci /// Advance this parse stream to the position of a forked parse stream. 9fad3a1d3Sopenharmony_ci /// 10fad3a1d3Sopenharmony_ci /// This is the opposite operation to [`ParseStream::fork`]. You can fork a 11fad3a1d3Sopenharmony_ci /// parse stream, perform some speculative parsing, then join the original 12fad3a1d3Sopenharmony_ci /// stream to the fork to "commit" the parsing from the fork to the main 13fad3a1d3Sopenharmony_ci /// stream. 14fad3a1d3Sopenharmony_ci /// 15fad3a1d3Sopenharmony_ci /// If you can avoid doing this, you should, as it limits the ability to 16fad3a1d3Sopenharmony_ci /// generate useful errors. That said, it is often the only way to parse 17fad3a1d3Sopenharmony_ci /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem 18fad3a1d3Sopenharmony_ci /// is that when the fork fails to parse an `A`, it's impossible to tell 19fad3a1d3Sopenharmony_ci /// whether that was because of a syntax error and the user meant to provide 20fad3a1d3Sopenharmony_ci /// an `A`, or that the `A`s are finished and it's time to start parsing 21fad3a1d3Sopenharmony_ci /// `B`s. Use with care. 22fad3a1d3Sopenharmony_ci /// 23fad3a1d3Sopenharmony_ci /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by 24fad3a1d3Sopenharmony_ci /// parsing `B*` and removing the leading members of `A` from the 25fad3a1d3Sopenharmony_ci /// repetition, bypassing the need to involve the downsides associated with 26fad3a1d3Sopenharmony_ci /// speculative parsing. 27fad3a1d3Sopenharmony_ci /// 28fad3a1d3Sopenharmony_ci /// [`ParseStream::fork`]: ParseBuffer::fork 29fad3a1d3Sopenharmony_ci /// 30fad3a1d3Sopenharmony_ci /// # Example 31fad3a1d3Sopenharmony_ci /// 32fad3a1d3Sopenharmony_ci /// There has been chatter about the possibility of making the colons in the 33fad3a1d3Sopenharmony_ci /// turbofish syntax like `path::to::<T>` no longer required by accepting 34fad3a1d3Sopenharmony_ci /// `path::to<T>` in expression position. Specifically, according to [RFC 35fad3a1d3Sopenharmony_ci /// 2544], [`PathSegment`] parsing should always try to consume a following 36fad3a1d3Sopenharmony_ci /// `<` token as the start of generic arguments, and reset to the `<` if 37fad3a1d3Sopenharmony_ci /// that fails (e.g. the token is acting as a less-than operator). 38fad3a1d3Sopenharmony_ci /// 39fad3a1d3Sopenharmony_ci /// This is the exact kind of parsing behavior which requires the "fork, 40fad3a1d3Sopenharmony_ci /// try, commit" behavior that [`ParseStream::fork`] discourages. With 41fad3a1d3Sopenharmony_ci /// `advance_to`, we can avoid having to parse the speculatively parsed 42fad3a1d3Sopenharmony_ci /// content a second time. 43fad3a1d3Sopenharmony_ci /// 44fad3a1d3Sopenharmony_ci /// This change in behavior can be implemented in syn by replacing just the 45fad3a1d3Sopenharmony_ci /// `Parse` implementation for `PathSegment`: 46fad3a1d3Sopenharmony_ci /// 47fad3a1d3Sopenharmony_ci /// ``` 48fad3a1d3Sopenharmony_ci /// # use syn::ext::IdentExt; 49fad3a1d3Sopenharmony_ci /// use syn::parse::discouraged::Speculative; 50fad3a1d3Sopenharmony_ci /// # use syn::parse::{Parse, ParseStream}; 51fad3a1d3Sopenharmony_ci /// # use syn::{Ident, PathArguments, Result, Token}; 52fad3a1d3Sopenharmony_ci /// 53fad3a1d3Sopenharmony_ci /// pub struct PathSegment { 54fad3a1d3Sopenharmony_ci /// pub ident: Ident, 55fad3a1d3Sopenharmony_ci /// pub arguments: PathArguments, 56fad3a1d3Sopenharmony_ci /// } 57fad3a1d3Sopenharmony_ci /// # 58fad3a1d3Sopenharmony_ci /// # impl<T> From<T> for PathSegment 59fad3a1d3Sopenharmony_ci /// # where 60fad3a1d3Sopenharmony_ci /// # T: Into<Ident>, 61fad3a1d3Sopenharmony_ci /// # { 62fad3a1d3Sopenharmony_ci /// # fn from(ident: T) -> Self { 63fad3a1d3Sopenharmony_ci /// # PathSegment { 64fad3a1d3Sopenharmony_ci /// # ident: ident.into(), 65fad3a1d3Sopenharmony_ci /// # arguments: PathArguments::None, 66fad3a1d3Sopenharmony_ci /// # } 67fad3a1d3Sopenharmony_ci /// # } 68fad3a1d3Sopenharmony_ci /// # } 69fad3a1d3Sopenharmony_ci /// 70fad3a1d3Sopenharmony_ci /// impl Parse for PathSegment { 71fad3a1d3Sopenharmony_ci /// fn parse(input: ParseStream) -> Result<Self> { 72fad3a1d3Sopenharmony_ci /// if input.peek(Token![super]) 73fad3a1d3Sopenharmony_ci /// || input.peek(Token![self]) 74fad3a1d3Sopenharmony_ci /// || input.peek(Token![Self]) 75fad3a1d3Sopenharmony_ci /// || input.peek(Token![crate]) 76fad3a1d3Sopenharmony_ci /// { 77fad3a1d3Sopenharmony_ci /// let ident = input.call(Ident::parse_any)?; 78fad3a1d3Sopenharmony_ci /// return Ok(PathSegment::from(ident)); 79fad3a1d3Sopenharmony_ci /// } 80fad3a1d3Sopenharmony_ci /// 81fad3a1d3Sopenharmony_ci /// let ident = input.parse()?; 82fad3a1d3Sopenharmony_ci /// if input.peek(Token![::]) && input.peek3(Token![<]) { 83fad3a1d3Sopenharmony_ci /// return Ok(PathSegment { 84fad3a1d3Sopenharmony_ci /// ident, 85fad3a1d3Sopenharmony_ci /// arguments: PathArguments::AngleBracketed(input.parse()?), 86fad3a1d3Sopenharmony_ci /// }); 87fad3a1d3Sopenharmony_ci /// } 88fad3a1d3Sopenharmony_ci /// if input.peek(Token![<]) && !input.peek(Token![<=]) { 89fad3a1d3Sopenharmony_ci /// let fork = input.fork(); 90fad3a1d3Sopenharmony_ci /// if let Ok(arguments) = fork.parse() { 91fad3a1d3Sopenharmony_ci /// input.advance_to(&fork); 92fad3a1d3Sopenharmony_ci /// return Ok(PathSegment { 93fad3a1d3Sopenharmony_ci /// ident, 94fad3a1d3Sopenharmony_ci /// arguments: PathArguments::AngleBracketed(arguments), 95fad3a1d3Sopenharmony_ci /// }); 96fad3a1d3Sopenharmony_ci /// } 97fad3a1d3Sopenharmony_ci /// } 98fad3a1d3Sopenharmony_ci /// Ok(PathSegment::from(ident)) 99fad3a1d3Sopenharmony_ci /// } 100fad3a1d3Sopenharmony_ci /// } 101fad3a1d3Sopenharmony_ci /// 102fad3a1d3Sopenharmony_ci /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); 103fad3a1d3Sopenharmony_ci /// ``` 104fad3a1d3Sopenharmony_ci /// 105fad3a1d3Sopenharmony_ci /// # Drawbacks 106fad3a1d3Sopenharmony_ci /// 107fad3a1d3Sopenharmony_ci /// The main drawback of this style of speculative parsing is in error 108fad3a1d3Sopenharmony_ci /// presentation. Even if the lookahead is the "correct" parse, the error 109fad3a1d3Sopenharmony_ci /// that is shown is that of the "fallback" parse. To use the same example 110fad3a1d3Sopenharmony_ci /// as the turbofish above, take the following unfinished "turbofish": 111fad3a1d3Sopenharmony_ci /// 112fad3a1d3Sopenharmony_ci /// ```text 113fad3a1d3Sopenharmony_ci /// let _ = f<&'a fn(), for<'a> serde::>(); 114fad3a1d3Sopenharmony_ci /// ``` 115fad3a1d3Sopenharmony_ci /// 116fad3a1d3Sopenharmony_ci /// If this is parsed as generic arguments, we can provide the error message 117fad3a1d3Sopenharmony_ci /// 118fad3a1d3Sopenharmony_ci /// ```text 119fad3a1d3Sopenharmony_ci /// error: expected identifier 120fad3a1d3Sopenharmony_ci /// --> src.rs:L:C 121fad3a1d3Sopenharmony_ci /// | 122fad3a1d3Sopenharmony_ci /// L | let _ = f<&'a fn(), for<'a> serde::>(); 123fad3a1d3Sopenharmony_ci /// | ^ 124fad3a1d3Sopenharmony_ci /// ``` 125fad3a1d3Sopenharmony_ci /// 126fad3a1d3Sopenharmony_ci /// but if parsed using the above speculative parsing, it falls back to 127fad3a1d3Sopenharmony_ci /// assuming that the `<` is a less-than when it fails to parse the generic 128fad3a1d3Sopenharmony_ci /// arguments, and tries to interpret the `&'a` as the start of a labelled 129fad3a1d3Sopenharmony_ci /// loop, resulting in the much less helpful error 130fad3a1d3Sopenharmony_ci /// 131fad3a1d3Sopenharmony_ci /// ```text 132fad3a1d3Sopenharmony_ci /// error: expected `:` 133fad3a1d3Sopenharmony_ci /// --> src.rs:L:C 134fad3a1d3Sopenharmony_ci /// | 135fad3a1d3Sopenharmony_ci /// L | let _ = f<&'a fn(), for<'a> serde::>(); 136fad3a1d3Sopenharmony_ci /// | ^^ 137fad3a1d3Sopenharmony_ci /// ``` 138fad3a1d3Sopenharmony_ci /// 139fad3a1d3Sopenharmony_ci /// This can be mitigated with various heuristics (two examples: show both 140fad3a1d3Sopenharmony_ci /// forks' parse errors, or show the one that consumed more tokens), but 141fad3a1d3Sopenharmony_ci /// when you can control the grammar, sticking to something that can be 142fad3a1d3Sopenharmony_ci /// parsed LL(3) and without the LL(*) speculative parsing this makes 143fad3a1d3Sopenharmony_ci /// possible, displaying reasonable errors becomes much more simple. 144fad3a1d3Sopenharmony_ci /// 145fad3a1d3Sopenharmony_ci /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 146fad3a1d3Sopenharmony_ci /// [`PathSegment`]: crate::PathSegment 147fad3a1d3Sopenharmony_ci /// 148fad3a1d3Sopenharmony_ci /// # Performance 149fad3a1d3Sopenharmony_ci /// 150fad3a1d3Sopenharmony_ci /// This method performs a cheap fixed amount of work that does not depend 151fad3a1d3Sopenharmony_ci /// on how far apart the two streams are positioned. 152fad3a1d3Sopenharmony_ci /// 153fad3a1d3Sopenharmony_ci /// # Panics 154fad3a1d3Sopenharmony_ci /// 155fad3a1d3Sopenharmony_ci /// The forked stream in the argument of `advance_to` must have been 156fad3a1d3Sopenharmony_ci /// obtained by forking `self`. Attempting to advance to any other stream 157fad3a1d3Sopenharmony_ci /// will cause a panic. 158fad3a1d3Sopenharmony_ci fn advance_to(&self, fork: &Self); 159fad3a1d3Sopenharmony_ci} 160fad3a1d3Sopenharmony_ci 161fad3a1d3Sopenharmony_ciimpl<'a> Speculative for ParseBuffer<'a> { 162fad3a1d3Sopenharmony_ci fn advance_to(&self, fork: &Self) { 163fad3a1d3Sopenharmony_ci if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { 164fad3a1d3Sopenharmony_ci panic!("Fork was not derived from the advancing parse stream"); 165fad3a1d3Sopenharmony_ci } 166fad3a1d3Sopenharmony_ci 167fad3a1d3Sopenharmony_ci let (self_unexp, self_sp) = inner_unexpected(self); 168fad3a1d3Sopenharmony_ci let (fork_unexp, fork_sp) = inner_unexpected(fork); 169fad3a1d3Sopenharmony_ci if !Rc::ptr_eq(&self_unexp, &fork_unexp) { 170fad3a1d3Sopenharmony_ci match (fork_sp, self_sp) { 171fad3a1d3Sopenharmony_ci // Unexpected set on the fork, but not on `self`, copy it over. 172fad3a1d3Sopenharmony_ci (Some(span), None) => { 173fad3a1d3Sopenharmony_ci self_unexp.set(Unexpected::Some(span)); 174fad3a1d3Sopenharmony_ci } 175fad3a1d3Sopenharmony_ci // Unexpected unset. Use chain to propagate errors from fork. 176fad3a1d3Sopenharmony_ci (None, None) => { 177fad3a1d3Sopenharmony_ci fork_unexp.set(Unexpected::Chain(self_unexp)); 178fad3a1d3Sopenharmony_ci 179fad3a1d3Sopenharmony_ci // Ensure toplevel 'unexpected' tokens from the fork don't 180fad3a1d3Sopenharmony_ci // bubble up the chain by replacing the root `unexpected` 181fad3a1d3Sopenharmony_ci // pointer, only 'unexpected' tokens from existing group 182fad3a1d3Sopenharmony_ci // parsers should bubble. 183fad3a1d3Sopenharmony_ci fork.unexpected 184fad3a1d3Sopenharmony_ci .set(Some(Rc::new(Cell::new(Unexpected::None)))); 185fad3a1d3Sopenharmony_ci } 186fad3a1d3Sopenharmony_ci // Unexpected has been set on `self`. No changes needed. 187fad3a1d3Sopenharmony_ci (_, Some(_)) => {} 188fad3a1d3Sopenharmony_ci } 189fad3a1d3Sopenharmony_ci } 190fad3a1d3Sopenharmony_ci 191fad3a1d3Sopenharmony_ci // See comment on `cell` in the struct definition. 192fad3a1d3Sopenharmony_ci self.cell 193fad3a1d3Sopenharmony_ci .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); 194fad3a1d3Sopenharmony_ci } 195fad3a1d3Sopenharmony_ci} 196fad3a1d3Sopenharmony_ci 197fad3a1d3Sopenharmony_ci/// Extensions to the `ParseStream` API to support manipulating invisible 198fad3a1d3Sopenharmony_ci/// delimiters the same as if they were visible. 199fad3a1d3Sopenharmony_cipub trait AnyDelimiter { 200fad3a1d3Sopenharmony_ci /// Returns the delimiter, the span of the delimiter token, and the nested 201fad3a1d3Sopenharmony_ci /// contents for further parsing. 202fad3a1d3Sopenharmony_ci fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; 203fad3a1d3Sopenharmony_ci} 204fad3a1d3Sopenharmony_ci 205fad3a1d3Sopenharmony_ciimpl<'a> AnyDelimiter for ParseBuffer<'a> { 206fad3a1d3Sopenharmony_ci fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { 207fad3a1d3Sopenharmony_ci self.step(|cursor| { 208fad3a1d3Sopenharmony_ci if let Some((content, delimiter, span, rest)) = cursor.any_group() { 209fad3a1d3Sopenharmony_ci let scope = crate::buffer::close_span_of_group(*cursor); 210fad3a1d3Sopenharmony_ci let nested = crate::parse::advance_step_cursor(cursor, content); 211fad3a1d3Sopenharmony_ci let unexpected = crate::parse::get_unexpected(self); 212fad3a1d3Sopenharmony_ci let content = crate::parse::new_parse_buffer(scope, nested, unexpected); 213fad3a1d3Sopenharmony_ci Ok(((delimiter, span, content), rest)) 214fad3a1d3Sopenharmony_ci } else { 215fad3a1d3Sopenharmony_ci Err(cursor.error("expected any delimiter")) 216fad3a1d3Sopenharmony_ci } 217fad3a1d3Sopenharmony_ci }) 218fad3a1d3Sopenharmony_ci } 219fad3a1d3Sopenharmony_ci} 220