xref: /third_party/rust/crates/clap/clap_lex/src/lib.rs (revision 19625d8c)
1//! Minimal, flexible command-line parser
2//!
3//! As opposed to a declarative parser, this processes arguments as a stream of tokens.  As lexing
4//! a command-line is not context-free, we rely on the caller to decide how to interpret the
5//! arguments.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use std::path::PathBuf;
11//!
12//! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
13//!
14//! #[derive(Debug)]
15//! struct Args {
16//!     paths: Vec<PathBuf>,
17//!     color: Color,
18//!     verbosity: usize,
19//! }
20//!
21//! #[derive(Debug)]
22//! enum Color {
23//!     Always,
24//!     Auto,
25//!     Never,
26//! }
27//!
28//! impl Color {
29//!     fn parse(s: Option<&clap_lex::RawOsStr>) -> Result<Self, BoxedError> {
30//!         let s = s.map(|s| s.to_str().ok_or(s));
31//!         match s {
32//!             Some(Ok("always")) | Some(Ok("")) | None => {
33//!                 Ok(Color::Always)
34//!             }
35//!             Some(Ok("auto")) => {
36//!                 Ok(Color::Auto)
37//!             }
38//!             Some(Ok("never")) => {
39//!                 Ok(Color::Never)
40//!             }
41//!             Some(invalid) => {
42//!                 Err(format!("Invalid value for `--color`, {:?}", invalid).into())
43//!             }
44//!         }
45//!     }
46//! }
47//!
48//! fn parse_args(
49//!     raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>>
50//! ) -> Result<Args, BoxedError> {
51//!     let mut args = Args {
52//!         paths: Vec::new(),
53//!         color: Color::Auto,
54//!         verbosity: 0,
55//!     };
56//!
57//!     let raw = clap_lex::RawArgs::new(raw);
58//!     let mut cursor = raw.cursor();
59//!     raw.next(&mut cursor);  // Skip the bin
60//!     while let Some(arg) = raw.next(&mut cursor) {
61//!         if arg.is_escape() {
62//!             args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from));
63//!         } else if arg.is_stdio() {
64//!             args.paths.push(PathBuf::from("-"));
65//!         } else if let Some((long, value)) = arg.to_long() {
66//!             match long {
67//!                 Ok("verbose") => {
68//!                     if let Some(value) = value {
69//!                         return Err(format!("`--verbose` does not take a value, got `{:?}`", value).into());
70//!                     }
71//!                     args.verbosity += 1;
72//!                 }
73//!                 Ok("color") => {
74//!                     args.color = Color::parse(value)?;
75//!                 }
76//!                 _ => {
77//!                     return Err(
78//!                         format!("Unexpected flag: --{}", arg.display()).into()
79//!                     );
80//!                 }
81//!             }
82//!         } else if let Some(mut shorts) = arg.to_short() {
83//!             while let Some(short) = shorts.next_flag() {
84//!                 match short {
85//!                     Ok('v') => {
86//!                         args.verbosity += 1;
87//!                     }
88//!                     Ok('c') => {
89//!                         let value = shorts.next_value_os();
90//!                         args.color = Color::parse(value)?;
91//!                     }
92//!                     Ok(c) => {
93//!                         return Err(format!("Unexpected flag: -{}", c).into());
94//!                     }
95//!                     Err(e) => {
96//!                         return Err(format!("Unexpected flag: -{}", e.to_str_lossy()).into());
97//!                     }
98//!                 }
99//!             }
100//!         } else {
101//!             args.paths.push(PathBuf::from(arg.to_value_os().to_os_str().into_owned()));
102//!         }
103//!     }
104//!
105//!     Ok(args)
106//! }
107//!
108//! let args = parse_args(["bin", "--hello", "world"]);
109//! println!("{:?}", args);
110//! ```
111
112use std::ffi::OsStr;
113use std::ffi::OsString;
114
115pub use std::io::SeekFrom;
116
117pub use os_str_bytes::RawOsStr;
118pub use os_str_bytes::RawOsString;
119
/// An owned list of raw command-line arguments
///
/// The list itself stores no position; iteration state lives in a separate
/// [`ArgCursor`] that is handed to the accessor methods.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct RawArgs {
    /// The arguments, in the order they were supplied.
    items: Vec<OsString>,
}
125
126impl RawArgs {
127    //// Create an argument list to parse
128    ///
129    /// **NOTE:** The argument returned will be the current binary.
130    ///
131    /// # Example
132    ///
133    /// ```rust,no_run
134    /// # use std::path::PathBuf;
135    /// let raw = clap_lex::RawArgs::from_args();
136    /// let mut cursor = raw.cursor();
137    /// let _bin = raw.next_os(&mut cursor);
138    ///
139    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
140    /// println!("{:?}", paths);
141    /// ```
142    pub fn from_args() -> Self {
143        Self::new(std::env::args_os())
144    }
145
146    //// Create an argument list to parse
147    ///
148    /// # Example
149    ///
150    /// ```rust,no_run
151    /// # use std::path::PathBuf;
152    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
153    /// let mut cursor = raw.cursor();
154    /// let _bin = raw.next_os(&mut cursor);
155    ///
156    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
157    /// println!("{:?}", paths);
158    /// ```
159    pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self {
160        let iter = iter.into_iter();
161        Self::from(iter)
162    }
163
164    /// Create a cursor for walking the arguments
165    ///
166    /// # Example
167    ///
168    /// ```rust,no_run
169    /// # use std::path::PathBuf;
170    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
171    /// let mut cursor = raw.cursor();
172    /// let _bin = raw.next_os(&mut cursor);
173    ///
174    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
175    /// println!("{:?}", paths);
176    /// ```
177    pub fn cursor(&self) -> ArgCursor {
178        ArgCursor::new()
179    }
180
181    /// Advance the cursor, returning the next [`ParsedArg`]
182    pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
183        self.next_os(cursor).map(ParsedArg::new)
184    }
185
186    /// Advance the cursor, returning a raw argument value.
187    pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
188        let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
189        cursor.cursor = cursor.cursor.saturating_add(1);
190        next
191    }
192
193    /// Return the next [`ParsedArg`]
194    pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
195        self.peek_os(cursor).map(ParsedArg::new)
196    }
197
198    /// Return a raw argument value.
199    pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
200        self.items.get(cursor.cursor).map(|s| s.as_os_str())
201    }
202
203    /// Return all remaining raw arguments, advancing the cursor to the end
204    ///
205    /// # Example
206    ///
207    /// ```rust,no_run
208    /// # use std::path::PathBuf;
209    /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
210    /// let mut cursor = raw.cursor();
211    /// let _bin = raw.next_os(&mut cursor);
212    ///
213    /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
214    /// println!("{:?}", paths);
215    /// ```
216    pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> {
217        let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str());
218        cursor.cursor = self.items.len();
219        remaining
220    }
221
222    /// Adjust the cursor's position
223    pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) {
224        let pos = match pos {
225            SeekFrom::Start(pos) => pos,
226            SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64,
227            SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64,
228        };
229        let pos = (pos as usize).min(self.items.len());
230        cursor.cursor = pos;
231    }
232
233    /// Inject arguments before the [`RawArgs::next`]
234    pub fn insert(
235        &mut self,
236        cursor: &ArgCursor,
237        insert_items: impl IntoIterator<Item = impl Into<OsString>>,
238    ) {
239        self.items.splice(
240            cursor.cursor..cursor.cursor,
241            insert_items.into_iter().map(Into::into),
242        );
243    }
244
245    /// Any remaining args?
246    pub fn is_end(&self, cursor: &ArgCursor) -> bool {
247        self.peek_os(cursor).is_none()
248    }
249}
250
251impl<I, T> From<I> for RawArgs
252where
253    I: Iterator<Item = T>,
254    T: Into<OsString>,
255{
256    fn from(val: I) -> Self {
257        Self {
258            items: val.map(|x| x.into()).collect(),
259        }
260    }
261}
262
/// A position within a [`RawArgs`] argument list
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct ArgCursor {
    /// Index of the next argument to be read.
    cursor: usize,
}

impl ArgCursor {
    /// Build a cursor positioned at index `0`.
    fn new() -> Self {
        ArgCursor { cursor: 0 }
    }
}
274
275/// Command-line Argument
276#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
277pub struct ParsedArg<'s> {
278    inner: std::borrow::Cow<'s, RawOsStr>,
279    utf8: Option<&'s str>,
280}
281
282impl<'s> ParsedArg<'s> {
283    fn new(inner: &'s OsStr) -> Self {
284        let utf8 = inner.to_str();
285        let inner = RawOsStr::new(inner);
286        Self { inner, utf8 }
287    }
288
289    /// Argument is length of 0
290    pub fn is_empty(&self) -> bool {
291        self.inner.as_ref().is_empty()
292    }
293
294    /// Does the argument look like a stdio argument (`-`)
295    pub fn is_stdio(&self) -> bool {
296        self.inner.as_ref() == "-"
297    }
298
299    /// Does the argument look like an argument escape (`--`)
300    pub fn is_escape(&self) -> bool {
301        self.inner.as_ref() == "--"
302    }
303
304    /// Does the argument look like a number
305    pub fn is_number(&self) -> bool {
306        self.to_value()
307            .map(|s| s.parse::<f64>().is_ok())
308            .unwrap_or_default()
309    }
310
311    /// Treat as a long-flag
312    pub fn to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)> {
313        if let Some(raw) = self.utf8 {
314            let remainder = raw.strip_prefix("--")?;
315            if remainder.is_empty() {
316                debug_assert!(self.is_escape());
317                return None;
318            }
319
320            let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
321                (p0, Some(p1))
322            } else {
323                (remainder, None)
324            };
325            let flag = Ok(flag);
326            let value = value.map(RawOsStr::from_str);
327            Some((flag, value))
328        } else {
329            let raw = self.inner.as_ref();
330            let remainder = raw.strip_prefix("--")?;
331            if remainder.is_empty() {
332                debug_assert!(self.is_escape());
333                return None;
334            }
335
336            let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
337                (p0, Some(p1))
338            } else {
339                (remainder, None)
340            };
341            let flag = flag.to_str().ok_or(flag);
342            Some((flag, value))
343        }
344    }
345
346    /// Can treat as a long-flag
347    pub fn is_long(&self) -> bool {
348        self.inner.as_ref().starts_with("--") && !self.is_escape()
349    }
350
351    /// Treat as a short-flag
352    pub fn to_short(&self) -> Option<ShortFlags<'_>> {
353        if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') {
354            if remainder_os.starts_with('-') {
355                None
356            } else if remainder_os.is_empty() {
357                debug_assert!(self.is_stdio());
358                None
359            } else {
360                let remainder = self.utf8.map(|s| &s[1..]);
361                Some(ShortFlags::new(remainder_os, remainder))
362            }
363        } else {
364            None
365        }
366    }
367
368    /// Can treat as a short-flag
369    pub fn is_short(&self) -> bool {
370        self.inner.as_ref().starts_with('-')
371            && !self.is_stdio()
372            && !self.inner.as_ref().starts_with("--")
373    }
374
375    /// Treat as a value
376    ///
377    /// **NOTE:** May return a flag or an escape.
378    pub fn to_value_os(&self) -> &RawOsStr {
379        self.inner.as_ref()
380    }
381
382    /// Treat as a value
383    ///
384    /// **NOTE:** May return a flag or an escape.
385    pub fn to_value(&self) -> Result<&str, &RawOsStr> {
386        self.utf8.ok_or_else(|| self.inner.as_ref())
387    }
388
389    /// Safely print an argument that may contain non-UTF8 content
390    ///
391    /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
392    pub fn display(&self) -> impl std::fmt::Display + '_ {
393        self.inner.to_str_lossy()
394    }
395}
396
397/// Walk through short flags within a [`ParsedArg`]
398#[derive(Clone, Debug)]
399pub struct ShortFlags<'s> {
400    inner: &'s RawOsStr,
401    utf8_prefix: std::str::CharIndices<'s>,
402    invalid_suffix: Option<&'s RawOsStr>,
403}
404
405impl<'s> ShortFlags<'s> {
406    fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
407        let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 {
408            (utf8, None)
409        } else {
410            split_nonutf8_once(inner)
411        };
412        let utf8_prefix = utf8_prefix.char_indices();
413        Self {
414            inner,
415            utf8_prefix,
416            invalid_suffix,
417        }
418    }
419
420    /// Move the iterator forward by `n` short flags
421    pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
422        for i in 0..n {
423            self.next().ok_or(i)?.map_err(|_| i)?;
424        }
425        Ok(())
426    }
427
428    /// No short flags left
429    pub fn is_empty(&self) -> bool {
430        self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty()
431    }
432
433    /// Does the short flag look like a number
434    ///
435    /// Ideally call this before doing any iterator
436    pub fn is_number(&self) -> bool {
437        self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok()
438    }
439
440    /// Advance the iterator, returning the next short flag on success
441    ///
442    /// On error, returns the invalid-UTF8 value
443    pub fn next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>> {
444        if let Some((_, flag)) = self.utf8_prefix.next() {
445            return Some(Ok(flag));
446        }
447
448        if let Some(suffix) = self.invalid_suffix {
449            self.invalid_suffix = None;
450            return Some(Err(suffix));
451        }
452
453        None
454    }
455
456    /// Advance the iterator, returning everything left as a value
457    pub fn next_value_os(&mut self) -> Option<&'s RawOsStr> {
458        if let Some((index, _)) = self.utf8_prefix.next() {
459            self.utf8_prefix = "".char_indices();
460            self.invalid_suffix = None;
461            return Some(&self.inner[index..]);
462        }
463
464        if let Some(suffix) = self.invalid_suffix {
465            self.invalid_suffix = None;
466            return Some(suffix);
467        }
468
469        None
470    }
471}
472
473impl<'s> Iterator for ShortFlags<'s> {
474    type Item = Result<char, &'s RawOsStr>;
475
476    fn next(&mut self) -> Option<Self::Item> {
477        self.next_flag()
478    }
479}
480
481fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
482    match std::str::from_utf8(b.as_raw_bytes()) {
483        Ok(s) => (s, None),
484        Err(err) => {
485            let (valid, after_valid) = b.split_at(err.valid_up_to());
486            let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
487            (valid, Some(after_valid))
488        }
489    }
490}
491