1//! Minimal, flexible command-line parser 2//! 3//! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing 4//! a command-line is not context-free, we rely on the caller to decide how to interpret the 5//! arguments. 6//! 7//! # Examples 8//! 9//! ```rust 10//! use std::path::PathBuf; 11//! 12//! type BoxedError = Box<dyn std::error::Error + Send + Sync>; 13//! 14//! #[derive(Debug)] 15//! struct Args { 16//! paths: Vec<PathBuf>, 17//! color: Color, 18//! verbosity: usize, 19//! } 20//! 21//! #[derive(Debug)] 22//! enum Color { 23//! Always, 24//! Auto, 25//! Never, 26//! } 27//! 28//! impl Color { 29//! fn parse(s: Option<&clap_lex::RawOsStr>) -> Result<Self, BoxedError> { 30//! let s = s.map(|s| s.to_str().ok_or(s)); 31//! match s { 32//! Some(Ok("always")) | Some(Ok("")) | None => { 33//! Ok(Color::Always) 34//! } 35//! Some(Ok("auto")) => { 36//! Ok(Color::Auto) 37//! } 38//! Some(Ok("never")) => { 39//! Ok(Color::Never) 40//! } 41//! Some(invalid) => { 42//! Err(format!("Invalid value for `--color`, {:?}", invalid).into()) 43//! } 44//! } 45//! } 46//! } 47//! 48//! fn parse_args( 49//! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>> 50//! ) -> Result<Args, BoxedError> { 51//! let mut args = Args { 52//! paths: Vec::new(), 53//! color: Color::Auto, 54//! verbosity: 0, 55//! }; 56//! 57//! let raw = clap_lex::RawArgs::new(raw); 58//! let mut cursor = raw.cursor(); 59//! raw.next(&mut cursor); // Skip the bin 60//! while let Some(arg) = raw.next(&mut cursor) { 61//! if arg.is_escape() { 62//! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from)); 63//! } else if arg.is_stdio() { 64//! args.paths.push(PathBuf::from("-")); 65//! } else if let Some((long, value)) = arg.to_long() { 66//! match long { 67//! Ok("verbose") => { 68//! if let Some(value) = value { 69//! return Err(format!("`--verbose` does not take a value, got `{:?}`", value).into()); 70//! } 71//! args.verbosity += 1; 72//! } 73//! Ok("color") => { 74//! args.color = Color::parse(value)?; 75//! } 76//! _ => { 77//! return Err( 78//! format!("Unexpected flag: --{}", arg.display()).into() 79//! ); 80//! } 81//! } 82//! } else if let Some(mut shorts) = arg.to_short() { 83//! while let Some(short) = shorts.next_flag() { 84//! match short { 85//! Ok('v') => { 86//! args.verbosity += 1; 87//! } 88//! Ok('c') => { 89//! let value = shorts.next_value_os(); 90//! args.color = Color::parse(value)?; 91//! } 92//! Ok(c) => { 93//! return Err(format!("Unexpected flag: -{}", c).into()); 94//! } 95//! Err(e) => { 96//! return Err(format!("Unexpected flag: -{}", e.to_str_lossy()).into()); 97//! } 98//! } 99//! } 100//! } else { 101//! args.paths.push(PathBuf::from(arg.to_value_os().to_os_str().into_owned())); 102//! } 103//! } 104//! 105//! Ok(args) 106//! } 107//! 108//! let args = parse_args(["bin", "--hello", "world"]); 109//! println!("{:?}", args); 110//! ``` 111 112use std::ffi::OsStr; 113use std::ffi::OsString; 114 115pub use std::io::SeekFrom; 116 117pub use os_str_bytes::RawOsStr; 118pub use os_str_bytes::RawOsString; 119 120/// Command-line arguments 121#[derive(Default, Clone, Debug, PartialEq, Eq)] 122pub struct RawArgs { 123 items: Vec<OsString>, 124} 125 126impl RawArgs { 127 //// Create an argument list to parse 128 /// 129 /// **NOTE:** The argument returned will be the current binary. 130 /// 131 /// # Example 132 /// 133 /// ```rust,no_run 134 /// # use std::path::PathBuf; 135 /// let raw = clap_lex::RawArgs::from_args(); 136 /// let mut cursor = raw.cursor(); 137 /// let _bin = raw.next_os(&mut cursor); 138 /// 139 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); 140 /// println!("{:?}", paths); 141 /// ``` 142 pub fn from_args() -> Self { 143 Self::new(std::env::args_os()) 144 } 145 146 //// Create an argument list to parse 147 /// 148 /// # Example 149 /// 150 /// ```rust,no_run 151 /// # use std::path::PathBuf; 152 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); 153 /// let mut cursor = raw.cursor(); 154 /// let _bin = raw.next_os(&mut cursor); 155 /// 156 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); 157 /// println!("{:?}", paths); 158 /// ``` 159 pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self { 160 let iter = iter.into_iter(); 161 Self::from(iter) 162 } 163 164 /// Create a cursor for walking the arguments 165 /// 166 /// # Example 167 /// 168 /// ```rust,no_run 169 /// # use std::path::PathBuf; 170 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); 171 /// let mut cursor = raw.cursor(); 172 /// let _bin = raw.next_os(&mut cursor); 173 /// 174 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); 175 /// println!("{:?}", paths); 176 /// ``` 177 pub fn cursor(&self) -> ArgCursor { 178 ArgCursor::new() 179 } 180 181 /// Advance the cursor, returning the next [`ParsedArg`] 182 pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> { 183 self.next_os(cursor).map(ParsedArg::new) 184 } 185 186 /// Advance the cursor, returning a raw argument value. 187 pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> { 188 let next = self.items.get(cursor.cursor).map(|s| s.as_os_str()); 189 cursor.cursor = cursor.cursor.saturating_add(1); 190 next 191 } 192 193 /// Return the next [`ParsedArg`] 194 pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> { 195 self.peek_os(cursor).map(ParsedArg::new) 196 } 197 198 /// Return a raw argument value. 199 pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> { 200 self.items.get(cursor.cursor).map(|s| s.as_os_str()) 201 } 202 203 /// Return all remaining raw arguments, advancing the cursor to the end 204 /// 205 /// # Example 206 /// 207 /// ```rust,no_run 208 /// # use std::path::PathBuf; 209 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); 210 /// let mut cursor = raw.cursor(); 211 /// let _bin = raw.next_os(&mut cursor); 212 /// 213 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); 214 /// println!("{:?}", paths); 215 /// ``` 216 pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> { 217 let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str()); 218 cursor.cursor = self.items.len(); 219 remaining 220 } 221 222 /// Adjust the cursor's position 223 pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) { 224 let pos = match pos { 225 SeekFrom::Start(pos) => pos, 226 SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64, 227 SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64, 228 }; 229 let pos = (pos as usize).min(self.items.len()); 230 cursor.cursor = pos; 231 } 232 233 /// Inject arguments before the [`RawArgs::next`] 234 pub fn insert( 235 &mut self, 236 cursor: &ArgCursor, 237 insert_items: impl IntoIterator<Item = impl Into<OsString>>, 238 ) { 239 self.items.splice( 240 cursor.cursor..cursor.cursor, 241 insert_items.into_iter().map(Into::into), 242 ); 243 } 244 245 /// Any remaining args? 246 pub fn is_end(&self, cursor: &ArgCursor) -> bool { 247 self.peek_os(cursor).is_none() 248 } 249} 250 251impl<I, T> From<I> for RawArgs 252where 253 I: Iterator<Item = T>, 254 T: Into<OsString>, 255{ 256 fn from(val: I) -> Self { 257 Self { 258 items: val.map(|x| x.into()).collect(), 259 } 260 } 261} 262 263/// Position within [`RawArgs`] 264#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 265pub struct ArgCursor { 266 cursor: usize, 267} 268 269impl ArgCursor { 270 fn new() -> Self { 271 Self { cursor: 0 } 272 } 273} 274 275/// Command-line Argument 276#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 277pub struct ParsedArg<'s> { 278 inner: std::borrow::Cow<'s, RawOsStr>, 279 utf8: Option<&'s str>, 280} 281 282impl<'s> ParsedArg<'s> { 283 fn new(inner: &'s OsStr) -> Self { 284 let utf8 = inner.to_str(); 285 let inner = RawOsStr::new(inner); 286 Self { inner, utf8 } 287 } 288 289 /// Argument is length of 0 290 pub fn is_empty(&self) -> bool { 291 self.inner.as_ref().is_empty() 292 } 293 294 /// Does the argument look like a stdio argument (`-`) 295 pub fn is_stdio(&self) -> bool { 296 self.inner.as_ref() == "-" 297 } 298 299 /// Does the argument look like an argument escape (`--`) 300 pub fn is_escape(&self) -> bool { 301 self.inner.as_ref() == "--" 302 } 303 304 /// Does the argument look like a number 305 pub fn is_number(&self) -> bool { 306 self.to_value() 307 .map(|s| s.parse::<f64>().is_ok()) 308 .unwrap_or_default() 309 } 310 311 /// Treat as a long-flag 312 pub fn to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)> { 313 if let Some(raw) = self.utf8 { 314 let remainder = raw.strip_prefix("--")?; 315 if remainder.is_empty() { 316 debug_assert!(self.is_escape()); 317 return None; 318 } 319 320 let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') { 321 (p0, Some(p1)) 322 } else { 323 (remainder, None) 324 }; 325 let flag = Ok(flag); 326 let value = value.map(RawOsStr::from_str); 327 Some((flag, value)) 328 } else { 329 let raw = self.inner.as_ref(); 330 let remainder = raw.strip_prefix("--")?; 331 if remainder.is_empty() { 332 debug_assert!(self.is_escape()); 333 return None; 334 } 335 336 let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') { 337 (p0, Some(p1)) 338 } else { 339 (remainder, None) 340 }; 341 let flag = flag.to_str().ok_or(flag); 342 Some((flag, value)) 343 } 344 } 345 346 /// Can treat as a long-flag 347 pub fn is_long(&self) -> bool { 348 self.inner.as_ref().starts_with("--") && !self.is_escape() 349 } 350 351 /// Treat as a short-flag 352 pub fn to_short(&self) -> Option<ShortFlags<'_>> { 353 if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') { 354 if remainder_os.starts_with('-') { 355 None 356 } else if remainder_os.is_empty() { 357 debug_assert!(self.is_stdio()); 358 None 359 } else { 360 let remainder = self.utf8.map(|s| &s[1..]); 361 Some(ShortFlags::new(remainder_os, remainder)) 362 } 363 } else { 364 None 365 } 366 } 367 368 /// Can treat as a short-flag 369 pub fn is_short(&self) -> bool { 370 self.inner.as_ref().starts_with('-') 371 && !self.is_stdio() 372 && !self.inner.as_ref().starts_with("--") 373 } 374 375 /// Treat as a value 376 /// 377 /// **NOTE:** May return a flag or an escape. 378 pub fn to_value_os(&self) -> &RawOsStr { 379 self.inner.as_ref() 380 } 381 382 /// Treat as a value 383 /// 384 /// **NOTE:** May return a flag or an escape. 385 pub fn to_value(&self) -> Result<&str, &RawOsStr> { 386 self.utf8.ok_or_else(|| self.inner.as_ref()) 387 } 388 389 /// Safely print an argument that may contain non-UTF8 content 390 /// 391 /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead. 392 pub fn display(&self) -> impl std::fmt::Display + '_ { 393 self.inner.to_str_lossy() 394 } 395} 396 397/// Walk through short flags within a [`ParsedArg`] 398#[derive(Clone, Debug)] 399pub struct ShortFlags<'s> { 400 inner: &'s RawOsStr, 401 utf8_prefix: std::str::CharIndices<'s>, 402 invalid_suffix: Option<&'s RawOsStr>, 403} 404 405impl<'s> ShortFlags<'s> { 406 fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self { 407 let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 { 408 (utf8, None) 409 } else { 410 split_nonutf8_once(inner) 411 }; 412 let utf8_prefix = utf8_prefix.char_indices(); 413 Self { 414 inner, 415 utf8_prefix, 416 invalid_suffix, 417 } 418 } 419 420 /// Move the iterator forward by `n` short flags 421 pub fn advance_by(&mut self, n: usize) -> Result<(), usize> { 422 for i in 0..n { 423 self.next().ok_or(i)?.map_err(|_| i)?; 424 } 425 Ok(()) 426 } 427 428 /// No short flags left 429 pub fn is_empty(&self) -> bool { 430 self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty() 431 } 432 433 /// Does the short flag look like a number 434 /// 435 /// Ideally call this before doing any iterator 436 pub fn is_number(&self) -> bool { 437 self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok() 438 } 439 440 /// Advance the iterator, returning the next short flag on success 441 /// 442 /// On error, returns the invalid-UTF8 value 443 pub fn next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>> { 444 if let Some((_, flag)) = self.utf8_prefix.next() { 445 return Some(Ok(flag)); 446 } 447 448 if let Some(suffix) = self.invalid_suffix { 449 self.invalid_suffix = None; 450 return Some(Err(suffix)); 451 } 452 453 None 454 } 455 456 /// Advance the iterator, returning everything left as a value 457 pub fn next_value_os(&mut self) -> Option<&'s RawOsStr> { 458 if let Some((index, _)) = self.utf8_prefix.next() { 459 self.utf8_prefix = "".char_indices(); 460 self.invalid_suffix = None; 461 return Some(&self.inner[index..]); 462 } 463 464 if let Some(suffix) = self.invalid_suffix { 465 self.invalid_suffix = None; 466 return Some(suffix); 467 } 468 469 None 470 } 471} 472 473impl<'s> Iterator for ShortFlags<'s> { 474 type Item = Result<char, &'s RawOsStr>; 475 476 fn next(&mut self) -> Option<Self::Item> { 477 self.next_flag() 478 } 479} 480 481fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) { 482 match std::str::from_utf8(b.as_raw_bytes()) { 483 Ok(s) => (s, None), 484 Err(err) => { 485 let (valid, after_valid) = b.split_at(err.valid_up_to()); 486 let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap(); 487 (valid, Some(after_valid)) 488 } 489 } 490} 491