// Copyright 2015 Nicholas Allegra (comex).
// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! Same idea as (but implementation not directly based on) the Python shlex module. However, this
//! implementation does not support any of the Python module's customization because it makes
//! parsing slower and is fairly useless. You only get the default settings of shlex.split, which
//! mimic the POSIX shell:
//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
//!
//! This implementation also deviates from the Python version in not treating `\r` specially, which
//! I believe is more compliant.
//!
//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
//! directly as a micro-optimization.
//!
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
#![cfg_attr(not(feature = "std"), no_std)]

extern crate alloc;
use alloc::vec::Vec;
use alloc::borrow::Cow;
use alloc::string::String;
#[cfg(test)]
use alloc::vec;
#[cfg(test)]
use alloc::borrow::ToOwned;

/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_str`, starting at line 1 with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been read.
    ///
    /// Returns `None` and sets `had_error` when the input ends inside a quotation or directly
    /// after a backslash; otherwise returns the word with quoting and escapes resolved.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> => nothing (line continuation)
                    Some(b'\n') => {}
                    // \x => x
                    Some(ch2) => result.push(ch2),
                    // A trailing backslash has nothing to escape.
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                // Unquoted whitespace terminates the word.
                b' ' | b'\t' | b'\n' => break,
                _ => result.push(ch),
            }
            match self.next_char() {
                Some(ch2) => ch = ch2,
                None => break,
            }
        }
        // SAFETY: the bytes come from a `&str`, so the input is valid UTF-8. Every byte >= 0x80
        // that is read while building a word is pushed unchanged and in order; only ASCII bytes
        // (quotes, backslashes, newlines) are ever dropped or inserted, which cannot break a
        // multi-byte sequence.
        Some(unsafe { String::from_utf8_unchecked(result) })
    }

    /// Consumes the body of a double-quoted section (the opening `"` is already consumed),
    /// appending the unescaped bytes to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing `"`.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch2) = self.next_char() {
            match ch2 {
                b'\\' => match self.next_char() {
                    Some(ch3) => match ch3 {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(ch3),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x
                        _ => {
                            result.push(b'\\');
                            result.push(ch3);
                        }
                    },
                    None => return Err(()),
                },
                b'"' => return Ok(()),
                _ => result.push(ch2),
            }
        }
        Err(())
    }

    /// Consumes the body of a single-quoted section (the opening `'` is already consumed),
    /// appending every byte verbatim to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing `'`.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch2) = self.next_char() {
            match ch2 {
                b'\'' => return Ok(()),
                _ => result.push(ch2),
            }
        }
        Err(())
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Skips whitespace and `#` comments, then parses and returns the next word.
    fn next(&mut self) -> Option<String> {
        // No initial character means the input is exhausted.
        let mut ch = self.next_char()?;
        // skip initial whitespace
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // A comment runs up to and including the next newline (or end of input).
                    while let Some(ch2) = self.next_char() {
                        if ch2 == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}

/// Convenience function that consumes the whole string at once. Returns None if the input was
/// erroneous.
pub fn split(in_str: &str) -> Option<Vec<String>> {
    let mut shl = Shlex::new(in_str);
    // `by_ref` keeps `shl` alive so `had_error` can be inspected after iteration.
    let res = shl.by_ref().collect();
    if shl.had_error { None } else { Some(res) }
}

/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// The empty string becomes `""`. A word containing any shell-significant character is wrapped
/// in double quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped. Any other word is returned
/// unchanged (borrowed). `{` and `}` count as significant so that shells performing brace
/// expansion cannot split the word into several arguments.
pub fn quote(in_str: &str) -> Cow<'_, str> {
    if in_str.is_empty() {
        "\"\"".into()
    } else if in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
        | b'{' | b'}' => true,
        _ => false,
    }) {
        // Two extra bytes for the surrounding quotes; escapes may grow it further.
        let mut out: Vec<u8> = Vec::with_capacity(in_str.len() + 2);
        out.push(b'"');
        for c in in_str.bytes() {
            // These four keep their special meaning inside double quotes.
            match c {
                b'$' | b'`' | b'"' | b'\\' => out.push(b'\\'),
                _ => {}
            }
            out.push(c);
        }
        out.push(b'"');
        // SAFETY: `in_str` is valid UTF-8 and all of its bytes are copied in order; the only
        // additions are ASCII `"` and `\` bytes, each inserted immediately before an ASCII byte
        // or at the ends, so no multi-byte sequence is split.
        unsafe { String::from_utf8_unchecked(out) }.into()
    } else {
        in_str.into()
    }
}

/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary. Consecutive words will be separated by a single space.
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
    words.into_iter()
        .map(quote)
        .collect::<Vec<_>>()
        .join(" ")
}

/// Inputs for `test_split`, each paired with the expected words, or `None` when the input must
/// be rejected.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];

#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&w| w.to_owned()).collect());
        assert_eq!(split(input), expected, "input: {:?}", input);
    }
}
/// Checks that `line_no` reflects the newlines consumed by the time the last word is returned.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    // Track whether the assertion below actually ran, so the test cannot pass vacuously.
    let mut saw_bar = false;
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    assert!(saw_bar, "the word `bar` was never produced");
    assert!(!sh.had_error);
}

/// Checks the three `quote` outcomes: unchanged, wrapped, and wrapped with escapes.
#[test]
fn test_quote() {
    assert_eq!(quote("foobar"), "foobar");
    assert_eq!(quote("foo bar"), "\"foo bar\"");
    assert_eq!(quote("\""), "\"\\\"\"");
    assert_eq!(quote(""), "\"\"");
    // `$` and `\` stay special inside double quotes, so each gains a backslash.
    assert_eq!(quote("a$b"), "\"a\\$b\"");
    assert_eq!(quote("a\\b"), "\"a\\\\b\"");
}

/// Checks that `join` quotes each word independently and separates words with one space.
#[test]
fn test_join() {
    assert_eq!(join(vec![]), "");
    assert_eq!(join(vec![""]), "\"\"");
    assert_eq!(join(vec!["a", "b"]), "a b");
    assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
}