// Copyright 2015 Nicholas Allegra (comex).
// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! Same idea as (but implementation not directly based on) the Python shlex module.  However, this
//! implementation does not support any of the Python module's customization because it makes
//! parsing slower and is fairly useless.  You only get the default settings of shlex.split, which
//! mimic the POSIX shell:
//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
//!
//! This implementation also deviates from the Python version in not treating `\r` specially, which
//! I believe is more compliant.
//!
//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
//! directly as a micro-optimization.
//!
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.

#![cfg_attr(not(feature = "std"), no_std)]

extern crate alloc;
use alloc::vec::Vec;
use alloc::borrow::Cow;
use alloc::string::String;
#[cfg(test)]
use alloc::vec;
#[cfg(test)]
use alloc::borrow::ToOwned;

/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    /// Remaining, not yet consumed bytes of the input string.
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash.  Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a tokenizer over `in_str`, positioned on line 1 with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been read.
    ///
    /// Consumes input up to and including the unquoted whitespace byte that ends the word (or up
    /// to the end of input).  Returns `None` and sets `had_error` if the input ends inside a
    /// quotation or right after an unescaped backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> is a line continuation and contributes nothing to the word.
                    Some(b'\n') => {}
                    Some(escaped) => result.push(escaped),
                    // A trailing backslash has nothing to escape.
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                _ => result.push(ch),
            }
            match self.next_char() {
                Some(next) => ch = next,
                None => break,
            }
        }
        // SAFETY: the input is a `&str` and therefore valid UTF-8.  Every non-ASCII byte of the
        // word is copied to `result` in its original order; the parser only ever drops or inserts
        // ASCII bytes, and it only does so next to other ASCII bytes of the input, so no multi-byte
        // sequence is ever split or altered.
        Some(unsafe { String::from_utf8_unchecked(result) })
    }

    /// Parses the inside of a double-quoted section; the opening `"` has already been consumed.
    ///
    /// Appends the unescaped contents to `result` and returns `Ok(())` once the closing `"` is
    /// read, or `Err(())` if the input ends first.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch) = self.next_char() {
            match ch {
                b'\\' => {
                    // Input ending right after the backslash is an error.
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    }
                }
                b'"' => return Ok(()),
                _ => result.push(ch),
            }
        }
        Err(())
    }

    /// Parses the inside of a single-quoted section; the opening `'` has already been consumed.
    ///
    /// Every byte up to the closing `'` is copied verbatim (backslashes included).  Returns
    /// `Err(())` if the input ends before the closing quote.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch) = self.next_char() {
            if ch == b'\'' {
                return Ok(());
            }
            result.push(ch);
        }
        Err(())
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Returns the next word, or `None` at the end of input or on a parse error (in which case
    /// `had_error` is set).
    fn next(&mut self) -> Option<String> {
        let mut ch = self.next_char()?;
        // Skip leading whitespace and `#` comments until the first byte of a word is found.
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // A comment runs up to and including the next newline (or the end of input).
                    while let Some(c) = self.next_char() {
                        if c == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}

/// Convenience function that consumes the whole string at once.  Returns None if the input was
/// erroneous.
pub fn split(in_str: &str) -> Option<Vec<String>> {
    let mut shl = Shlex::new(in_str);
    // `by_ref` keeps `shl` usable so that `had_error` can be inspected afterwards.
    let res = shl.by_ref().collect();
    if shl.had_error { None } else { Some(res) }
}

/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * The empty string is encoded as `""`.
/// * A word containing any shell-special byte is wrapped in double quotes, with `$`, `` ` ``, `"`
///   and `\` escaped by a backslash.  `{` and `}` count as special so that the result is not
///   subject to brace expansion in shells that implement it.
/// * Any other word is returned borrowed and unchanged.
///
/// This function cannot fail, so bytes that no quoting can protect (such as nul) are passed
/// through unchanged; callers handling untrusted input should reject them beforehand.
pub fn quote(in_str: &str) -> Cow<'_, str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }
    let needs_quoting = in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
        // Brace expansion is not part of the POSIX list but is performed by common shells.
        | b'{' | b'}' => true,
        _ => false,
    });
    if !needs_quoting {
        return in_str.into();
    }
    // Two surrounding quotes plus the word itself; escapes may still grow the buffer.
    let mut out: Vec<u8> = Vec::with_capacity(in_str.len() + 2);
    out.push(b'"');
    for c in in_str.bytes() {
        // These are the only bytes that keep a special meaning inside double quotes.
        match c {
            b'$' | b'`' | b'"' | b'\\' => out.push(b'\\'),
            _ => {}
        }
        out.push(c);
    }
    out.push(b'"');
    // SAFETY: `out` is the valid UTF-8 input with ASCII bytes inserted only directly before other
    // ASCII bytes (and at both ends), so no multi-byte sequence is split.
    Cow::Owned(unsafe { String::from_utf8_unchecked(out) })
}

/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary.  Consecutive words will be separated by a single space.
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
    let mut out = String::new();
    for (index, word) in words.into_iter().enumerate() {
        if index > 0 {
            out.push(' ');
        }
        out.push_str(&quote(word));
    }
    out
}

#[cfg(test)]
static SPLIT_TEST_ITEMS: &'static [(&'static str, Option<&'static [&'static str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];

#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect()));
    }
}

#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    // Not a `for` loop: `sh.line_no` is read while iterating.
    while let Some(word) = sh.next() {
        if word == "bar" {
            assert_eq!(sh.line_no, 3);
        }
    }
}

#[test]
fn test_quote() {
    assert_eq!(quote("foobar"), "foobar");
    assert_eq!(quote("foo bar"), "\"foo bar\"");
    assert_eq!(quote("\""), "\"\\\"\"");
    assert_eq!(quote(""), "\"\"");
    assert_eq!(quote("{a,b}"), "\"{a,b}\"");
}

#[test]
fn test_join() {
    assert_eq!(join(vec![]), "");
    assert_eq!(join(vec![""]), "\"\"");
    assert_eq!(join(vec!["a", "b"]), "a b");
    assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
}