1a078c8edSopenharmony_ci// Copyright 2015 Nicholas Allegra (comex).
2a078c8edSopenharmony_ci// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
3a078c8edSopenharmony_ci// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
4a078c8edSopenharmony_ci// copied, modified, or distributed except according to those terms.
5a078c8edSopenharmony_ci
6a078c8edSopenharmony_ci//! Same idea as (but implementation not directly based on) the Python shlex module.  However, this
7a078c8edSopenharmony_ci//! implementation does not support any of the Python module's customization because it makes
8a078c8edSopenharmony_ci//! parsing slower and is fairly useless.  You only get the default settings of shlex.split, which
9a078c8edSopenharmony_ci//! mimic the POSIX shell:
10a078c8edSopenharmony_ci//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
11a078c8edSopenharmony_ci//!
12a078c8edSopenharmony_ci//! This implementation also deviates from the Python version in not treating `\r` specially, which
13a078c8edSopenharmony_ci//! I believe is more compliant.
14a078c8edSopenharmony_ci//!
15a078c8edSopenharmony_ci//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
16a078c8edSopenharmony_ci//! directly as a micro-optimization.
17a078c8edSopenharmony_ci//!
18a078c8edSopenharmony_ci//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
19a078c8edSopenharmony_ci//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
20a078c8edSopenharmony_ci
21a078c8edSopenharmony_ci#![cfg_attr(not(feature = "std"), no_std)]
22a078c8edSopenharmony_ci
23a078c8edSopenharmony_ciextern crate alloc;
24a078c8edSopenharmony_ciuse alloc::vec::Vec;
25a078c8edSopenharmony_ciuse alloc::borrow::Cow;
26a078c8edSopenharmony_ciuse alloc::string::String;
27a078c8edSopenharmony_ci#[cfg(test)]
28a078c8edSopenharmony_ciuse alloc::vec;
29a078c8edSopenharmony_ci#[cfg(test)]
30a078c8edSopenharmony_ciuse alloc::borrow::ToOwned;
31a078c8edSopenharmony_ci
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
///
/// Each call to `next` skips leading blanks, `#` comments and line continuations
/// (backslash-newline), then yields one word with its quoting removed.
pub struct Shlex<'a> {
    /// The input bytes that have not been consumed yet.
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash.  Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_str`, positioned on line 1 with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been read.
    ///
    /// Consumes input up to and including the unquoted blank that terminates the word (or up to
    /// the end of input).  Returns `None` and sets `had_error` if the input ends inside a
    /// quotation or right after a backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> is a line continuation: it contributes nothing to the word.
                    Some(b'\n') => {}
                    // Any other escaped byte is taken literally.
                    Some(escaped) => result.push(escaped),
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                _ => result.push(ch),
            }
            match self.next_char() {
                Some(following) => ch = following,
                None => break,
            }
        }
        // SAFETY: the bytes come from a `&str`, in their original order.  The only bytes dropped
        // or inserted are ASCII, and an inserted backslash always lands directly after another
        // ASCII byte (the backslash read from the input), so no multi-byte sequence is ever
        // split and `result` is still valid UTF-8.
        Some(unsafe { String::from_utf8_unchecked(result) })
    }

    /// Appends the contents of a double-quoted section to `result`, the opening `"` having
    /// already been consumed.  Returns `Err(())` if the input ends before the closing `"`.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char() {
                None => return Err(()),
                Some(b'"') => return Ok(()),
                Some(b'\\') => match self.next_char() {
                    None => return Err(()),
                    Some(escaped) => match escaped {
                        // \$ => $  (likewise for `, " and \)
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x  (the backslash is kept for every other byte)
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    },
                },
                Some(plain) => result.push(plain),
            }
        }
    }

    /// Appends the contents of a single-quoted section to `result`, the opening `'` having
    /// already been consumed.  Every byte up to the closing `'` is literal.  Returns `Err(())`
    /// if the input ends before the closing `'`.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char() {
                None => return Err(()),
                Some(b'\'') => return Ok(()),
                Some(plain) => result.push(plain),
            }
        }
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Returns the next word, or `None` at the end of input or after an error (in which case
    /// `had_error` is set).
    fn next(&mut self) -> Option<String> {
        loop {
            let ch = self.next_char()?;
            match ch {
                // skip initial whitespace
                b' ' | b'\t' | b'\n' => {}
                // a comment runs up to (and including) the next newline
                b'#' => {
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                // A line continuation between words is removed like whitespace; handing it to
                // `parse_word` would produce a spurious empty word.  Peek on a clone so that a
                // backslash followed by anything else is left for `parse_word`.
                b'\\' if self.in_iter.clone().next() == Some(b'\n') => {
                    let _ = self.next_char();
                }
                _ => return self.parse_word(ch),
            }
        }
    }
}
151a078c8edSopenharmony_ci
152a078c8edSopenharmony_ci/// Convenience function that consumes the whole string at once.  Returns None if the input was
153a078c8edSopenharmony_ci/// erroneous.
154a078c8edSopenharmony_cipub fn split(in_str: &str) -> Option<Vec<String>> {
155a078c8edSopenharmony_ci    let mut shl = Shlex::new(in_str);
156a078c8edSopenharmony_ci    let res = shl.by_ref().collect();
157a078c8edSopenharmony_ci    if shl.had_error { None } else { Some(res) }
158a078c8edSopenharmony_ci}
159a078c8edSopenharmony_ci
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * An empty word becomes `""`.
/// * A word containing any character that is (or may be) special to a shell is wrapped in double
///   quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped.  Braces are included in that set
///   so that shells performing brace expansion do not split a word such as `{a,b}`.
/// * Any other word is returned unchanged, borrowed from the input.
///
/// Note that `!` is not treated specially here.
pub fn quote(in_str: &str) -> Cow<str> {
    /// True for bytes that must not appear unquoted in a shell word.
    fn needs_quoting(byte: u8) -> bool {
        match byte {
            b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"'
            | b'\'' | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'='
            | b'%' | b'{' | b'}' => true,
            _ => false,
        }
    }

    if in_str.is_empty() {
        return "\"\"".into();
    }
    if !in_str.bytes().any(needs_quoting) {
        return in_str.into();
    }

    // Two extra bytes for the surrounding quotes; escapes may grow it further.
    let mut out = String::with_capacity(in_str.len() + 2);
    out.push('"');
    for c in in_str.chars() {
        // These four keep their special meaning inside double quotes and need a backslash.
        match c {
            '$' | '`' | '"' | '\\' => out.push('\\'),
            _ => {}
        }
        out.push(c);
    }
    out.push('"');
    out.into()
}
184a078c8edSopenharmony_ci
185a078c8edSopenharmony_ci/// Convenience function that consumes an iterable of words and turns it into a single string,
186a078c8edSopenharmony_ci/// quoting words when necessary. Consecutive words will be separated by a single space.
187a078c8edSopenharmony_cipub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
188a078c8edSopenharmony_ci    words.into_iter()
189a078c8edSopenharmony_ci        .map(quote)
190a078c8edSopenharmony_ci        .collect::<Vec<_>>()
191a078c8edSopenharmony_ci        .join(" ")
192a078c8edSopenharmony_ci}
193a078c8edSopenharmony_ci
// Table of `split` cases: each entry pairs an input string with the expected words, or with
// `None` when the input is expected to be rejected as erroneous.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
217a078c8edSopenharmony_ci
// Runs every entry of SPLIT_TEST_ITEMS through `split` and compares with the expected words.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        // Turn the borrowed expectation into the owned shape that `split` returns.
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), expected);
    }
}
224a078c8edSopenharmony_ci
// Checks that `line_no` counts the newlines consumed while lexing.  The words are asserted
// one by one so the test cannot pass without ever reaching the line-number check.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    assert_eq!(sh.line_no, 1);
    assert_eq!(sh.next(), Some(String::from("foo")));
    assert_eq!(sh.next(), Some(String::from("bar")));
    // Two newlines have been read by the time "bar" is produced.
    assert_eq!(sh.line_no, 3);
    assert_eq!(sh.next(), None);
    assert!(!sh.had_error);
}
234a078c8edSopenharmony_ci
// Table-driven check of `quote`: (word, expected encoding).
#[test]
fn test_quote() {
    let cases = [
        ("foobar", "foobar"),
        ("foo bar", "\"foo bar\""),
        ("\"", "\"\\\"\""),
        ("", "\"\""),
    ];
    for &(word, expected) in cases.iter() {
        assert_eq!(quote(word), expected);
    }
}
242a078c8edSopenharmony_ci
// Table-driven check of `join`: (words, expected joined string).
#[test]
fn test_join() {
    let cases: [(Vec<&str>, &str); 4] = [
        (vec![], ""),
        (vec![""], "\"\""),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "\"foo bar\" baz"),
    ];
    for &(ref words, expected) in cases.iter() {
        assert_eq!(join(words.iter().cloned()), expected);
    }
}
250