xref: /third_party/rust/crates/shlex/src/lib.rs (revision a078c8ed)
1// Copyright 2015 Nicholas Allegra (comex).
2// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
3// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
4// copied, modified, or distributed except according to those terms.
5
6//! Same idea as (but implementation not directly based on) the Python shlex module.  However, this
7//! implementation does not support any of the Python module's customization because it makes
8//! parsing slower and is fairly useless.  You only get the default settings of shlex.split, which
9//! mimic the POSIX shell:
10//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
11//!
12//! This implementation also deviates from the Python version in not treating `\r` specially, which
13//! I believe is more compliant.
14//!
15//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
16//! directly as a micro-optimization.
17//!
18//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
19//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
20
21#![cfg_attr(not(feature = "std"), no_std)]
22
23extern crate alloc;
24use alloc::vec::Vec;
25use alloc::borrow::Cow;
26use alloc::string::String;
27#[cfg(test)]
28use alloc::vec;
29#[cfg(test)]
30use alloc::borrow::ToOwned;
31
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
///
/// The type is `Clone` so a caller can snapshot the tokenizer state (for look-ahead) and `Debug`
/// so it can be embedded in other debuggable types or logged.
#[derive(Clone, Debug)]
pub struct Shlex<'a> {
    /// The bytes of the input string that have not been consumed yet.
    in_iter: core::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash.  Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}
44
45impl<'a> Shlex<'a> {
46    pub fn new(in_str: &'a str) -> Self {
47        Shlex {
48            in_iter: in_str.bytes(),
49            line_no: 1,
50            had_error: false,
51        }
52    }
53
54    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
55        let mut result: Vec<u8> = Vec::new();
56        loop {
57            match ch as char {
58                '"' => if let Err(()) = self.parse_double(&mut result) {
59                    self.had_error = true;
60                    return None;
61                },
62                '\'' => if let Err(()) = self.parse_single(&mut result) {
63                    self.had_error = true;
64                    return None;
65                },
66                '\\' => if let Some(ch2) = self.next_char() {
67                    if ch2 != '\n' as u8 { result.push(ch2); }
68                } else {
69                    self.had_error = true;
70                    return None;
71                },
72                ' ' | '\t' | '\n' => { break; },
73                _ => { result.push(ch as u8); },
74            }
75            if let Some(ch2) = self.next_char() { ch = ch2; } else { break; }
76        }
77        unsafe { Some(String::from_utf8_unchecked(result)) }
78    }
79
80    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
81        loop {
82            if let Some(ch2) = self.next_char() {
83                match ch2 as char {
84                    '\\' => {
85                        if let Some(ch3) = self.next_char() {
86                            match ch3 as char {
87                                // \$ => $
88                                '$' | '`' | '"' | '\\' => { result.push(ch3); },
89                                // \<newline> => nothing
90                                '\n' => {},
91                                // \x => =x
92                                _ => { result.push('\\' as u8); result.push(ch3); }
93                            }
94                        } else {
95                            return Err(());
96                        }
97                    },
98                    '"' => { return Ok(()); },
99                    _ => { result.push(ch2); },
100                }
101            } else {
102                return Err(());
103            }
104        }
105    }
106
107    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
108        loop {
109            if let Some(ch2) = self.next_char() {
110                match ch2 as char {
111                    '\'' => { return Ok(()); },
112                    _ => { result.push(ch2); },
113                }
114            } else {
115                return Err(());
116            }
117        }
118    }
119
120    fn next_char(&mut self) -> Option<u8> {
121        let res = self.in_iter.next();
122        if res == Some('\n' as u8) { self.line_no += 1; }
123        res
124    }
125}
126
127impl<'a> Iterator for Shlex<'a> {
128    type Item = String;
129    fn next(&mut self) -> Option<String> {
130        if let Some(mut ch) = self.next_char() {
131            // skip initial whitespace
132            loop {
133                match ch as char {
134                    ' ' | '\t' | '\n' => {},
135                    '#' => {
136                        while let Some(ch2) = self.next_char() {
137                            if ch2 as char == '\n' { break; }
138                        }
139                    },
140                    _ => { break; }
141                }
142                if let Some(ch2) = self.next_char() { ch = ch2; } else { return None; }
143            }
144            self.parse_word(ch)
145        } else { // no initial character
146            None
147        }
148    }
149
150}
151
152/// Convenience function that consumes the whole string at once.  Returns None if the input was
153/// erroneous.
154pub fn split(in_str: &str) -> Option<Vec<String>> {
155    let mut shl = Shlex::new(in_str);
156    let res = shl.by_ref().collect();
157    if shl.had_error { None } else { Some(res) }
158}
159
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * The empty string becomes `""`.
/// * A word made only of bytes that need no protection is returned borrowed and unchanged.
/// * Any other word is wrapped in double quotes, with `$`, `` ` ``, `"` and `\` preceded by a
///   backslash.
///
/// Besides the classic shell metacharacters, the following also force quoting:
///
/// * `{` and `}`, which trigger brace expansion in shells such as bash and zsh;
/// * `^`, the quick-substitution character of interactive shells;
/// * every non-ASCII byte, because some shells and locales are reported to treat certain high
///   bytes (for example 0xA0) as word separators.
///
/// Limitations: NUL bytes are passed through as-is, and `!` is left untouched, so an
/// interactive shell with history expansion enabled may still interpret it.
pub fn quote(in_str: &str) -> Cow<str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }

    let needs_quoting = in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
        | b'{' | b'}' | b'^' => true,
        // Be conservative about everything outside ASCII.
        _ => !c.is_ascii(),
    });
    if !needs_quoting {
        return in_str.into();
    }

    // Two quotes plus the word itself; escapes may grow the buffer a little further.
    let mut out = String::with_capacity(in_str.len() + 2);
    out.push('"');
    for c in in_str.chars() {
        // Only these four characters keep a special meaning inside double quotes.
        match c {
            '$' | '`' | '"' | '\\' => out.push('\\'),
            _ => {}
        }
        out.push(c);
    }
    out.push('"');
    out.into()
}
184
185/// Convenience function that consumes an iterable of words and turns it into a single string,
186/// quoting words when necessary. Consecutive words will be separated by a single space.
187pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
188    words.into_iter()
189        .map(quote)
190        .collect::<Vec<_>>()
191        .join(" ")
192}
193
#[cfg(test)]
/// Table of `(input, expected)` pairs for `test_split`; `None` marks input that `split` must
/// reject as erroneous.
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    // plain words and quoting glued onto words
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    // backslash-newline, bare and inside double quotes
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    // backslashes are literal inside single quotes
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    // input ending after a backslash or inside a quotation
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    // comments start only where a word would start
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    // no escape processing inside single quotes
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
217
#[test]
fn test_split() {
    // Every table entry must split into exactly the expected words, or be rejected with `None`.
    for &(input, expected) in SPLIT_TEST_ITEMS {
        let want: Option<Vec<String>> =
            expected.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), want);
    }
}
224
#[test]
fn test_lineno() {
    // "bar" sits on the third line, so two newlines have been read by the time it is yielded.
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this the test would pass vacuously if "bar" were never produced.
    assert!(saw_bar, "the word \"bar\" was never yielded");
    assert!(!sh.had_error);
}
234
#[test]
fn test_quote() {
    // (word, expected quoting)
    let cases = [
        ("foobar", "foobar"),
        ("foo bar", "\"foo bar\""),
        ("\"", "\"\\\"\""),
        ("", "\"\""),
    ];
    for &(word, expected) in cases.iter() {
        assert_eq!(quote(word), expected);
    }
}
242
#[test]
fn test_join() {
    // (words, expected command line)
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "\"\""),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "\"foo bar\" baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
250