"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.

import os
import re
import sys
from collections import deque
from io import StringIO

# Names exported by ``from shlex import *``.
__all__ = ["shlex", "split", "quote", "join"]

class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Instances are iterators over the tokens of an input stream.  The
    tokenizing rules are driven by public, freely reassignable string
    attributes (``commenters``, ``wordchars``, ``whitespace``, ``quotes``,
    ``escape``, ``escapedquotes``) plus the ``whitespace_split`` flag.

    The lexer is a small state machine; ``self.state`` is one of:

    * ``' '``  -- between tokens (skipping whitespace)
    * ``'a'``  -- inside a word
    * ``'c'``  -- inside a run of punctuation characters
    * a character from ``quotes`` -- inside a quoted string
    * a character from ``escape`` -- immediately after an escape character
    * ``None`` -- end of the current input stream has been reached
    """

    def __init__(self, instream=None, infile=None, posix=False,
                 punctuation_chars=False):
        """Create a lexer.

        instream -- a string (wrapped in a StringIO) or an object providing
            read() and readline(); when None, sys.stdin is read instead.
        infile -- name associated with the stream; reported by
            error_leader() and used by sourcehook() to resolve relative
            paths.  Forced to None when *instream* is None.
        posix -- when true, EOF is signalled with None instead of '',
            quotes are stripped from tokens, escape characters are
            processed and Latin-1 letters count as word characters.
        punctuation_chars -- false for no special punctuation handling,
            True for the default set '();<>|&', or a string giving the
            characters to be returned as runs of punctuation.
        """
        if isinstance(instream, str):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        # Value get_token() returns at end of input.
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        # Quote characters inside which the escape character is honoured
        # (POSIX mode only).
        self.escapedquotes = '"'
        self.state = ' '
        # Tokens pushed back by push_token() or by read_token() lookahead.
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        # Token currently being accumulated by read_token().
        self.token = ''
        # Stack of suspended (infile, instream, lineno) input sources.
        self.filestack = deque()
        # Keyword that triggers file inclusion in get_token(); None disables.
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self._punctuation_chars = punctuation_chars
        if punctuation_chars:
            # _pushback_chars is a push back queue used by lookahead logic
            self._pushback_chars = deque()
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # remove any punctuation chars from wordchars
            t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
            self.wordchars = self.wordchars.translate(t)

    @property
    def punctuation_chars(self):
        """Characters treated as punctuation (read-only, '' when disabled)."""
        return self._punctuation_chars

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        "Push an input source onto the lexer's input source stack."
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        # Remember the current source so pop_source() can resume it.
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        "Pop the input source stack."
        # The exhausted stream is closed before the previous one is resumed.
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d' \
                  % (self.instream, self.lineno))
        # read_token() left state at None on EOF; restart between tokens.
        self.state = ' '

    def get_token(self):
        "Get a token from the input stream (or from stack if it's nonempty)"
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                # The token following the source keyword names the file.
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                # An included source ran dry: resume the one that included it.
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Unlike get_token(), this ignores the token pushback stack, the
        ``source`` inclusion keyword and the stack of input sources.

        Returns the token string.  At end of input the result is '' in
        non-POSIX mode, and None in POSIX mode unless a quoted (possibly
        empty) string was seen.

        Raises ValueError when the input ends inside a quoted string or
        directly after an escape character.
        """
        # True once any quote has been entered; lets POSIX mode emit ''.
        quoted = False
        # State to return to after the escaped character is consumed.
        escapedstate = ' '
        while True:
            # Characters put back by punctuation lookahead take priority
            # over fresh input.
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    # Closing quote.
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        # POSIX: the word may continue after the quote.
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    # Not an escapable character: keep the escape literally.
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    # Only POSIX mode ends the current token at a comment.
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # End of the punctuation run; re-read this character
                        # as the start of the next token.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or (self.whitespace_split and
                          nextchar not in self.punctuation_chars)):
                    self.token += nextchar
                else:
                    # The character ends the word; save it for the next call
                    # (as a character for punctuation lookahead, otherwise
                    # as a ready-made one-character token).
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        # POSIX mode distinguishes "no token" (None) from an empty quoted
        # string ('').
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        "Hook called on a filename to be sourced."
        # Strip the surrounding double quotes of a quoted filename token.
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        # The opened stream is closed later by pop_source().
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        "Emit a C-compiler-like, Emacs-friendly error-message leader."
        # Missing arguments default to the lexer's current position.
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

def split(s, comments=False, posix=True):
    """Split the string *s* using shell-like syntax.

    s -- the string to split.  Passing None is deprecated; it makes the
        underlying lexer read from sys.stdin.
    comments -- when false (the default) no character starts a comment;
        when true the lexer's default comment characters are kept.
    posix -- passed through to the shlex constructor.

    Returns the list of tokens.  Tokens are separated by whitespace only.
    """
    if s is None:
        # Imported lazily: only needed on this deprecated path.
        import warnings
        warnings.warn("Passing None for 's' to shlex.split() is deprecated.",
                      DeprecationWarning, stacklevel=2)
    lex = shlex(s, posix=posix)
    lex.whitespace_split = True
    if not comments:
        lex.commenters = ''
    return list(lex)


def join(split_command):
    """Return a shell-escaped string from *split_command*.

    Each item of the iterable *split_command* is passed through quote()
    and the results are joined with single spaces.
    """
    return ' '.join(quote(arg) for arg in split_command)


# Finds the first character that is not in the safe set (ASCII word
# characters and @%+=:,./-); returns None when the whole string is safe.
_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search

def quote(s):
    """Return a shell-escaped version of the string *s*.

    An empty string becomes ''.  A string made only of safe characters is
    returned unchanged.  Anything else is wrapped in single quotes, with
    each embedded single quote rewritten so it survives the quoting.
    """
    if not s:
        return "''"
    if _find_unsafe(s) is None:
        return s

    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + s.replace("'", "'\"'\"'") + "'"


3377db96d56Sopenharmony_cidef _print_tokens(lexer):
3387db96d56Sopenharmony_ci    while 1:
3397db96d56Sopenharmony_ci        tt = lexer.get_token()
3407db96d56Sopenharmony_ci        if not tt:
3417db96d56Sopenharmony_ci            break
3427db96d56Sopenharmony_ci        print("Token: " + repr(tt))
3437db96d56Sopenharmony_ci
if __name__ == '__main__':
    # Script mode: print the tokens of the file named by the first
    # command-line argument, or of standard input when none is given.
    if len(sys.argv) == 1:
        _print_tokens(shlex())
    else:
        fn = sys.argv[1]
        with open(fn) as f:
            _print_tokens(shlex(f, fn))