17db96d56Sopenharmony_ci#!/usr/bin/env python3
27db96d56Sopenharmony_ci'''Add syntax highlighting to Python source code'''
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci__author__ = 'Raymond Hettinger'
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciimport builtins
77db96d56Sopenharmony_ciimport functools
87db96d56Sopenharmony_ciimport html as html_module
97db96d56Sopenharmony_ciimport keyword
107db96d56Sopenharmony_ciimport re
117db96d56Sopenharmony_ciimport tokenize
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ci#### Analyze Python Source #################################
147db96d56Sopenharmony_ci
def is_builtin(s):
    'Report whether s names an attribute of the builtins module'
    # A private sentinel distinguishes "attribute missing" from any real value
    # (including None or False) that a builtin name could be bound to.
    missing = object()
    return getattr(builtins, s, missing) is not missing
187db96d56Sopenharmony_ci
def combine_range(lines, start, end):
    '''Return the text lying between two (row, col) positions, plus end.

       Rows are 1-based and columns are 0-based.  The result is a tuple
       (text, end) so a caller can update its "written so far" marker in
       the same assignment.
    '''
    first_row, first_col = start
    last_row, last_col = end
    head = lines[first_row - 1]
    if first_row == last_row:
        return head[first_col:last_col], end
    # Tail of the first line, every whole line in between, head of the last.
    pieces = [head[first_col:]]
    pieces.extend(lines[first_row:last_row - 1])
    pieces.append(lines[last_row - 1][:last_col])
    return ''.join(pieces), end
267db96d56Sopenharmony_ci
def _classify_token(tok_type, tok_str, scol, prev_tok_type, prev_tok_str):
    'Pick the highlight category for one token; return "" for plain text'
    if tok_type == tokenize.COMMENT:
        return 'comment'
    if tok_type == tokenize.OP:
        # Brackets and punctuation stay plain; every other operator is colored.
        return '' if tok_str[:1] in '{}[](),.:;@' else 'operator'
    if tok_type == tokenize.STRING:
        # A string directly after an INDENT token, or one starting in
        # column 0, is classified as a docstring.
        starts_block = prev_tok_type == tokenize.INDENT or scol == 0
        return 'docstring' if starts_block else 'string'
    if tok_type != tokenize.NAME:
        return ''
    if tok_str in ('def', 'class', 'import', 'from'):
        return 'definition'
    if prev_tok_str in ('def', 'class'):
        return 'defname'
    if keyword.iskeyword(tok_str):
        return 'keyword'
    # A name that follows a dot is an attribute access, not a builtin.
    if is_builtin(tok_str) and prev_tok_str != '.':
        return 'builtin'
    return ''

def analyze_python(source):
    '''Split Python source into classified chunks for syntax highlighting.

       Yields (category, text) tuples.  Text that falls between classified
       tokens is yielded with an empty category, so concatenating every
       yielded text reproduces the source.
    '''
    lines = source.splitlines(True)
    lines.append('')
    readline = functools.partial(next, iter(lines), '')
    # Seed the "previous token" state used when classifying the first token.
    tok_type, tok_str = tokenize.COMMENT, ''
    written = (1, 0)            # position up to which text has been emitted
    for tok in tokenize.generate_tokens(readline):
        prev_tok_type, prev_tok_str = tok_type, tok_str
        tok_type, tok_str, tok_start, tok_end, _line = tok
        kind = _classify_token(tok_type, tok_str, tok_start[1],
                               prev_tok_type, prev_tok_str)
        if not kind:
            continue
        # Flush the unclassified text preceding this token, then the token.
        plain, written = combine_range(lines, written, tok_start)
        yield '', plain
        written = tok_end
        yield kind, tok_str
    # Emit whatever remains after the last classified token.
    tail, written = combine_range(lines, written, tok_end)
    yield '', tail
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci#### Raw Output  ###########################################
677db96d56Sopenharmony_ci
def raw_highlight(classified_text):
    'Show one line per chunk: a right-aligned category label and the repr of its text'
    # Chunks with an empty category are labelled 'plain'.
    return ''.join(
        '%15s:  %r\n' % (kind or 'plain', text)
        for kind, text in classified_text
    )
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_ci#### ANSI Output ###########################################
767db96d56Sopenharmony_ci
# Maps each category to an (opening, closing) pair of ANSI escape sequences.
default_ansi = {
    'comment': ('\033[0;31m', '\033[0m'),
    'string': ('\033[0;32m', '\033[0m'),
    'docstring': ('\033[0;32m', '\033[0m'),
    'keyword': ('\033[0;33m', '\033[0m'),
    'builtin': ('\033[0;35m', '\033[0m'),
    'definition': ('\033[0;33m', '\033[0m'),
    'defname': ('\033[0;34m', '\033[0m'),
    'operator': ('\033[0;33m', '\033[0m'),
}

def ansi_highlight(classified_text, colors=default_ansi):
    'Wrap each classified chunk in the ANSI escape sequences configured for its category'
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    uncolored = ('', '')        # categories missing from colors pass through as-is
    pieces = []
    for kind, text in classified_text:
        start_code, end_code = colors.get(kind, uncolored)
        pieces.append(start_code + text + end_code)
    return ''.join(pieces)
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci#### HTML Output ###########################################
987db96d56Sopenharmony_ci
def html_highlight(classified_text, opener='<pre class="python">\n', closer='</pre>\n'):
    'Render classified text as an HTML fragment framed by opener and closer'
    fragments = [opener]
    for kind, text in classified_text:
        markup = html_module.escape(text)
        if kind:
            # Classified chunks get a <span> whose CSS class is the category.
            markup = ('<span class="%s">' % kind) + markup + '</span>'
        fragments.append(markup)
    fragments.append(closer)
    return ''.join(fragments)
1107db96d56Sopenharmony_ci
# Maps a CSS class selector for each category to its declaration block.
default_css = {
    '.comment': '{color: crimson;}',
    '.string':  '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}

# Page template filled in with str.format(); fields: title, css, body.
default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''

def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    'Assemble a standalone HTML page holding the stylesheet and the highlighted source'
    # One "selector {declarations}" rule per line of the stylesheet.
    rules = ['%s %s' % (selector, declarations)
             for selector, declarations in css.items()]
    stylesheet = '\n'.join(rules)
    body = html_highlight(classified_text)
    # Only the title needs escaping here; the body is escaped by html_highlight().
    return html.format(title=html_module.escape(title), css=stylesheet, body=body)
1467db96d56Sopenharmony_ci
1477db96d56Sopenharmony_ci#### LaTeX Output ##########################################
1487db96d56Sopenharmony_ci
# Maps each category to the body of its LaTeX macro; #1 is the highlighted text.
default_latex_commands = {
    'comment': r'{\color{red}#1}',
    'string': r'{\color{ForestGreen}#1}',
    'docstring': r'{\emph{\color{ForestGreen}#1}}',
    'keyword': r'{\color{orange}#1}',
    'builtin': r'{\color{purple}#1}',
    'definition': r'{\color{orange}#1}',
    'defname': r'{\color{blue}#1}',
    'operator': r'{\color{brown}#1}',
}

# Document template filled in with %-formatting; keys: macros, title, body.
default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{upquote}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
%(macros)s
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''

def alltt_escape(s):
    'Replace backslash and braces with their escaped equivalents'
    xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
    # A single regex pass ensures the braces inside the inserted
    # \textbackslash{} are not themselves escaped again.
    return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)

def latex_highlight(classified_text, title='python',
                    commands=default_latex_commands,
                    document=default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

       classified_text -- iterable of (category, text) pairs
       title           -- inserted verbatim into the template (not escaped)
       commands        -- maps a category to the body of its \\py<category> macro
       document        -- %-format template with macros, title and body keys

       Returns the filled-in document as a string.
    '''
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Fill the template the caller supplied; the module default is only
    # the fallback value of the document parameter.
    return document % dict(title=title, macros=macros, body=''.join(result))
1947db96d56Sopenharmony_ci
1957db96d56Sopenharmony_ci
# Command-line entry point: classify one source file and emit it in the
# format selected by the options (ANSI-colored terminal output by default).
if __name__ == '__main__':
    import argparse
    import os.path
    import pathlib
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                  # Show syntax highlighted code in the terminal window
                  $ ./highlight.py myfile.py

                  # Colorize myfile.py and display in a browser
                  $ ./highlight.py -b myfile.py

                  # Create an HTML section to embed in an existing webpage
                  ./highlight.py -s myfile.py

                  # Create a complete HTML file
                  $ ./highlight.py -c myfile.py > myfile.html

                  # Create a PDF using LaTeX
                  $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file the way Python itself would (BOM or
    # coding cookie, UTF-8 otherwise) instead of using the locale encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    # Output format precedence: raw, full HTML page, HTML section, LaTeX, ANSI.
    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        # The page is written to the current directory, named after the source.
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The default page template declares charset=UTF-8, so the bytes on
        # disk must be UTF-8 regardless of the locale's preferred encoding.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        # as_uri() yields a well-formed file: URL (percent-encoded, and with
        # the right number of slashes on every platform).
        webbrowser.open(pathlib.Path(os.path.abspath(htmlfile)).as_uri())
    else:
        sys.stdout.write(encoded)
266