#!/usr/bin/env python3
'''Add syntax highlighting to Python source code'''

__author__ = 'Raymond Hettinger'

import builtins
import functools
import html as html_module
import keyword
import re
import tokenize

#### Analyze Python Source #################################

def is_builtin(s):
    'Return True if s is the name of a builtin'
    return hasattr(builtins, s)

def combine_range(lines, start, end):
    '''Join content from a range of lines between start and end.

       lines is a list of source lines (with line endings kept).
       start and end are (row, col) pairs where rows are 1-based and
       columns are 0-based; the end column is exclusive.

       Returns a (text, end) tuple so the caller can update its
       "written so far" position in the same assignment.
    '''
    (srow, scol), (erow, ecol) = start, end
    if srow == erow:
        return lines[srow-1][scol:ecol], end
    # Tail of the first row, every full row in between, head of the last row.
    rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
    return ''.join(rows), end

def analyze_python(source):
    '''Generate and classify chunks of Python for syntax highlighting.
       Yields tuples in the form: (category, categorized_text).

       The category is one of 'comment', 'operator', 'string',
       'docstring', 'definition', 'defname', 'keyword', 'builtin',
       or '' for text that gets no special highlighting.  Joining
       every categorized_text in order reproduces the original source.
    '''
    lines = source.splitlines(True)
    # Extra empty entry so that positions on the row after the last
    # source line (e.g. the ENDMARKER token) can still be sliced.
    lines.append('')
    readline = functools.partial(next, iter(lines), '')
    kind = tok_str = ''
    tok_type = tokenize.COMMENT
    written = (1, 0)            # position up to which text has been yielded
    for tok in tokenize.generate_tokens(readline):
        prev_tok_type, prev_tok_str = tok_type, tok_str
        # The fifth field is the text of the physical line; it is not needed.
        tok_type, tok_str, (srow, scol), (erow, ecol), _line = tok
        kind = ''
        if tok_type == tokenize.COMMENT:
            kind = 'comment'
        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
            # Brackets and punctuation are left as plain text.
            kind = 'operator'
        elif tok_type == tokenize.STRING:
            kind = 'string'
            # Heuristic: a string that opens an indented block or starts
            # in column zero is treated as a docstring.
            if prev_tok_type == tokenize.INDENT or scol==0:
                kind = 'docstring'
        elif tok_type == tokenize.NAME:
            if tok_str in ('def', 'class', 'import', 'from'):
                kind = 'definition'
            elif prev_tok_str in ('def', 'class'):
                kind = 'defname'
            elif keyword.iskeyword(tok_str):
                kind = 'keyword'
            elif is_builtin(tok_str) and prev_tok_str != '.':
                # A builtin name used as an attribute (x.len) is not a builtin.
                kind = 'builtin'
        if kind:
            # Flush the unclassified text that precedes this token ...
            text, written = combine_range(lines, written, (srow, scol))
            yield '', text
            # ... then the classified token itself.
            text, written = tok_str, (erow, ecol)
            yield kind, text
    # Flush whatever plain text remains after the last classified token.
    line_upto_token, written = combine_range(lines, written, (erow, ecol))
    yield '', line_upto_token

#### Raw Output  ###########################################

def raw_highlight(classified_text):
    '''Straight text display of text classifications.

       Each (kind, text) pair becomes one line holding the kind
       (or 'plain' when the kind is empty) and the repr of the text.
    '''
    result = []
    for kind, text in classified_text:
        result.append('%15s:  %r\n' % (kind or 'plain', text))
    return ''.join(result)

#### ANSI Output ###########################################

# Maps a category to its (opening, closing) ANSI escape sequences.
default_ansi = {
    'comment': ('\033[0;31m', '\033[0m'),
    'string': ('\033[0;32m', '\033[0m'),
    'docstring': ('\033[0;32m', '\033[0m'),
    'keyword': ('\033[0;33m', '\033[0m'),
    'builtin': ('\033[0;35m', '\033[0m'),
    'definition': ('\033[0;33m', '\033[0m'),
    'defname': ('\033[0;34m', '\033[0m'),
    'operator': ('\033[0;33m', '\033[0m'),
}

def ansi_highlight(classified_text, colors=default_ansi):
    '''Add syntax highlighting to source code using ANSI escape sequences.

       colors maps a category to an (opener, closer) pair; categories
       missing from the mapping are emitted without any escape codes.
    '''
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    result = []
    for kind, text in classified_text:
        opener, closer = colors.get(kind, ('', ''))
        result += [opener, text, closer]
    return ''.join(result)

#### HTML Output ###########################################

def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    '''Convert classified text to an HTML fragment.

       Text is HTML-escaped; classified chunks are wrapped in a
       <span> whose class is the category name.  The whole fragment
       is bracketed by opener and closer.
    '''
    result = [opener]
    for kind, text in classified_text:
        if kind:
            result.append('<span class="%s">' % kind)
        result.append(html_module.escape(text))
        if kind:
            result.append('</span>')
    result.append(closer)
    return ''.join(result)

# Maps a CSS selector (one per category) to its declaration block.
default_css = {
    '.comment': '{color: crimson;}',
    '.string':  '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}

# Page template; filled in with str.format() using title, css and body.
default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''

def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    '''Create a complete HTML page with colorized source code.

       classified_text is an iterable of (kind, text) pairs.
       title is HTML-escaped before being placed in the page.
       css maps selectors to declaration blocks; each item becomes
       one line of the stylesheet.
       html is a str.format() template with {title}, {css} and
       {body} placeholders.
    '''
    css_str = '\n'.join(['%s %s' % item for item in css.items()])
    result = html_highlight(classified_text)
    title = html_module.escape(title)
    return html.format(title=title, css=css_str, body=result)

#### LaTeX Output ##########################################

# Maps a category to the body of the LaTeX macro that renders it;
# #1 stands for the highlighted text.
default_latex_commands = {
    'comment': r'{\color{red}#1}',
    'string': r'{\color{ForestGreen}#1}',
    'docstring': r'{\emph{\color{ForestGreen}#1}}',
    'keyword': r'{\color{orange}#1}',
    'builtin': r'{\color{purple}#1}',
    'definition': r'{\color{orange}#1}',
    'defname': r'{\color{blue}#1}',
    'operator': r'{\color{brown}#1}',
}

# Document template; filled in with the % operator using a mapping
# that provides the macros, title and body keys.
default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{upquote}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
%(macros)s
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''

def alltt_escape(s):
    'Replace backslash and braces with their escaped equivalents'
    xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
    return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)

def latex_highlight(classified_text, title = 'python',
                    commands = default_latex_commands,
                    document = default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

       classified_text is an iterable of (kind, text) pairs.
       commands maps a category to the body of a one-argument macro;
       a \\py<category> macro is defined for every item.
       document is a %-style template with %(macros)s, %(title)s and
       %(body)s placeholders.
    '''
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Format the template that was passed in, not the module-level default,
    # so a caller-supplied document is actually honoured.
    return document % dict(title=title, macros=macros, body=''.join(result))


if __name__ == '__main__':
    import argparse
    import os.path
    import pathlib
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                  # Show syntax highlighted code in the terminal window
                  $ ./highlight.py myfile.py

                  # Colorize myfile.py and display in a browser
                  $ ./highlight.py -b myfile.py

                  # Create an HTML section to embed in an existing webpage
                  ./highlight.py -s myfile.py

                  # Create a complete HTML file
                  $ ./highlight.py -c myfile.py > myfile.html

                  # Create a PDF using LaTeX
                  $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file the way the interpreter would
    # (BOM / coding cookie, UTF-8 otherwise) instead of the locale encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The generated page declares charset=UTF-8, so write it as UTF-8.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        # as_uri() yields a well-formed, percent-encoded file:// URL on
        # every platform (plain concatenation breaks on Windows paths and
        # on names containing spaces).
        webbrowser.open(pathlib.Path(os.path.abspath(htmlfile)).as_uri())
    else:
        sys.stdout.write(encoded)