1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4# Check for stylistic and formal issues in .rst and .py
5# files included in the documentation.
6#
7# 01/2009, Georg Brandl
8
9# TODO: - wrong versions in versionadded/changed
10#       - wrong markup after versionchanged directive
11
12import os
13import re
14import sys
15import getopt
16from string import ascii_letters
17from os.path import join, splitext, abspath, exists
18from collections import defaultdict
19
# Names of the directives this linter knows about.  Each entry is a regular
# expression fragment (note the lookahead on 'deprecated'); the fragments are
# joined with '|' to build ``all_directives``.  Every name is listed once.
directives = [
    # standard docutils ones
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'contents', 'csv-table', 'danger', 'date', 'default-role', 'epigraph',
    'error', 'figure', 'footer', 'header', 'highlights', 'hint', 'image',
    'important', 'include', 'line-block', 'list-table', 'meta', 'note',
    'parsed-literal', 'pull-quote', 'raw', 'replace',
    'restructuredtext-test-directive', 'role', 'rubric', 'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic', 'unicode', 'warning',
    # Sphinx and Python docs custom ones ('class' and 'role' already appear
    # in the docutils group above and are not repeated here)
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'code-block', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    # the lookahead keeps plain 'deprecated' from matching the first part of
    # 'deprecated-removed'
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'doctest', 'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'productionlist', 'program', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged',
]
44
# Known interpreted-text roles, written as regular-expression fragments that
# are joined with '|' into ``all_roles``.  A few bare roles carry a negative
# lookbehind, which stops them from matching inside the domain-qualified
# spelling that has its own entry (for example ``:func:`` inside
# ``:c:func:`` or ``:py:func:``).
roles = [
    # bare roles guarded against their ":py:" / ":c:" qualified forms
    "(?<!py):class:",
    "(?<!:c|py):func:",
    "(?<!py):meth:",
    "(?<!:py):mod:",
    ":exc:",
    ":issue:",
    ":attr:",
    ":c:func:",
    ":ref:",
    ":const:",
    ":term:",
    "(?<!:c|py):data:",
    ":keyword:",
    ":file:",
    ":pep:",
    ":c:type:",
    ":c:member:",
    ":option:",
    ":rfc:",
    ":envvar:",
    ":c:data:",
    ":source:",
    ":mailheader:",
    ":program:",
    ":c:macro:",
    ":dfn:",
    ":kbd:",
    ":command:",
    ":mimetype:",
    ":opcode:",
    ":manpage:",
    ":py:data:",
    # upper-case spelling listed separately from ":rfc:"
    ":RFC:",
    ":pdbcmd:",
    ":abbr:",
    ":samp:",
    ":token:",
    # upper-case spelling listed separately from ":pep:"
    ":PEP:",
    ":sup:",
    ":py:class:",
    ":menuselection:",
    ":doc:",
    ":sub:",
    ":py:meth:",
    ":newsgroup:",
    ":code:",
    ":py:func:",
    ":makevar:",
    ":guilabel:",
    ":title-reference:",
    ":py:mod:",
    ":download:",
    ":2to3fixer:",
]
100
# Capturing alternations over every known directive / role name, ready to be
# embedded in the larger patterns below.
all_directives = f"({'|'.join(directives)})"
all_roles = f"({'|'.join(roles)})"

# A comment that looks like it was meant to be a directive: a known directive
# name after ".. " that is not followed by the required "::".  Flags
#   .. versionchanged 3.6
#   .. versionchanged: 3.6
# while the correct form is left alone:
#   .. versionchanged:: 3.6
seems_directive_re = re.compile(
    rf"(?<!\.)\.\. {all_directives}([^a-z:]|:(?!:))")

# A directive introduced by three dots instead of two:
#   ... versionchanged:: 3.6
# where the author meant:
#   .. versionchanged:: 3.6
three_dot_directive_re = re.compile(rf"\.\.\. {all_directives}::")

# A role whose content is wrapped in double backticks:
#   :const:``None``
# where a role takes single backticks:
#   :const:`None`
double_backtick_role = re.compile(rf"(?<!``){all_roles}``")


# A role followed by something other than a backtick or a space:
#   :const:None
# where the content must be wrapped in backticks:
#   :const:`None`
role_with_no_backticks = re.compile(rf"{all_roles}[^` ]")

# A role stuck to the preceding word:
#   the:c:func:`PyThreadState_LeaveTracing` function.
# where a space is needed before the role:
#   the :c:func:`PyThreadState_LeaveTracing` function.
role_glued_with_word = re.compile(rf"[a-zA-Z]{all_roles}")

# Text in single backticks with no role in front, delimited by a space or by
# the start/end of the line.
default_role_re = re.compile(r"(^| )`\w([^`]*?\w)?`($| )")
# Fragments that suggest reST markup survived into built HTML: a lowercase
# letter followed by "::" and whitespace, any backtick, or ".." followed by a
# word and a colon.
leaked_markup_re = re.compile(r"[a-z]::\s|`|\.\.\s*\w+:")
139
140
# Registry mapping a file suffix (for example '.rst') to the list of checker
# functions registered for that suffix.
checkers = {}

# Attributes every checker function carries, with the value used when the
# decorator call does not override it.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Return a decorator that registers a function as a checker.

    The decorated function is appended to ``checkers`` under each of
    *suffixes* and receives one attribute per key of ``checker_props``,
    taken from *kwds* when given there and from the default otherwise.
    The function itself is returned unchanged.
    """
    def register(func):
        for ext in suffixes:
            if ext not in checkers:
                checkers[ext] = []
            checkers[ext].append(func)
        for name, default in checker_props.items():
            setattr(func, name, kwds[name] if name in kwds else default)
        return func
    return register
155
156
157@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax.

    *fn* is the file name handed to ``compile()`` for error messages and
    *lines* is the file content as a list of lines.  Yields
    ``(lineno, message)`` pairs; *lineno* is always an integer, with 0
    standing for "no specific line".
    """
    code = ''.join(lines)
    if '\r' in code:
        # Carriage returns are only reported off Windows, but they are
        # always removed before compiling.
        if os.name != 'nt':
            yield 0, '\\r in code file'
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except (SyntaxError, ValueError) as err:
        # compile() reports NUL bytes as ValueError on older interpreters,
        # and a SyntaxError does not always carry a line number.  Fall back
        # to 0 so callers can format the position with %d.
        lineno = getattr(err, 'lineno', None) or 0
        yield lineno, 'not compilable: %s' % err
169
170
@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Report reST constructs that are probably mistakes.

    Yields ``(lineno, message)`` pairs with 1-based line numbers.  Each line
    is tested against the module-level patterns in a fixed order, then for
    use of the default role; the default-role test is skipped from a
    ``.. productionlist::`` line up to the next blank line.
    """
    pattern_messages = (
        (seems_directive_re,
         "comment seems to be intended as a directive"),
        (three_dot_directive_re,
         "directive should start with two dots, not three."),
        (double_backtick_role,
         "role use a single backtick, double backtick found."),
        (role_with_no_backticks,
         "role use a single backtick, no backtick found."),
        (role_glued_with_word,
         "missing space before role"),
    )
    in_production_list = False
    for lineno, text in enumerate(lines, 1):
        for pattern, message in pattern_messages:
            if pattern.search(text):
                yield lineno, message
        if ".. productionlist::" in text:
            in_production_list = True
            continue
        if in_production_list:
            # A blank line closes the production list.
            if not text.strip():
                in_production_list = False
        elif default_role_re.search(text):
            yield lineno, "default role used"
192
193
194@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for whitespace issues: carriage returns, tabs, trailing blanks.

    Yields ``(lineno, message)`` pairs with 1-based line numbers.  *fn* is
    not used by this checker.
    """
    for lno, line in enumerate(lines, start=1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Remove only the line terminator.  The last line of a file may not
        # have one, and chopping its final character unconditionally would
        # both hide real trailing blanks and invent spurious ones.
        text = line[:-1] if line.endswith('\n') else line
        if text.rstrip(' \t') != text:
            yield lno, 'trailing whitespace'
204
205
206@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default.

    Yields ``(lineno, "line too long")`` for every line with more than 80
    characters, not counting the line terminator.  Tables, lines containing
    a link, and function signatures are exempt.  *fn* is not used.
    """
    signature_prefixes = ('.. function', '.. method', '.. cfunction')
    for lno, line in enumerate(lines, start=1):
        # Measure the text without its terminator so that an unterminated
        # last line is judged like any other line.
        text = line[:-1] if line.endswith('\n') else line
        if len(text) <= 80:
            continue
        stripped = text.lstrip()
        # startswith() copes with a line made only of whitespace, where
        # indexing the first character would raise IndexError.
        if stripped.startswith(('+', '|')):
            continue  # table row or border
        if 'http://' in text or 'https://' in text:
            continue  # links cannot be wrapped
        if stripped.startswith(signature_prefixes):
            continue  # function signatures
        yield lno, "line too long"
218
219
@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Report HTML lines that still seem to contain raw reST markup.

    This only works if the HTML files have been built.  Yields
    ``(lineno, message)`` pairs with 1-based line numbers; the message
    includes the repr of the offending line.
    """
    for lineno, text in enumerate(lines, 1):
        if leaked_markup_re.search(text) is not None:
            yield lineno, 'possibly leaked markup: %r' % text
228
229
def hide_literal_blocks(lines):
    """Blank out literal blocks in *lines*.

    A line ending in ``::`` opens a block; every following line that is
    empty or starts with a space is replaced by an empty line, and the
    first line that is neither closes the block.  One line is yielded per
    input line, so line numbers stay meaningful.
    """
    hiding = False
    for text in lines:
        if text.endswith("::\n"):
            # The introducing line itself is passed through untouched.
            hiding = True
            yield text
            continue
        if hiding:
            if text != "\n" and not text.startswith(" "):
                hiding = False
            else:
                text = "\n"
        yield text
246
247
def type_of_explicit_markup(line):
    """Classify a line that starts with the explicit markup marker ``.. ``.

    Returns one of 'directive', 'footnote', 'citation', 'target',
    'substitution_definition' or 'comment'.  The patterns are tried in that
    order and anything that matches none of them is reported as a comment.
    """
    if re.match(fr'\.\. {all_directives}::', line):
        return 'directive'
    if re.match(r'\.\. \[[0-9]+\] ', line):
        return 'footnote'
    if re.match(r'\.\. \[[^\]]+\] ', line):
        return 'citation'
    # The colon may be followed by any whitespace or by the end of the line,
    # so a label-only internal target such as ".. _my-label:" is recognised
    # and not mistaken for a comment.
    if re.match(r'\.\. _.*[^_]:(?:\s|$)', line):
        return 'target'
    if re.match(r'\.\. \|[^\|]*\| ', line):
        return 'substitution_definition'
    return 'comment'
260
261
def hide_comments(lines):
    """Blank out reST comments in *lines*.

    Two forms are handled: a multi-line comment opened by a line holding
    only ``..`` (its empty or space-indented continuation lines are
    blanked), and a single ``.. `` line that ``type_of_explicit_markup``
    classifies as a comment.  One line is yielded per input line, so line
    numbers stay meaningful.
    """
    inside_comment = False
    for text in lines:
        if text == "..\n":
            inside_comment = True
        elif inside_comment:
            if text != "\n" and not text.startswith(" "):
                inside_comment = False
            else:
                text = "\n"
        is_one_line_comment = (
            text.startswith(".. ")
            and type_of_explicit_markup(text) == 'comment'
        )
        yield "\n" if is_one_line_comment else text
280
281
282
@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s

    Comments and literal blocks are blanked out before scanning.  Yields
    ``(lineno, message)`` pairs with 1-based line numbers.
    """
    in_code_sample = False
    check_next_one = False
    visible = hide_comments(hide_literal_blocks(lines))
    for lno, line in enumerate(visible, start=1):
        # Splitting on the double backtick alternates between ordinary text
        # and code-sample content; in_code_sample tracks which one we are in
        # and is carried over from line to line.
        tokens = line.split("``")
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The token is empty when the closing backticks are followed
                # directly by more backticks or by the end of an
                # unterminated line; there is no letter to complain about.
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == len(tokens) - 1:
                # No double backtick follows the last token of the line.
                continue
            if in_code_sample:
                check_next_one = True
            in_code_sample = not in_code_sample
304
def main(argv):
    """Command-line entry point: run the registered checkers over a path.

    *argv* is the complete argument vector, program name first.  The path
    defaults to the current directory and may name a directory, which is
    walked recursively, or a single file.

    Returns 2 for a usage error or a path that does not exist, 1 if at
    least one problem was reported (unreadable files count as severity 4
    problems), and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = set()
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # A non-numeric severity is a usage error, not a crash.
                print(usage)
                return 2
        elif opt == '-i':
            ignore.add(abspath(val))

    if len(args) > 1:
        print(usage)
        return 2
    path = args[0] if args else '.'

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # Number of reported problems, keyed by severity.
    count = defaultdict(int)

    print("""⚠ rstlint.py is no longer maintained here and will be removed
⚠ in a future release.
⚠ Please use https://pypi.org/p/sphinx-lint instead.
""")

    if os.path.isfile(path):
        # os.walk() yields nothing for a plain file, which would report
        # "No problems found" without checking anything.  Present the file
        # as a one-entry directory listing instead.
        head, tail = os.path.split(path)
        tree = [(head, [], [tail])]
    else:
        tree = os.walk(path)

    for root, dirs, files in tree:
        # ignore subdirs in ignore list; emptying dirs in place stops
        # os.walk() from descending any further
        if abspath(root) in ignore:
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except (OSError, UnicodeDecodeError) as err:
                # A file that is not valid UTF-8 is reported like an
                # unreadable one; it must not abort the whole run.
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))
405
406
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv))
409