#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Check for stylistic and formal issues in .rst and .py
# files included in the documentation.
#
# 01/2009, Georg Brandl

# TODO: - wrong versions in versionadded/changed
#       - wrong markup after versionchanged directive

import os
import re
import sys
import getopt
from string import ascii_letters
from os.path import join, splitext, abspath, exists
from collections import defaultdict

# Directive names, written as regex fragments: they are joined with "|"
# below, so an entry may contain regex syntax (see 'deprecated(?!-removed)').
directives = [
    # standard docutils ones
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'contents', 'csv-table', 'danger', 'date', 'default-role', 'epigraph',
    'error', 'figure', 'footer', 'header', 'highlights', 'hint', 'image',
    'important', 'include', 'line-block', 'list-table', 'meta', 'note',
    'parsed-literal', 'pull-quote', 'raw', 'replace',
    'restructuredtext-test-directive', 'role', 'rubric', 'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic', 'unicode', 'warning',
    # Sphinx and Python docs custom ones
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'class', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'code-block', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'doctest', 'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'productionlist', 'program', 'role', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged'
]

# Role markers, also regex fragments.  The negative lookbehinds keep the
# short forms (":func:") from matching inside the domain-qualified forms
# (":c:func:", ":py:func:"), which are listed separately.
roles = [
    "(?<!py):class:",
    "(?<!:c|py):func:",
    "(?<!py):meth:",
    "(?<!:py):mod:",
    ":exc:",
    ":issue:",
    ":attr:",
    ":c:func:",
    ":ref:",
    ":const:",
    ":term:",
    "(?<!:c|py):data:",
    ":keyword:",
    ":file:",
    ":pep:",
    ":c:type:",
    ":c:member:",
    ":option:",
    ":rfc:",
    ":envvar:",
    ":c:data:",
    ":source:",
    ":mailheader:",
    ":program:",
    ":c:macro:",
    ":dfn:",
    ":kbd:",
    ":command:",
    ":mimetype:",
    ":opcode:",
    ":manpage:",
    ":py:data:",
    ":RFC:",
    ":pdbcmd:",
    ":abbr:",
    ":samp:",
    ":token:",
    ":PEP:",
    ":sup:",
    ":py:class:",
    ":menuselection:",
    ":doc:",
    ":sub:",
    ":py:meth:",
    ":newsgroup:",
    ":code:",
    ":py:func:",
    ":makevar:",
    ":guilabel:",
    ":title-reference:",
    ":py:mod:",
    ":download:",
    ":2to3fixer:",
]

all_directives = "(" + "|".join(directives) + ")"
all_roles = "(" + "|".join(roles) + ")"

# Find comments that look like a directive, like:
# .. versionchanged 3.6
# or
# .. versionchanged: 3.6
# as it should be:
# .. versionchanged:: 3.6
seems_directive_re = re.compile(r"(?<!\.)\.\. %s([^a-z:]|:(?!:))" % all_directives)

# Find directives prefixed with three dots instead of two, like:
# ... versionchanged:: 3.6
# instead of:
# .. versionchanged:: 3.6
three_dot_directive_re = re.compile(r"\.\.\. %s::" % all_directives)

# Find roles used with double backticks instead of single backticks, like:
# :const:``None``
# instead of:
# :const:`None`
double_backtick_role = re.compile(r"(?<!``)%s``" % all_roles)


# Find roles used with no backticks instead of single backticks, like:
# :const:None
# instead of:
# :const:`None`
role_with_no_backticks = re.compile(r"%s[^` ]" % all_roles)

# Find roles glued to another word, like:
# the:c:func:`PyThreadState_LeaveTracing` function.
# instead of:
# the :c:func:`PyThreadState_LeaveTracing` function.
role_glued_with_word = re.compile(r"[a-zA-Z]%s" % all_roles)

default_role_re = re.compile(r"(^| )`\w([^`]*?\w)?`($| )")
leaked_markup_re = re.compile(r"[a-z]::\s|`|\.\.\s*\w+:")

# Patterns recognised by type_of_explicit_markup(), tried in order; the
# first one matching at the start of the line wins.  Compiled once here
# rather than on every call.
_explicit_markup_kinds = [
    (re.compile(r'\.\. %s::' % all_directives), 'directive'),
    (re.compile(r'\.\. \[[0-9]+\] '), 'footnote'),
    (re.compile(r'\.\. \[[^\]]+\] '), 'citation'),
    (re.compile(r'\.\. _.*[^_]: '), 'target'),
    (re.compile(r'\.\. \|[^\|]*\| '), 'substitution_definition'),
]


# Maps a file suffix (e.g. '.rst') to the list of checker functions
# registered for it.
checkers = {}

# Default values of the attributes set on every checker function.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Decorator to register a function as a checker.

    *suffixes* are the file extensions (with leading dot) the checker
    applies to.  Keyword arguments override the defaults in
    ``checker_props``; every property is stored as an attribute on the
    decorated function.  A checker is called as ``func(fn, lines)`` and
    must yield ``(line_number, message)`` pairs.
    """
    def deco(func):
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        for prop in checker_props:
            setattr(func, prop, kwds.get(prop, checker_props[prop]))
        return func
    return deco


@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax."""
    code = ''.join(lines)
    if '\r' in code:
        if os.name != 'nt':
            yield 0, '\\r in code file'
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except SyntaxError as err:
        # lineno may be None; main() formats it with %d, so fall back to 0.
        yield (err.lineno or 0), 'not compilable: %s' % err


@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Check for suspicious reST constructs."""
    inprod = False
    for lno, line in enumerate(lines, start=1):
        if seems_directive_re.search(line):
            yield lno, "comment seems to be intended as a directive"
        if three_dot_directive_re.search(line):
            yield lno, "directive should start with two dots, not three."
        if double_backtick_role.search(line):
            yield lno, "role use a single backtick, double backtick found."
        if role_with_no_backticks.search(line):
            yield lno, "role use a single backtick, no backtick found."
        if role_glued_with_word.search(line):
            yield lno, "missing space before role"
        # The default-role check is suspended from a productionlist
        # directive up to the next blank line.
        if ".. productionlist::" in line:
            inprod = True
        elif not inprod and default_role_re.search(line):
            yield lno, "default role used"
        elif inprod and not line.strip():
            inprod = False


@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for whitespace issues (carriage returns, tabs, trailing blanks)."""
    for lno, line in enumerate(lines, start=1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Only drop a real newline: the last line of a file may lack one,
        # and blindly cutting its final character would hide trailing
        # whitespace there.
        body = line[:-1] if line.endswith('\n') else line
        if body.rstrip(' \t') != body:
            yield lno, 'trailing whitespace'


@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default."""
    for lno, line in enumerate(lines, start=1):
        if len(line) <= 81:
            continue
        # don't complain about tables, links and function signatures
        stripped = line.lstrip()
        # startswith() is safe on an empty string, unlike indexing [0],
        # which crashed on over-long whitespace-only lines.
        if stripped.startswith(('+', '|')):
            continue
        if 'http://' in line or 'https://' in line:
            continue
        if stripped.startswith(('.. function', '.. method', '.. cfunction')):
            continue
        yield lno, "line too long"


@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Check HTML files for leaked reST markup; this only works if
    the HTML files have been built.
    """
    for lno, line in enumerate(lines, start=1):
        if leaked_markup_re.search(line):
            yield lno, 'possibly leaked markup: %r' % line


def hide_literal_blocks(lines):
    """Tool to remove literal blocks from given lines.

    It yields empty lines in place of blocks, so line numbers are
    still meaningful.
    """
    in_block = False
    for line in lines:
        if line.endswith("::\n"):
            in_block = True
        elif in_block:
            # A block continues over blank and space-indented lines.
            if line == "\n" or line.startswith(" "):
                line = "\n"
            else:
                in_block = False
        yield line


def type_of_explicit_markup(line):
    """Classify a line starting an explicit markup block.

    Returns one of 'directive', 'footnote', 'citation', 'target' or
    'substitution_definition'; anything else is reported as 'comment'.
    """
    for pattern, kind in _explicit_markup_kinds:
        if pattern.match(line):
            return kind
    return 'comment'


def hide_comments(lines):
    """Tool to remove comments from given lines.

    It yields empty lines in place of comments, so line numbers are
    still meaningful.
    """
    in_multiline_comment = False
    for line in lines:
        if line == "..\n":
            in_multiline_comment = True
        elif in_multiline_comment:
            if line == "\n" or line.startswith(" "):
                line = "\n"
            else:
                in_multiline_comment = False
        if line.startswith(".. ") and type_of_explicit_markup(line) == 'comment':
            line = "\n"
        yield line


@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s
    """
    # Both flags deliberately persist from one line to the next.
    in_code_sample = False
    check_next_one = False
    for lno, line in enumerate(hide_comments(hide_literal_blocks(lines)),
                               start=1):
        tokens = line.split("``")
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The token is empty when the closing backticks end the
                # input or are directly followed by another pair; test for
                # emptiness first, as ``token[0]`` would raise IndexError.
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == len(tokens) - 1:
                # No double backtick follows the last token of a line.
                continue
            if in_code_sample:
                check_next_one = True
            in_code_sample = not in_code_sample


def main(argv):
    """Command-line entry point.

    *argv* is the full argument vector, program name included.  Walks the
    given path (default: the current directory), runs every registered
    checker whose severity is at least the requested one, and prints the
    problems found.  Returns 2 on usage errors or a missing path, 1 if any
    problem was found and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = set()
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # A non-numeric severity is a usage error, not a crash.
                print(usage)
                return 2
        elif opt == '-i':
            ignore.add(abspath(val))

    if len(args) == 0:
        path = '.'
    elif len(args) == 1:
        path = args[0]
    else:
        print(usage)
        return 2

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # Number of problems found, keyed by severity.
    count = defaultdict(int)

    print("""⚠ rstlint.py is no longer maintained here and will be removed
⚠ in a future release.
⚠ Please use https://pypi.org/p/sphinx-lint instead.
""")

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list
        if abspath(root) in ignore:
            # Emptying dirs in place stops os.walk from descending further.
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except (OSError, UnicodeError) as err:
                # UnicodeError covers files that are not valid UTF-8;
                # report them like unreadable files instead of aborting.
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))


if __name__ == '__main__':
    sys.exit(main(sys.argv))