162306a36Sopenharmony_ci# SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
362306a36Sopenharmony_ci#
462306a36Sopenharmony_ci# Apply kernel-specific tweaks after the initial document processing
562306a36Sopenharmony_ci# has been done.
662306a36Sopenharmony_ci#
762306a36Sopenharmony_cifrom docutils import nodes
862306a36Sopenharmony_ciimport sphinx
962306a36Sopenharmony_cifrom sphinx import addnodes
1062306a36Sopenharmony_ciif sphinx.version_info[0] < 2 or \
1162306a36Sopenharmony_ci   sphinx.version_info[0] == 2 and sphinx.version_info[1] < 1:
1262306a36Sopenharmony_ci    from sphinx.environment import NoUri
1362306a36Sopenharmony_cielse:
1462306a36Sopenharmony_ci    from sphinx.errors import NoUri
1562306a36Sopenharmony_ciimport re
1662306a36Sopenharmony_cifrom itertools import chain
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#
1962306a36Sopenharmony_ci# Python 2 lacks re.ASCII...
2062306a36Sopenharmony_ci#
# Fall back to "no flag" (0) when the re module has no ASCII attribute.
ascii_p3 = getattr(re, 'ASCII', 0)
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#
2762306a36Sopenharmony_ci# Regex nastiness.  Of course.
2862306a36Sopenharmony_ci# Try to identify "function()" that's not already marked up some
2962306a36Sopenharmony_ci# other way.  Sphinx doesn't like a lot of stuff right after a
3062306a36Sopenharmony_ci# :c:func: block (i.e. ":c:func:`mmap()`s" flakes out), so the last
3162306a36Sopenharmony_ci# bit tries to restrict matches to things that won't create trouble.
3262306a36Sopenharmony_ci#
# group(1) is the whole "name()" text and group(2) the bare name; the name
# must be at least two characters long and the parentheses must be empty.
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', flags=ascii_p3)
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci#
3662306a36Sopenharmony_ci# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef
3762306a36Sopenharmony_ci#
# group(1) is the keyword and group(2) the type name (two characters minimum).
RE_generic_type = re.compile(r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)',
                             flags=ascii_p3)
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci#
4262306a36Sopenharmony_ci# Sphinx 3 uses a different C role for each one of struct, union, enum and
4362306a36Sopenharmony_ci# typedef
4462306a36Sopenharmony_ci#
# Each pattern captures the keyword in group(1) and the name in group(2),
# i.e. the same group layout as RE_generic_type.
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', flags=ascii_p3)
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci#
# Detects a reference to a documentation page with a .rst or .txt extension;
# the leading "Documentation/" prefix is optional
5362306a36Sopenharmony_ci#
# group(1) is the optional "Documentation/" prefix, group(2) the path without
# its extension and group(4) the extension itself ("rst" or "txt").
RE_doc = re.compile(r'(\bDocumentation/)?((\.\./)*[\w\-/]+)\.(rst|txt)')
5562306a36Sopenharmony_ci
#
# Matches a ".. c:namespace:: <name>" directive line; group(1) is the
# namespace name.  The two dots are escaped so that only the literal reST
# directive marker matches, not any two arbitrary characters.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci#
5962306a36Sopenharmony_ci# Reserved C words that we should skip when cross-referencing
6062306a36Sopenharmony_ci#
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci#
6562306a36Sopenharmony_ci# Many places in the docs refer to common system calls.  It is
6662306a36Sopenharmony_ci# pointless to try to cross-reference them and, as has been known
6762306a36Sopenharmony_ci# to happen, somebody defining a function by these names can lead
6862306a36Sopenharmony_ci# to the creation of incorrect and confusing cross references.  So
6962306a36Sopenharmony_ci# just don't even try with these names.
7062306a36Sopenharmony_ci#
Skipfuncs = [
    'open', 'close', 'read', 'write',
    'fcntl', 'mmap', 'select', 'poll',
    'fork', 'execve', 'clone', 'ioctl',
    'socket',
]
7462306a36Sopenharmony_ci
# C namespace of the document currently being processed ('' when it has
# none); assigned by auto_markup() and read by the C markup functions.
c_namespace = ''
7662306a36Sopenharmony_ci
def markup_refs(docname, app, node):
    """Split the text of *node* into plain text and cross-reference nodes.

    Every regex applicable to the running Sphinx major version is run over
    the node's text.  Each match is handed to its associated markup function
    and the text between matches is kept as plain Text nodes.

    docname and app are passed through unchanged to the markup functions.

    Returns the list of nodes that should replace *node*.
    """
    t = node.astext()
    done = 0
    repl = []
    #
    # Associate each regex with the function that will markup its matches
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text
    #
    sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
    for m in sorted_matches:
        #
        # Matches coming from different regexes may overlap (for example
        # "struct foo.rst" yields both "struct foo" and "foo.rst").  The
        # text covered by an earlier match has already been emitted, so
        # drop any match that starts inside it rather than duplicating
        # that text in the output.
        #
        if m.start() < done:
            continue

        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this text and
        # append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    #
    # Whatever follows the last match is plain text too.
    #
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci#
12462306a36Sopenharmony_ci# Keep track of cross-reference lookups that failed so we don't have to
12562306a36Sopenharmony_ci# do them again.
12662306a36Sopenharmony_ci#
failed_lookups = {}


def failure_seen(target):
    """Return True if a lookup of *target* has already been noted as failed."""
    return target in failed_lookups


def note_failure(target):
    """Record that a cross-reference lookup of *target* failed."""
    failed_lookups[target] = True
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci#
13462306a36Sopenharmony_ci# In sphinx3 we can cross-reference to C macro and function, each one with its
13562306a36Sopenharmony_ci# own C role, but both match the same regex, so we try both.
13662306a36Sopenharmony_ci#
def markup_func_ref_sphinx3(docname, app, match):
    """Try to turn a "name()" match into a C-domain cross reference.

    match is an RE_function match: group(0) is the text to display and
    group(2) the bare function name.  The name is looked up inside the
    current document's C namespace first (when there is one) and then
    without a namespace.  Note that only the 'function' reftype is
    requested from the C domain here.

    Returns the resolved reference node, or a plain Text node holding the
    matched text when nothing could be resolved.
    """
    c_domain = app.env.domains['c']
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    # Reserved words are never cross-referenced.
    if name in Skipnames:
        return plain

    # Namespaced candidate first, bare name last.
    if c_namespace:
        candidates = [c_namespace + "." + name, name]
    else:
        candidates = [name]

    for target in candidates:
        # Skip common syscall names and lookups already known to fail.
        if target in Skipfuncs or failure_seen(target):
            continue

        literal = nodes.literal(classes=['xref', 'c', 'c-func'])
        literal += plain
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype='function',
                                        reftarget=target,
                                        modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = c_domain.resolve_xref(app.env, docname, app.builder,
                                             'function', target, pending,
                                             literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved
        # Remember the miss so this target is not looked up again.
        note_failure(target)

    return plain
17762306a36Sopenharmony_ci
def markup_c_ref(docname, app, match):
    """Try to turn a C function or type match into a C-domain cross reference.

    match comes from one of the C regexes: group(0) is the text to display
    and group(2) the identifier to look up.  The identifier is tried inside
    the current document's C namespace first (when there is one) and then
    without a namespace.

    Returns the resolved reference node, or a plain Text node holding the
    matched text when nothing could be resolved.
    """
    # regex -> (CSS class of the literal node, reftype given to the C domain)
    roles = {
        # Sphinx 2 only
        RE_function: ('c-func', 'function'),
        RE_generic_type: ('c-type', 'type'),
        # Sphinx 3+ only
        RE_struct: ('c-struct', 'struct'),
        RE_union: ('c-union', 'union'),
        RE_enum: ('c-enum', 'enum'),
        RE_typedef: ('c-type', 'type'),
    }

    c_domain = app.env.domains['c']
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    # Reserved words are never cross-referenced.
    if name in Skipnames:
        return plain

    css_class, reftype = roles[match.re]
    is_function = match.re == RE_function

    # Namespaced candidate first, bare name last.
    if c_namespace:
        candidates = [c_namespace + "." + name, name]
    else:
        candidates = [name]

    for target in candidates:
        # Common syscall names are skipped, but only for function matches.
        if is_function and target in Skipfuncs:
            continue

        literal = nodes.literal(classes=['xref', 'c', css_class])
        literal += plain
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype=reftype,
                                        reftarget=target, modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = c_domain.resolve_xref(app.env, docname, app.builder,
                                             reftype, target, pending,
                                             literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved

    return plain
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci#
23762306a36Sopenharmony_ci# Try to replace a documentation reference of the form Documentation/... with a
23862306a36Sopenharmony_ci# cross reference to that page
23962306a36Sopenharmony_ci#
def markup_doc_ref(docname, app, match):
    """Try to turn a documentation path match into a 'doc' cross reference.

    match is an RE_doc match: group(1) is the optional "Documentation/"
    prefix and group(2) the path without its extension.  When the prefix is
    present the target is given a leading "/" (presumably making it relative
    to the documentation root -- confirm against the Sphinx 'doc' role).

    Returns the resolved reference node, or a plain Text node holding the
    matched text when the std domain could not resolve it.
    """
    std_domain = app.env.domains['std']
    #
    # Go through the dance of getting an xref out of the std domain
    #
    page = match.group(2)
    if match.group(1):
        page = "/" + page
    pending = addnodes.pending_xref('', refdomain='std', reftype='doc',
                                    reftarget=page, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = std_domain.resolve_xref(app.env, docname, app.builder,
                                           'doc', page, pending, None)
    except NoUri:
        resolved = None

    # Fall back to the untouched text when no reference came back.
    if not resolved:
        return nodes.Text(match.group(0))
    return resolved
26962306a36Sopenharmony_ci
def get_c_namespace(app, docname):
    """Return the C namespace declared in the source file of *docname*.

    The file located via app.env.doc2path() is scanned line by line and the
    argument of the first line matching RE_namespace is returned; scanning
    stops at that line.  An empty string is returned when no line matches.
    """
    path = app.env.doc2path(docname)
    with open(path) as src:
        # Lazy: lines are only read until the first successful match.
        hits = (RE_namespace.search(line) for line in src)
        first = next((hit for hit in hits if hit), None)
    if first is None:
        return ''
    return first.group(1)
27862306a36Sopenharmony_ci
def auto_markup(app, doctree, name):
    """Add automatic cross references to the paragraphs of *doctree*.

    Connected to the 'doctree-resolved' event by setup().  The document's
    C namespace is stored in the module-level c_namespace, then every
    eligible text node found under a paragraph is replaced by the node list
    produced by markup_refs().
    """
    global c_namespace
    c_namespace = get_c_namespace(app, name)

    def text_but_not_a_reference(node):
        """Select Text nodes that are neither literal nor inside a reference."""
        if not isinstance(node, nodes.Text):
            return False
        # The nodes.literal test catches ``literal text``, its purpose is to
        # avoid adding cross-references to functions that have been explicitly
        # marked with :c:func:.
        if isinstance(node.parent, nodes.literal):
            return False

        # Walk up the ancestors; any Referential one disqualifies the node.
        ancestor = node.parent
        while ancestor:
            if isinstance(ancestor, nodes.Referential):
                return False
            ancestor = ancestor.parent
        return True

    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    for para in doctree.traverse(nodes.paragraph):
        for text_node in para.traverse(condition=text_but_not_a_reference):
            replacement = markup_refs(name, app, text_node)
            text_node.parent.replace(text_node, replacement)
30662306a36Sopenharmony_ci
def setup(app):
    """Register auto_markup() for the 'doctree-resolved' event.

    Returns the extension metadata dictionary, which declares the extension
    safe for both parallel reading and parallel writing.
    """
    app.connect('doctree-resolved', auto_markup)
    metadata = dict(parallel_read_safe=True, parallel_write_safe=True)
    return metadata
313