1e5c31af7Sopenharmony_ci#!/usr/bin/python3
2e5c31af7Sopenharmony_ci#
3e5c31af7Sopenharmony_ci# Copyright 2022-2024 The Khronos Group Inc.
4e5c31af7Sopenharmony_ci# SPDX-License-Identifier: Apache-2.0
5e5c31af7Sopenharmony_ci
6e5c31af7Sopenharmony_ci"""Used to convert files from the asciidoctor spec tree to Antora module
7e5c31af7Sopenharmony_ciformat. Success is highly dependent on strict adherence to Vulkan spec
8e5c31af7Sopenharmony_ciauthoring conventions.
9e5c31af7Sopenharmony_ci
10e5c31af7Sopenharmony_ciUsage: `antora-prep.py [-root path] -component path files`
11e5c31af7Sopenharmony_ci
12e5c31af7Sopenharmony_ci- `-root` is the root path (repository root, usually) relative to which spec
13e5c31af7Sopenharmony_ci  files are processed. Defaults to current directory if not specified.
14e5c31af7Sopenharmony_ci- `-component` is the path to the module and component in which converted
15e5c31af7Sopenharmony_ci  files are written (e.g. the component directory under which pages/,
16e5c31af7Sopenharmony_ci  partials/, images/, etc. are located).
17e5c31af7Sopenharmony_ci- `files` are asciidoc source files from the spec to convert.
18e5c31af7Sopenharmony_ci
Image files are linked from the component 'images' directory.
20e5c31af7Sopenharmony_ci
21e5c31af7Sopenharmony_ciAsciidoc markup files (.adoc) are scanned for the first title markup and
22e5c31af7Sopenharmony_ciclassified as partials or pages depending on whether it is a top-level title
23e5c31af7Sopenharmony_cior not. All .adoc files are rewritten to the component 'partials' directory, to
24e5c31af7Sopenharmony_ciallow transclusion of pages to work (otherwise the transclusions would also
25e5c31af7Sopenharmony_cihave to be rewritten).
26e5c31af7Sopenharmony_ci
Pages then have additional markup injected immediately following the page
title to set custom attributes needed for the build. Pages are then
symbolically linked from the component 'pages' directory to the actual
rewritten file in the 'partials' directory to follow Antora conventions.
31e5c31af7Sopenharmony_ci"""
32e5c31af7Sopenharmony_ci
33e5c31af7Sopenharmony_ci# For error and file-loading interfaces only
34e5c31af7Sopenharmony_ciimport argparse
35e5c31af7Sopenharmony_ciimport importlib
36e5c31af7Sopenharmony_ciimport os
37e5c31af7Sopenharmony_ciimport re
38e5c31af7Sopenharmony_ciimport sys
39e5c31af7Sopenharmony_cifrom generator import enquote
40e5c31af7Sopenharmony_cifrom reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
41e5c31af7Sopenharmony_cifrom pathlib import Path
42e5c31af7Sopenharmony_ci
# Matches a line consisting of an asciidoc block anchor, '[[anchor]]' or
# '[[anchor, text]]'. The 'anchor' group captures the text before the first
# comma (if any).
titleAnchorPat = re.compile(r'^\[\[(?P<anchor>[^,]+).*\]\]$')
# Matches a top-level title: a single '=' or '#', a space, then a title
# starting with an uppercase letter.
titlePat = re.compile(r'^[=#] (?P<title>[A-Z].*)')
# Matches a second-level or deeper title: two or more '=' or '#'
# characters, a space, then a title starting with an uppercase letter.
subtitlePat = re.compile(r'^[=#]{2,} (?P<title>[A-Z].*)')

# File categories. Each value doubles as the name of the corresponding
# subdirectory of the Antora component directory.
Pages = 'pages'
Partials = 'partials'
Images = 'images'
50e5c31af7Sopenharmony_ci
def undefquote(s):
    """Return `s` quoted for JavaScript via enquote(), or the literal
       string 'undefined' when `s` is None."""

    return 'undefined' if s is None else enquote(s)
59e5c31af7Sopenharmony_ci
60e5c31af7Sopenharmony_ci
def mapAnchor(anchor, title, pageMap, xrefMap, closeAnchor):
    """Rewrite a <<anchor{, title}>> xref -> xref:pagemap#anchor[{title}]

        - anchor - anchor name
        - title - xref description or '' if not specified, in which case the
          anchor text from the xrefMap is used if available
        - pageMap - map from a page anchor to a page name
        - xrefMap - map from an anchor to a (page anchor, title) pair
        - closeAnchor - True if closing >> is on this line, False otherwise

       Returns the rewritten xref markup. When closeAnchor is False the
       trailing ']' is omitted, so the caller can close the xref on a
       following line.

       If the anchor cannot be mapped to a page, a diagnostic is written to
       stderr and the bare anchor is used as the xref target.
    """

    # Determine which page anchor this anchor comes from
    # If it cannot be determined, use the unmapped anchor
    #@ Simplify the page anchor if pageName == current page
    try:
        (pageAnchor, mappedTitle) = xrefMap[anchor]

        if title == '' and closeAnchor:
            # No explicit title, and none can follow on the next line.
            # Infer one from anchor and xrefMap.
            title = mappedTitle

            # If the title is *still* empty, make a note of it and just use
            # the anchor name
            if title == '':
                print(f'No title found for anchor {anchor}', file=sys.stderr)
                title = anchor

        # Page the page anchor comes from
        pageName = pageMap[pageAnchor]
        print(f'mapAnchor: anchor {anchor} pageAnchor {pageAnchor} -> pageName = {pageName}')

        xref = f'{pageName}#{anchor}'
    except (KeyError, TypeError, ValueError):
        # Only lookup / unpacking failures mean "unknown anchor". Anything
        # else (e.g. KeyboardInterrupt) must propagate to the caller.
        print(f'Cannot determine which page {anchor} comes from, passing through to Antora intact', file=sys.stderr)
        xref = f'{anchor}'

    # Remove extraneous whitespace
    title = ' '.join(title.split())

    if closeAnchor:
        return f'xref:{xref}[{title}]'
    else:
        return f'xref:{xref}[{title}'
108e5c31af7Sopenharmony_ci
def replaceAnchorText(match, pageMap, xrefMap):
    """Substitution callback rewriting <<anchor,text>> to
       xref:newanchor[text]

        - match - match object; group 1 is the anchor, group 2 is the text
        - pageMap, xrefMap - per rewriteXrefs below
    """

    (anchor, text) = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=True)
119e5c31af7Sopenharmony_ci
def replaceAnchorOnly(match, pageMap, xrefMap):
    """Substitution callback rewriting <<anchor>> to xref:newanchor[]

        - match - match object; group 1 is the anchor
        - pageMap, xrefMap - per rewriteXrefs below
    """

    # An empty title asks mapAnchor to infer one from the xrefMap
    return mapAnchor(match.group(1), '', pageMap, xrefMap, closeAnchor=True)
129e5c31af7Sopenharmony_ci
def replaceAnchorTrailingText(match, pageMap, xrefMap):
    """Substitution callback rewriting a trailing '<<anchor,' to
       'xref:newanchor[', leaving the xref open.

        - match - match object; group 1 is the anchor, group 2 is the text
          (may be empty)
        - pageMap, xrefMap - per rewriteXrefs below
    """

    (anchor, text) = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=False)
140e5c31af7Sopenharmony_ci
class DocFile:
    """Information about a markup file being converted"""

    def __init__(self):
        """Constructor. Members start out empty and are filled in by
           populate() and rewriteXrefs().

           - lines - text of file as list of strings
           - root - common base directory for src files
           - component - path to component directory for outputs
           - srcpath - absolute path to file source
           - relpath - path to file source relative to root
           - dstpath - path to output file destination
           - dstlink - path to an alias (symlink to) dstpath, used for
             files that need to be in both partials and pages directories.
           - category - file type - Pages, Partials, or Images. These are
             string variables containing the corresponding component
             subdirectory name.
           - title - page title for Pages, else ''
           - titleLine - index into lines of the title line
           - titleAnchor - page title anchor for Pages, else ''
           - anchors - asciidoc anchors found in the file
           - includes - asciidoc includes found in the file
           - pageMap - dictionary mapping a page anchor to a source file
             relpath
           - xrefMap - dictionary mapping an anchor within a page to a
             (page anchor, title) pair
        """

        self.lines = None
        self.root = None
        self.component = None
        self.srcpath = None
        self.relpath = None
        self.dstpath = None
        self.dstlink = None
        self.category = None
        self.title = ''
        # Initialized here so rewriteFile() never sees a missing attribute
        self.titleLine = 0
        self.titleAnchor = ''
        self.anchors = set()
        self.includes = set()

        self.pageMap = {}
        self.xrefMap = {}

    def findTitle(self):
        """Find category (Pages or Partials) and title in a .adoc markup
           file.

           Heuristic is to search the beginning of the file for a top-level
           asciidoc title, preceded by an anchor for the page.

           Returns (category, title, titleLine, titleAnchor).
           - category is Pages if the first title found is top-level,
             otherwise Partials.
           - title is the title text, or 'NO TITLE FOUND' if none is found.
           - titleLine is the index of the title line in self.lines, or -1
             if none is found.
           - titleAnchor is the last anchor seen before the title, or '' if
             there was none."""

        # Chapter title block must be within this many lines of start of file
        maxLines = min(30, len(self.lines))

        # Default, if page anchor not found
        titleAnchor = ''

        for lineno, line in enumerate(self.lines[:maxLines]):
            # Look for the first anchor, which must precede the title to
            # apply to it (really, must precede it by exactly one line).
            match = titleAnchorPat.match(line)
            if match is not None:
                titleAnchor = match.group('anchor')
                continue

            # If we find a top-level title, it is a page.
            match = titlePat.match(line)
            if match is not None:
                return (Pages, match.group('title'), lineno, titleAnchor)

            # If we find a second-level or above title, it is a partial
            match = subtitlePat.match(line)
            if match is not None:
                return (Partials, match.group('title'), lineno, titleAnchor)

        # If we do not find a match in the first maxLines lines, assume it
        # is a partial.
        return (Partials, 'NO TITLE FOUND', -1, titleAnchor)

    def populate(self,
                 filename,
                 root,
                 component):
        """Populate data structures given file content and location.

           - filename - file to scan
           - root - absolute path to root under which all source files are
             read
           - component - absolute path to module / component directory under
             which all destination files are written

           Raises RuntimeError if the file content cannot be loaded.
        """

        # Load file content
        self.srcpath = os.path.abspath(filename)
        self.lines, _ = loadFile(self.srcpath)
        if self.lines is None:
            raise RuntimeError(f'No such file {self.srcpath}')

        # Miscellaneous relevant paths
        self.root = root
        self.relpath = os.path.relpath(self.srcpath, root)
        self.component = component

        # Determine file category.
        # Only .adoc files are candidates for pages, which is verified by
        # looking at the file header for a top-level title.
        # .svg .jpg .png are always images
        # Anything else is a partial
        (_, fileext) = os.path.splitext(filename)

        # Defaults
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = None

        if fileext in ('.svg', '.jpg', '.png'):
            self.category = Images
        elif fileext == '.adoc':
            (self.category,
             self.title,
             self.titleLine,
             self.titleAnchor) = self.findTitle()
        else:
            self.category = Partials

        # Determine destination path based on category
        # images/ are treated specially since there is only a single
        # directory and the component directory is already named Images.
        if self.category == Partials:
            self.dstpath = Path(self.component) / Partials / self.relpath
        elif self.category == Pages:
            # Save the page in partials/, link from pages/
            self.dstpath = Path(self.component) / Partials / self.relpath
            self.dstlink = Path(self.component) / Pages / self.relpath
        else:
            # Images go under images/, not under images/images/
            # This could fail if there were ever top-level images but as all
            # images used in the spec are required to be specified relative
            # to {images}, it is OK.
            self.dstpath = Path(self.component) / self.relpath

    def rewriteXrefs(self, pageMap=None, xrefMap=None):
        """Rewrite asciidoc <<>> xrefs into Antora xref: xrefs, including
           altering the xref target. self.lines is modified in place.

           - pageMap - map from page anchors to page names
           - xrefMap - map from anchors within a page to a
             (page anchor, title) pair

           Either map may be omitted, in which case a fresh empty
           dictionary is used."""

        # Do not use {} as a default argument: the same dictionary would be
        # shared by every DocFile that stores it below.
        if pageMap is None:
            pageMap = {}
        if xrefMap is None:
            xrefMap = {}

        # Keep a reference to the maps used for this rewrite
        self.pageMap = pageMap
        self.xrefMap = xrefMap

        # Xref markup may be broken across lines, and may or may not include
        # anchor text. Track whether the closing >> is being looked for at
        # start of line, or not.
        withinXref = False

        for lineno, line in enumerate(self.lines):
            if withinXref:
                # Could use line.replace, but that does not return a match
                # count, so we cannot tell if the '>>' is missing.
                (line, count) = re.subn(r'>>', r']', line, count=1)
                if count == 0:
                    print(f'WARNING: No closing >> found on line {lineno} of {self.relpath}', file=sys.stderr)
                elif line[0] != ' ' and self.lines[lineno-1][-1] not in '[ ':
                    # Add whitespace corresponding to crushed-out newline on
                    # previous line, so title words do not run together.
                    self.lines[lineno-1] += ' '
                withinXref = False

            # Now look for all xrefs starting on this line and remap them,
            # including remapping the anchor.

            # First, complete xrefs with alt-text (<<anchor, text>>)
            line = re.sub(r'<<([^,>]*),([^>]+)>>',
                lambda match: replaceAnchorText(match, pageMap, xrefMap),
                line)

            # Next, complete xrefs without alt-text (<<anchor>>)
            line = re.sub(r'<<([^,>]*)>>',
                lambda match: replaceAnchorOnly(match, pageMap, xrefMap),
                line)

            # Finally, if there is a trailing '<<anchor,' at EOL, remap it
            # and set the flag so the terminating '>>' on the next line will
            # be mapped into an xref closing ']'.
            (line, count) = re.subn(r'<<([^,>]*),([^>]*)$',
                lambda match: replaceAnchorTrailingText(match, pageMap, xrefMap),
                line)
            withinXref = count > 0

            self.lines[lineno] = line

    def __str__(self):
        """Return a multi-line, human-readable summary of this file."""

        # self.lines is None until populate() has been called
        lineCount = len(self.lines) if self.lines is not None else 0

        lines = [
            f'Input file {self.srcpath}: {lineCount} lines',
            f'root = {self.root} component = {self.component} relpath = {self.relpath}',
            f'category = {self.category} dstpath = {self.dstpath}',
            f'title = {self.title}',
            f'titleAnchor = {self.titleAnchor}',
        ]
        return '\n'.join(lines)

    def removeDestination(self, path, text, overwrite):
        """Remove a destination file, if it exists and overwrite is true.
           Ensure the destination directory exists.

            path - file pathname
            text - descriptive text for errors
            overwrite - if True, replace existing output file

           Raises RuntimeError if the destination exists and overwrite is
           False.
        """

        # lexists() also reports dangling symlinks, which exists() misses
        # and which would make a following os.symlink() fail.
        if os.path.lexists(path):
            if not overwrite:
                raise RuntimeError(f'Will not overwrite {text}: {path}')
            os.remove(path)

        dirname = os.path.dirname(path)
        os.makedirs(dirname, exist_ok=True)

    def rewriteFile(self, overwrite = True, pageHeaders = None):
        """Write source file to component directory. Images are just symlinked
           to the external file. Pages are rewritten to Partials, then
           symlinked to Pages.

           - overwrite - if True, replace existing output files
           - pageHeaders - if not None, a list of strings to inject
             following the chapter heading in each page

           <<>>-style xrefs are assumed to be rewritten prior to calling
           rewriteFile.

           Raises RuntimeError if an existing output would be overwritten
           while overwrite is False, or if a page has no dstlink.

           May still need to rewrite custom macros.
        """

        self.removeDestination(self.dstpath, 'destination file', overwrite)

        if self.category == Images:
            # Just symlink destination image to source
            os.symlink(self.srcpath, self.dstpath)
        elif self.category == Partials:
            self.writeFile(self.dstpath)
        elif self.category == Pages:
            if pageHeaders is not None:
                # Inject page headers immediately following page title.
                # Add blank lines before and after the pageHeaders to avoid
                # coalescing with file content.
                split = self.titleLine + 1
                self.lines = (self.lines[:split]
                              + ['\n'] + pageHeaders + ['\n']
                              + self.lines[split:])

            self.writeFile(self.dstpath)

            if self.dstlink is None:
                raise RuntimeError(f'Wrote Page {self.dstpath} to Partials, but no Pages link supplied')

            self.removeDestination(self.dstlink, 'destination link', overwrite)
            os.symlink(self.dstpath, self.dstlink)

    def writeFile(self, path):
        """Write self.lines[] to file at specified path.

           Lines are written as-is, with no line terminator added.
           Raises RuntimeError if the file cannot be opened for writing."""

        try:
            fp = open(path, 'w', encoding='utf8')
        except (OSError, ValueError) as err:
            raise RuntimeError(f'Cannot open output file {path}') from err

        # Context manager closes the file even if a write fails
        with fp:
            fp.writelines(self.lines)
428e5c31af7Sopenharmony_ci
def testHarness():
    """Run DocFile.rewriteXrefs over a small in-memory sample containing
       single-line and line-broken xrefs, printing the lines before and
       after the rewrite."""

    def printFile(label, lines):
        """Print a label, a separator rule, then each line in turn."""
        print('\n'.join([label, '------------------', *lines]))

    # Sample anchor -> page and anchor -> (page anchor, title) maps
    pageMap = {
        'ext'  : 'file/ext.adoc',
        'core' : 'file/core.adoc',
    }
    xrefMap = {
        'ext'       : [ 'ext', '' ],
        'ext-label' : [ 'ext', 'LABELLED ext-label' ],
        'core'      : [ 'core', 'Core Title' ],
        'core-label': [ 'core', 'Core Label Title' ],
    }

    # Sample markup, including xrefs whose closing >> is on the next line
    sample = DocFile()
    sample.lines = [
        '<<ext,ext chapter>> <<ext-label,',
        'ext chapter/label>>',
        '<<core>>, <<core-label, core chapter/label',
        '>>'
    ]

    printFile('Original File', sample.lines)
    sample.rewriteXrefs(pageMap, xrefMap)
    printFile('Edited File', sample.lines)
461e5c31af7Sopenharmony_ci
462e5c31af7Sopenharmony_ciif __name__ == '__main__':
463e5c31af7Sopenharmony_ci    parser = argparse.ArgumentParser()
464e5c31af7Sopenharmony_ci
465e5c31af7Sopenharmony_ci    parser.add_argument('-root', action='store', dest='root',
466e5c31af7Sopenharmony_ci                        default=os.getcwd(),
467e5c31af7Sopenharmony_ci                        help='Specify root directory under which files are located (default current directory)')
468e5c31af7Sopenharmony_ci    parser.add_argument('-pageHeaders', action='store', dest='pageHeaders',
469e5c31af7Sopenharmony_ci                        default=None,
470e5c31af7Sopenharmony_ci                        help='Specify file whose contents are injected after title of each converted page')
471e5c31af7Sopenharmony_ci    parser.add_argument('-component', action='store', dest='component',
472e5c31af7Sopenharmony_ci                        required=True,
473e5c31af7Sopenharmony_ci                        help='Specify module / component directory in which converted files are written')
474e5c31af7Sopenharmony_ci    #parser.add_argument('-htmlspec', action='store', dest='htmlspec',
475e5c31af7Sopenharmony_ci    #                    default=None, required=False,
476e5c31af7Sopenharmony_ci    #                    help='Specify HTML of generated spec to extract anchor mapping from')
477e5c31af7Sopenharmony_ci    parser.add_argument('-xrefpath', action='store', dest='xrefpath',
478e5c31af7Sopenharmony_ci                        default=None, required=False,
479e5c31af7Sopenharmony_ci                        help='Specify path to xrefMap.py containing map of anchors to chapter anchors')
480e5c31af7Sopenharmony_ci    parser.add_argument('-pagemappath', action='store', dest='pagemappath',
481e5c31af7Sopenharmony_ci                        default=None, required=False,
482e5c31af7Sopenharmony_ci                        help='Specify path to output pageMap.cjs containing map of anchors to chapter anchors')
483e5c31af7Sopenharmony_ci    parser.add_argument('-filelist', action='store',
484e5c31af7Sopenharmony_ci                        default=None, required=False,
485e5c31af7Sopenharmony_ci                        help='Specify file containing a list of filenames to convert, one/line')
486e5c31af7Sopenharmony_ci    parser.add_argument('files', metavar='filename', nargs='*',
487e5c31af7Sopenharmony_ci                        help='Specify name of a single file to convert')
488e5c31af7Sopenharmony_ci
489e5c31af7Sopenharmony_ci    args = parser.parse_args()
490e5c31af7Sopenharmony_ci
491e5c31af7Sopenharmony_ci    args.root = os.path.abspath(args.root)
492e5c31af7Sopenharmony_ci    args.component = os.path.abspath(args.component)
493e5c31af7Sopenharmony_ci
494e5c31af7Sopenharmony_ci    if args.pageHeaders is not None:
495e5c31af7Sopenharmony_ci        args.pageHeaders, _ = loadFile(args.pageHeaders)
496e5c31af7Sopenharmony_ci
497e5c31af7Sopenharmony_ci    if False:
498e5c31af7Sopenharmony_ci        testHarness()
499e5c31af7Sopenharmony_ci        sys.exit(0)
500e5c31af7Sopenharmony_ci
501e5c31af7Sopenharmony_ci    # Initialize dictionaries
502e5c31af7Sopenharmony_ci    pageInfo = {}
503e5c31af7Sopenharmony_ci    pageMap = {}
504e5c31af7Sopenharmony_ci
505e5c31af7Sopenharmony_ci    # The xrefmap is imported from the 'xrefMap' module, if it exists
506e5c31af7Sopenharmony_ci    try:
507e5c31af7Sopenharmony_ci        if args.xrefpath is not None:
508e5c31af7Sopenharmony_ci            sys.path.append(args.xrefpath)
509e5c31af7Sopenharmony_ci        from xrefMap import xrefMap
510e5c31af7Sopenharmony_ci    except:
511e5c31af7Sopenharmony_ci        print('WARNING: No module xrefMap containing xrefMap dictionary', file=sys.stderr)
512e5c31af7Sopenharmony_ci        xrefMap = {}
513e5c31af7Sopenharmony_ci
514e5c31af7Sopenharmony_ci    # If a file containing a list of files was specified, add each one.
515e5c31af7Sopenharmony_ci    # Could try using os.walk() instead, but that is very slow.
516e5c31af7Sopenharmony_ci    if args.filelist is not None:
517e5c31af7Sopenharmony_ci        count = 0
518e5c31af7Sopenharmony_ci        lines, _ = loadFile(args.filelist)
519e5c31af7Sopenharmony_ci        if lines is None:
520e5c31af7Sopenharmony_ci            raise RuntimeError(f'Error reading filelist {args.filelist}')
521e5c31af7Sopenharmony_ci        for line in lines:
522e5c31af7Sopenharmony_ci            path = line.rstrip()
523e5c31af7Sopenharmony_ci            if path[0].isalpha() and path.endswith('.adoc'):
524e5c31af7Sopenharmony_ci                args.files.append(path)
525e5c31af7Sopenharmony_ci                count = count + 1
526e5c31af7Sopenharmony_ci        print(f'Read {count} paths from {args.filelist}')
527e5c31af7Sopenharmony_ci
528e5c31af7Sopenharmony_ci    for filename in args.files:
529e5c31af7Sopenharmony_ci        # Create data structure representing the file.
530e5c31af7Sopenharmony_ci        docFile = DocFile()
531e5c31af7Sopenharmony_ci        docFile.populate(filename = filename,
532e5c31af7Sopenharmony_ci                         root = args.root,
533e5c31af7Sopenharmony_ci                         component = args.component)
534e5c31af7Sopenharmony_ci        # print(docFile, '\n')
535e5c31af7Sopenharmony_ci
536e5c31af7Sopenharmony_ci        # Save information about the file under its relpath
537e5c31af7Sopenharmony_ci        pageInfo[docFile.relpath] = docFile
538e5c31af7Sopenharmony_ci
539e5c31af7Sopenharmony_ci        # Save mapping from page anchor to its relpath
540e5c31af7Sopenharmony_ci        if docFile.titleAnchor is not None:
541e5c31af7Sopenharmony_ci            pageMap[docFile.titleAnchor] = docFile.relpath
542e5c31af7Sopenharmony_ci
543e5c31af7Sopenharmony_ci    # All files have been read and classified.
544e5c31af7Sopenharmony_ci    # Rewrite them in memory.
545e5c31af7Sopenharmony_ci
546e5c31af7Sopenharmony_ci    for key in pageInfo:
547e5c31af7Sopenharmony_ci        # Look for <<>>-style anchors and rewrite them to Antora xref-style
548e5c31af7Sopenharmony_ci        # anchors using the pageMap (of top-level anchors to page names) and
549e5c31af7Sopenharmony_ci        # xrefmap (of anchors to top-level anchors).
550e5c31af7Sopenharmony_ci        docFile = pageInfo[key]
551e5c31af7Sopenharmony_ci
552e5c31af7Sopenharmony_ci        ## print(f'*** Rewriting {key}')
553e5c31af7Sopenharmony_ci        ## print(docFile, '\n')
554e5c31af7Sopenharmony_ci
555e5c31af7Sopenharmony_ci        docFile.rewriteXrefs(pageMap, xrefMap)
556e5c31af7Sopenharmony_ci        docFile.rewriteFile(overwrite = True, pageHeaders = args.pageHeaders)
557e5c31af7Sopenharmony_ci
558e5c31af7Sopenharmony_ci    # Write the pageMap to a .cjs file for use in the Antora build's
559e5c31af7Sopenharmony_ci    # specmacros extensions. The xrefMap is already written in JS form.
560e5c31af7Sopenharmony_ci    if args.pagemappath is not None:
561e5c31af7Sopenharmony_ci        try:
562e5c31af7Sopenharmony_ci            fp = open(args.pagemappath, 'w', encoding='utf8')
563e5c31af7Sopenharmony_ci        except:
564e5c31af7Sopenharmony_ci            raise RuntimeError(f'Cannot open output pageMap.cjs file {args.pagemappath}')
565e5c31af7Sopenharmony_ci
566e5c31af7Sopenharmony_ci        print('exports.pageMap = {', file=fp)
567e5c31af7Sopenharmony_ci        for pageAnchor in sorted(pageMap):
568e5c31af7Sopenharmony_ci            pageName = pageMap[pageAnchor]
569e5c31af7Sopenharmony_ci            print(f'    {undefquote(pageAnchor)} : {undefquote(pageName)},', file=fp)
570e5c31af7Sopenharmony_ci        print('}', file=fp)
571e5c31af7Sopenharmony_ci
572e5c31af7Sopenharmony_ci        fp.close()
573e5c31af7Sopenharmony_ci
574e5c31af7Sopenharmony_ci##        if not os.path.exists(args.xrefmap):
575e5c31af7Sopenharmony_ci##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')
576e5c31af7Sopenharmony_ci##        if args.xrefmap[-3:] != '.py':
577e5c31af7Sopenharmony_ci##            raise UserWarning(f'Specified xrefmap {args.xrefmap} is not a .py file')
578e5c31af7Sopenharmony_ci##
579e5c31af7Sopenharmony_ci##        abspath = os.path.abspath(args.xrefmap)
580e5c31af7Sopenharmony_ci##        xrefdir = os.path.dirname(os.path.abspath(args.xrefmap))
581e5c31af7Sopenharmony_ci##        sys.path.append(dir)
582e5c31af7Sopenharmony_ci##
583e5c31af7Sopenharmony_ci##        xrefbase = os.path.split(args.xrefmap)[1]
584e5c31af7Sopenharmony_ci##        xrefbase = os.path.splitext(xrefbase)[0]
585e5c31af7Sopenharmony_ci##
586e5c31af7Sopenharmony_ci##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')
587