#!/usr/bin/python3
#
# Copyright 2022-2024 The Khronos Group Inc.
# SPDX-License-Identifier: Apache-2.0

"""Used to convert files from the asciidoctor spec tree to Antora module
format. Success is highly dependent on strict adherence to Vulkan spec
authoring conventions.

Usage: `antora-prep.py [-root path] -component path files`

- `-root` is the root path (repository root, usually) relative to which spec
  files are processed. Defaults to current directory if not specified.
- `-component` is the path to the module and component in which converted
  files are written (e.g. the component directory under which pages/,
  partials/, images/, etc. are located).
- `files` are asciidoc source files from the spec to convert.

Image files are linked from the component 'images' directory

Asciidoc markup files (.adoc) are scanned for the first title markup and
classified as partials or pages depending on whether it is a top-level title
or not. All .adoc files are rewritten to the component 'partials' directory, to
allow transclusion of pages to work (otherwise the transclusions would also
have to be rewritten).

pages then have additional markup injected immediately following the page
title to set custom attributes needed for the build. pages are then
symbolically linked from the component 'pages' directory to the actual
rewritten file in the 'partials' directory to follow Antora conventions.
"""

# For error and file-loading interfaces only
import argparse
import importlib
import os
import re
import sys
from generator import enquote
from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
from pathlib import Path

titleAnchorPat = re.compile(r'^\[\[(?P<anchor>[^,]+).*\]\]$')
titlePat = re.compile(r'^[=#] (?P<title>[A-Z].*)')
subtitlePat = re.compile(r'^[=#]{2,} (?P<title>[A-Z].*)')

# Patterns used by DocFile.rewriteXrefs, compiled once instead of per line.
# - complete xref with alt-text: <<anchor, text>>
# - complete xref without alt-text: <<anchor>>
# - xref left open at end of line: <<anchor, possibly partial text
# - the closing '>>' of an xref opened on the previous line
xrefTextPat = re.compile(r'<<([^,>]*),([^>]+)>>')
xrefOnlyPat = re.compile(r'<<([^,>]*)>>')
xrefTrailingPat = re.compile(r'<<([^,>]*),([^>]*)$')
xrefClosePat = re.compile(r'>>')

Pages = 'pages'
Partials = 'partials'
Images = 'images'

def undefquote(s):
    """Quote a string for JavaScript, or return the JavaScript undefined
       value.

    - s - string to quote, or None"""

    if s is None:
        return 'undefined'
    return enquote(s)


def mapAnchor(anchor, title, pageMap, xrefMap, closeAnchor):
    """Rewrite a <<anchor{, title}>> xref -> xref:pagemap#anchor[{title}]
    - anchor - anchor name
    - title - xref description or '' if not specified, in which case the
      anchor text from the xrefMap is used if available
    - closeAnchor - True if closing >> is on this line, False otherwise
    - pageMap, xrefMap - per rewriteXrefs below

    Returns the rewritten xref markup. If closeAnchor is False the
    returned text ends with the (possibly partial) title and no closing
    ']', which the caller supplies when it finds the '>>' on the next line.
    """

    #@if anchor == 'features-shaderStorageImageReadWithoutFormat':
    #@    import pdb
    #@    pdb.set_trace()

    # Determine which page anchor this anchor comes from
    # If it cannot be determined, use the unmapped anchor
    #@ Simplify the page anchor if pageName == current page
    try:
        if title != '' or not closeAnchor:
            # Either a (possibly up to a line break) title is supplied, or
            # title is on the next line
            (pageAnchor, _) = xrefMap[anchor]
        else:
            # No explicit title. Infer one from anchor and xrefMap.
            (pageAnchor, title) = xrefMap[anchor]

            # If the title is *still* empty (or the map carries no title at
            # all), make a note of it and just use the anchor name
            if not title:
                print(f'No title found for anchor {anchor}', file=sys.stderr)
                title = anchor

        # Page the page anchor comes from
        pageName = pageMap[pageAnchor]
        print(f'mapAnchor: anchor {anchor} pageAnchor {pageAnchor} -> pageName = {pageName}')

        xref = f'{pageName}#{anchor}'
    except (LookupError, TypeError, ValueError):
        # Unknown anchor / page anchor, or a malformed xrefMap entry.
        # This is a best-effort conversion, so report it and carry on.
        print(f'Cannot determine which page {anchor} comes from, passing through to Antora intact', file=sys.stderr)
        xref = f'{anchor}'

    # Remove extraneous whitespace. This also strips a trailing newline
    # captured as part of an xref title that continues on the next line.
    title = ' '.join(title.split())

    if closeAnchor:
        return f'xref:{xref}[{title}]'
    else:
        return f'xref:{xref}[{title}'

def replaceAnchorText(match, pageMap, xrefMap):
    """Rewrite <<anchor,text>> to xref:newanchor[text]
    - match - match object, group 1 = anchor, group 2 = text
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)
    text = match.group(2)

    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=True)

def replaceAnchorOnly(match, pageMap, xrefMap):
    """Rewrite <<anchor>> to xref:newanchor[]
    - match - match object, group 1 = anchor
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)

    return mapAnchor(anchor, '', pageMap, xrefMap, closeAnchor=True)

def replaceAnchorTrailingText(match, pageMap, xrefMap):
    """Rewrite <<anchor, to xref:newanchor[
    - match - match object, group 1 = anchor, group 2 = text (may be empty)
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)
    text = match.group(2)

    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=False)

class DocFile:
    """Information about a markup file being converted"""

    def __init__(self):
        """Constructor
        - lines - text of file as list of strings
        - root - common base directory for src files
        - component - path to component directory for outputs
        - srcpath - absolute path to file source
        - relpath - path to file source relative to root
        - dstpath - path to output file destination
        - dstlink - path to an alias (symlink to) dstpath, used for
          files that need to be in both partials and pages directories.
        - category - file type - Pages, Partials, or Images. These are
          string variables containing the corresponding component
          subdirectory name.
        - title - page title for Pages, else ''
        - titleLine - index into lines of the title line found by
          findTitle
        - titleAnchor - page title anchor for Pages, else ''
        - anchors - asciidoc anchors found in the file
        - includes - asciidoc includes found in the file
        - pageMap - dictionary mapping a page anchor to a source file
          relpath
        - xrefMap - dictionary mapping an anchor within a page to a page
          anchor
        """

        self.lines = None
        self.root = None
        self.component = None
        self.srcpath = None
        self.relpath = None
        self.dstpath = None
        self.dstlink = None
        self.category = None
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = ''
        self.anchors = set()
        self.includes = set()

        self.pageMap = {}
        self.xrefMap = {}

    def findTitle(self):
        """Find category (Pages or Partials) and title in a .adoc markup
        file.

        Heuristic is to search the beginning of the file for a top-level
        asciidoc title, preceded immediately by an anchor for the page.

        Returns (category, title, titleLine, titleAnchor):
        - category - Pages if the first title found is a top-level title,
          Partials otherwise
        - title - text of the first title found, or 'NO TITLE FOUND'
        - titleLine - index into self.lines of that title, or -1
        - titleAnchor - last anchor seen before the title, or '' if none"""

        # Chapter title block must be within this many lines of start of file
        maxLines = 30

        # Default, if page anchor not found
        titleAnchor = ''

        for (lineno, line) in enumerate(self.lines[:maxLines]):
            # Look for the first anchor, which must precede the title to
            # apply to it (really, must precede it by exactly one line).
            match = titleAnchorPat.match(line)
            if match is not None:
                titleAnchor = match.group('anchor')
                continue

            # If we find a top-level title, it is a page.
            match = titlePat.match(line)
            if match is not None:
                return (Pages, match.group('title'), lineno, titleAnchor)

            # If we find a second-level or above title, it is a partial
            match = subtitlePat.match(line)
            if match is not None:
                return (Partials, match.group('title'), lineno, titleAnchor)

        # If we do not find a match in the first maxLines lines, assume it
        # is a partial.
        return (Partials, 'NO TITLE FOUND', -1, titleAnchor)

    def populate(self,
                 filename,
                 root,
                 component):
        """Populate data structures given file content and location.

        - filename - file to scan
        - root - absolute path to root under which all source files are
          read
        - component - absolute path to module / component directory under
          which all destination files are written

        Raises RuntimeError if the file cannot be loaded.
        """

        # Load file content
        self.srcpath = os.path.abspath(filename)
        self.lines, _ = loadFile(self.srcpath)
        if self.lines is None:
            raise RuntimeError(f'No such file {self.srcpath}')

        # Miscellaneous relevant paths
        self.root = root
        self.relpath = os.path.relpath(self.srcpath, root)
        self.component = component

        # Determine file category.
        # Only .adoc files are candidates for pages, which is verified by
        # looking at the file header for a top-level title.
        # .svg .jpg .png are always images
        # Anything else is a partial
        (_, fileext) = os.path.splitext(filename)

        # Defaults
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = None

        if fileext in ('.svg', '.jpg', '.png'):
            self.category = Images
        elif fileext == '.adoc':
            (self.category,
             self.title,
             self.titleLine,
             self.titleAnchor) = self.findTitle()
        else:
            self.category = Partials

        # Determine destination path based on category
        # images/ are treated specially since there is only a single
        # directory and the component directory is already named Images.
        if self.category == Partials:
            self.dstpath = Path(self.component) / Partials / self.relpath
        elif self.category == Pages:
            # Save the page in partials/, link from pages/
            self.dstpath = Path(self.component) / Partials / self.relpath
            self.dstlink = Path(self.component) / Pages / self.relpath
        else:
            # Images go under images/, not under images/images/
            # This could fail if there were ever top-level images but as all
            # images used in the spec are required to be specified relative
            # to {images}, it is OK.
            self.dstpath = Path(self.component) / self.relpath


    def rewriteXrefs(self, pageMap = None, xrefMap = None):
        """Rewrite asciidoc <<>> xrefs into Antora xref: xrefs, including
        altering the xref target. self.lines is modified in place.

        - pageMap - map from page anchors to page names
        - xrefMap - map from anchors within a page to the page anchor

        Either map defaults to an empty dictionary if not specified."""

        # Fresh dictionaries per call, rather than a mutable default shared
        # between every DocFile that stores it in a member below.
        if pageMap is None:
            pageMap = {}
        if xrefMap is None:
            xrefMap = {}

        # Keep the maps used for the rewrite available as members.
        self.pageMap = pageMap
        self.xrefMap = xrefMap

        # Xref markup may be broken across lines, and may or may not include
        # anchor text. Track whether the closing >> is being looked for at
        # start of line, or not.
        withinXref = False

        for (lineno, line) in enumerate(self.lines):
            if withinXref:
                # Could use line.replace, but that does not return a match
                # count, so we cannot tell if the '>>' is missing.
                (line, count) = xrefClosePat.subn(']', line, count=1)
                if count == 0:
                    print(f'WARNING: No closing >> found on line {lineno} of {self.relpath}', file=sys.stderr)
                elif line[0] != ' ' and self.lines[lineno-1][-1] not in '[ ':
                    # Add whitespace corresponding to crushed-out newline on
                    # previous line, so title words do not run together.
                    self.lines[lineno-1] += ' '
                withinXref = False

            # Now look for all xrefs starting on this line and remap them,
            # including remapping the anchor.

            # First, complete xrefs with alt-text (<<anchor, text>>)
            line = xrefTextPat.sub(
                lambda match: replaceAnchorText(match, pageMap, xrefMap),
                line)

            # Next, complete xrefs without alt-text (<<anchor>>)
            line = xrefOnlyPat.sub(
                lambda match: replaceAnchorOnly(match, pageMap, xrefMap),
                line)

            # Finally, if there is a trailing '<<anchor,' at EOL, remap it
            # and set the flag so the terminating '>>' on the next line will
            # be mapped into an xref closing ']'.
            (line, count) = xrefTrailingPat.subn(
                lambda match: replaceAnchorTrailingText(match, pageMap, xrefMap),
                line)
            if count > 0:
                withinXref = True

            self.lines[lineno] = line

    def __str__(self):
        """Return a multi-line description of this file, for debugging.

        Uses only instance state, so it is safe to call on any DocFile,
        including one which has not been populated yet."""

        numLines = len(self.lines) if self.lines is not None else 0
        lines = [
            f'Input file {self.srcpath}: {numLines} lines',
            f'root = {self.root} component = {self.component} relpath = {self.relpath}',
            f'category = {self.category} dstpath = {self.dstpath}',
            f'title = {self.title}',
            f'titleAnchor = {self.titleAnchor}',
        ]
        return '\n'.join(lines)

    def removeDestination(self, path, text, overwrite):
        """Remove a destination file, if it exists and overwrite is true.
        Ensure the destination directory exists.

        path - file pathname
        text - descriptive text for errors
        overwrite - if True, replace existing output file

        Raises RuntimeError if path exists and overwrite is False.
        """

        # lexists, not exists: a dangling symlink left by an earlier run
        # must be removed too, or the following os.symlink would fail.
        if os.path.lexists(path):
            if not overwrite:
                raise RuntimeError(f'Will not overwrite {text}: {path}')
            # print(f'Removing {text}: {path}')
            os.remove(path)

        dirname = os.path.dirname(path)
        if dirname:
            # print(f'Creating {text} directory {dirname}')
            os.makedirs(dirname, exist_ok=True)

    def rewriteFile(self, overwrite = True, pageHeaders = None):
        """Write source file to component directory. Images are just symlinked
        to the external file. Pages are rewritten to Partials, then
        symlinked to Pages.

        - overwrite - if True, replace existing output files
        - pageHeaders - if not None, a list of strings to inject
          following the chapter heading in each page

        <<>>-style xrefs are assumed to be rewritten prior to calling
        rewriteFile.

        May still need to rewrite custom macros.

        Raises RuntimeError if an output exists and overwrite is False, or
        if a page has no Pages link path.
        """

        self.removeDestination(self.dstpath, 'destination file', overwrite)

        if self.category == Images:
            # Just symlink destination image to source
            # print(f'Symlinking {self.dstpath} -> {self.srcpath}')
            os.symlink(self.srcpath, self.dstpath)
        elif self.category == Partials:
            self.writeFile(self.dstpath)
        elif self.category == Pages:
            if pageHeaders is not None:
                # Inject page headers immediately following page title.
                # Add blank lines before and after the pageHeaders to avoid
                # coalescing with file content.
                split = self.titleLine + 1
                self.lines = (self.lines[:split] +
                              ['\n'] + pageHeaders + ['\n'] +
                              self.lines[split:])

            self.writeFile(self.dstpath)

            if self.dstlink is None:
                raise RuntimeError(f'Wrote Page {self.dstpath} to Partials, but no Pages link supplied')

            self.removeDestination(self.dstlink, 'destination link', overwrite)
            os.symlink(self.dstpath, self.dstlink)

    def writeFile(self, path):
        """Write self.lines[] to file at specified path

        Lines are written as-is, with no newline added.

        Raises RuntimeError if the file cannot be opened for writing."""

        try:
            fp = open(path, 'w', encoding='utf8')
        except OSError as e:
            raise RuntimeError(f'Cannot open output file {path}') from e

        # Close the file even if a write fails part way through
        with fp:
            fp.writelines(self.lines)

def testHarness():
    """Run rewriteXrefs on a small in-memory document and print the text
    before and after, for manual inspection."""

    def printFile(label, lines):
        print(label)
        print('------------------')
        for line in lines:
            print(line)

    # Test harness
    docFile = DocFile()
    docFile.lines = [
        '<<ext,ext chapter>> <<ext-label,',
        'ext chapter/label>>',
        '<<core>>, <<core-label, core chapter/label',
        '>>'
    ]

    pageMap = {
        'ext' : 'file/ext.adoc',
        'core' : 'file/core.adoc',
    }
    xrefMap = {
        'ext' : [ 'ext', '' ],
        'ext-label' : [ 'ext', 'LABELLED ext-label' ],
        'core' : [ 'core', 'Core Title' ],
        'core-label': [ 'core', 'Core Label Title' ],
    }

    printFile('Original File', docFile.lines)

    docFile.rewriteXrefs(pageMap, xrefMap)

    printFile('Edited File', docFile.lines)

def main():
    """Command-line entry point: read and classify every input file, rewrite
    its xrefs, write it into the component directory, and optionally write
    the page map as a .cjs module."""

    parser = argparse.ArgumentParser()

    parser.add_argument('-root', action='store', dest='root',
                        default=os.getcwd(),
                        help='Specify root directory under which files are located (default current directory)')
    parser.add_argument('-pageHeaders', action='store', dest='pageHeaders',
                        default=None,
                        help='Specify file whose contents are injected after title of each converted page')
    parser.add_argument('-component', action='store', dest='component',
                        required=True,
                        help='Specify module / component directory in which converted files are written')
    #parser.add_argument('-htmlspec', action='store', dest='htmlspec',
    #                    default=None, required=False,
    #                    help='Specify HTML of generated spec to extract anchor mapping from')
    parser.add_argument('-xrefpath', action='store', dest='xrefpath',
                        default=None, required=False,
                        help='Specify path to xrefMap.py containing map of anchors to chapter anchors')
    parser.add_argument('-pagemappath', action='store', dest='pagemappath',
                        default=None, required=False,
                        help='Specify path to output pageMap.cjs containing map of anchors to chapter anchors')
    parser.add_argument('-filelist', action='store',
                        default=None, required=False,
                        help='Specify file containing a list of filenames to convert, one/line')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='Specify name of a single file to convert')

    args = parser.parse_args()

    args.root = os.path.abspath(args.root)
    args.component = os.path.abspath(args.component)

    if args.pageHeaders is not None:
        pageHeadersPath = args.pageHeaders
        args.pageHeaders, _ = loadFile(pageHeadersPath)
        # Treat an unreadable headers file like an unreadable filelist,
        # instead of silently converting pages without their headers.
        if args.pageHeaders is None:
            raise RuntimeError(f'Error reading pageHeaders {pageHeadersPath}')

    # Development toggle: change to True to run the test harness instead
    if False:
        testHarness()
        sys.exit(0)

    # Initialize dictionaries
    pageInfo = {}
    pageMap = {}

    # The xrefmap is imported from the 'xrefMap' module, if it exists
    try:
        if args.xrefpath is not None:
            sys.path.append(args.xrefpath)
        from xrefMap import xrefMap
    except ImportError:
        print('WARNING: No module xrefMap containing xrefMap dictionary', file=sys.stderr)
        xrefMap = {}

    # If a file containing a list of files was specified, add each one.
    # Could try using os.walk() instead, but that is very slow.
    if args.filelist is not None:
        count = 0
        lines, _ = loadFile(args.filelist)
        if lines is None:
            raise RuntimeError(f'Error reading filelist {args.filelist}')
        for line in lines:
            path = line.rstrip()
            # Skip blank lines, and anything not starting with a letter
            if path and path[0].isalpha() and path.endswith('.adoc'):
                args.files.append(path)
                count = count + 1
        print(f'Read {count} paths from {args.filelist}')

    for filename in args.files:
        # Create data structure representing the file.
        docFile = DocFile()
        docFile.populate(filename = filename,
                         root = args.root,
                         component = args.component)
        # print(docFile, '\n')

        # Save information about the file under its relpath
        pageInfo[docFile.relpath] = docFile

        # Save mapping from page anchor to its relpath.
        # Files with no title anchor (None or '') are not mapped, since
        # they would all collide on the same meaningless key.
        if docFile.titleAnchor:
            pageMap[docFile.titleAnchor] = docFile.relpath

    # All files have been read and classified.
    # Rewrite them in memory.

    for docFile in pageInfo.values():
        # Look for <<>>-style anchors and rewrite them to Antora xref-style
        # anchors using the pageMap (of top-level anchors to page names) and
        # xrefmap (of anchors to top-level anchors).

        ## print(f'*** Rewriting {docFile.relpath}')
        ## print(docFile, '\n')

        docFile.rewriteXrefs(pageMap, xrefMap)
        docFile.rewriteFile(overwrite = True, pageHeaders = args.pageHeaders)

    # Write the pageMap to a .cjs file for use in the Antora build's
    # specmacros extensions. The xrefMap is already written in JS form.
    if args.pagemappath is not None:
        try:
            fp = open(args.pagemappath, 'w', encoding='utf8')
        except OSError as e:
            raise RuntimeError(f'Cannot open output pageMap.cjs file {args.pagemappath}') from e

        with fp:
            print('exports.pageMap = {', file=fp)
            for pageAnchor in sorted(pageMap):
                pageName = pageMap[pageAnchor]
                print(f' {undefquote(pageAnchor)} : {undefquote(pageName)},', file=fp)
            print('}', file=fp)

if __name__ == '__main__':
    main()

##        if not os.path.exists(args.xrefmap):
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')
##        if args.xrefmap[-3:] != '.py':
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} is not a .py file')
##
##        abspath = os.path.abspath(args.xrefmap)
##        xrefdir = os.path.dirname(os.path.abspath(args.xrefmap))
##        sys.path.append(dir)
##
##        xrefbase = os.path.split(args.xrefmap)[1]
##        xrefbase = os.path.splitext(xrefbase)[0]
##
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')