#!/usr/bin/python3
#
# Copyright 2022-2023 The Khronos Group Inc.
# SPDX-License-Identifier: Apache-2.0

"""Used to convert files from the asciidoctor spec tree to Antora module
format. Success is highly dependent on strict adherence to Vulkan spec
authoring conventions.

Usage: `antora-prep.py [-root path] -component path files`

- `-root` is the root path (repository root, usually) relative to which spec
  files are processed. Defaults to current directory if not specified.
- `-component` is the path to the module and component in which converted
  files are written (e.g. the component directory under which pages/,
  partials/, images/, etc. are located).
- `files` are asciidoc source files from the spec to convert.

Additional optional arguments (see the argparse setup at the bottom of this
file): `-pageHeaders`, `-xrefpath`, `-pagemappath`, and `-filelist`.

Image files are linked from the component 'images' directory

Asciidoc markup files (.adoc) are scanned for the first title markup and
classified as partials or pages depending on whether it is a top-level title
or not. All .adoc files are rewritten to the component 'partials' directory, to
allow transclusion of pages to work (otherwise the transclusions would also
have to be rewritten).

pages then have additional markup injected immediately following the page
title to set custom attributes needed for the build. pages are then
symbolically linked from the component 'pages' directory to the actual
rewritten file in the 'partials' directory to follow Antora conventions.
"""

# For error and file-loading interfaces only
import argparse
import importlib
import os
import re
import sys
from generator import enquote
from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
from pathlib import Path

# Patterns used by DocFile.findTitle to classify a file by its first title
titleAnchorPat = re.compile(r'^\[\[(?P<anchor>[^,]+).*\]\]$')
titlePat = re.compile(r'^[=#] (?P<title>[A-Z].*)')
subtitlePat = re.compile(r'^[=#]{2,} (?P<title>[A-Z].*)')

# Patterns used by DocFile.rewriteXrefs, compiled once rather than being
# re-specified for every line of every file.
xrefClosePat = re.compile(r'>>')
xrefTextPat = re.compile(r'<<([^,>]*),([^>]+)>>')
xrefOnlyPat = re.compile(r'<<([^,>]*)>>')
xrefTrailingPat = re.compile(r'<<([^,>]*),([^>]*)$')

# Component subdirectory names, also used as file category tags
Pages = 'pages'
Partials = 'partials'
Images = 'images'

def undefquote(s):
    """Quote a string for JavaScript, or return the JavaScript undefined
       value.

    - s - string to quote with enquote(), or None
    """

    if s is not None:
        return enquote(s)
    else:
        return 'undefined'


def mapAnchor(anchor, title, pageMap, xrefMap, closeAnchor):
    """Rewrite a <<anchor{, title}>> xref -> xref:pagemap#anchor[{title}]
    - anchor - anchor name
    - title - xref description or '' if not specified, in which case the
      anchor text from the xrefMap is used if available
    - closeAnchor - True if closing >> is on this line, False otherwise
    - pageMap, xrefMap - per rewriteXrefs below

    Returns the rewritten xref text. If closeAnchor is False the closing
    ']' is omitted, since the rest of the title is on the following line.
    """

    #@if anchor == 'features-shaderStorageImageReadWithoutFormat':
    #@    import pdb
    #@    pdb.set_trace()

    # Determine which page anchor this anchor comes from
    # If it cannot be determined, use the unmapped anchor
    #@ Simplify the page anchor if pageName == current page
    try:
        if title != '' or not closeAnchor:
            # Either a (possibly up to a line break) title is supplied, or
            # title is on the next line
            (pageAnchor, _) = xrefMap[anchor]
        else:
            # No explicit title. Infer one from anchor and xrefMap.
            (pageAnchor, title) = xrefMap[anchor]

            # If the title is *still* empty, make a note of it and just use
            # the anchor name
            if title == '':
                print(f'No title found for anchor {anchor}', file=sys.stderr)
                title = anchor

        # Page the page anchor comes from
        pageName = pageMap[pageAnchor]
        print(f'mapAnchor: anchor {anchor} pageAnchor {pageAnchor} -> pageName = {pageName}')

        xref = f'{pageName}#{anchor}'
    except (KeyError, TypeError, ValueError):
        # KeyError: anchor or page anchor is not in the maps.
        # TypeError / ValueError: malformed xrefMap entry (not a 2-sequence).
        # A bare except here would also swallow KeyboardInterrupt.
        print(f'Cannot determine which page {anchor} comes from, passing through to Antora intact', file=sys.stderr)
        xref = f'{anchor}'

    # Remove extraneous whitespace
    title = ' '.join(title.split())

    if closeAnchor:
        return f'xref:{xref}[{title}]'
    else:
        return f'xref:{xref}[{title}'

def replaceAnchorText(match, pageMap, xrefMap):
    """Rewrite <<anchor,text>> to xref:newanchor[text]
    - match - match object, group 1 = anchor, group 2 = text
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)
    text = match.group(2)

    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=True)

def replaceAnchorOnly(match, pageMap, xrefMap):
    """Rewrite <<anchor>> to xref:newanchor[]
    - match - match object, group 1 = anchor
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)

    return mapAnchor(anchor, '', pageMap, xrefMap, closeAnchor=True)

def replaceAnchorTrailingText(match, pageMap, xrefMap):
    """Rewrite <<anchor, to xref:newanchor[
    - match - match object, group 1 = anchor, group 2 = text (may be empty)
    - pageMap, xrefMap - per rewriteXrefs below
    """

    anchor = match.group(1)
    text = match.group(2)

    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=False)

class DocFile:
    """Information about a markup file being converted"""

    def __init__(self):
        """Constructor
        - lines - text of file as list of strings
        - root - common base directory for src files
        - component - path to component directory for outputs
        - srcpath - absolute path to file source
        - relpath - path to file source relative to root
        - dstpath - path to output file destination
        - dstlink - path to an alias (symlink to) dstpath, used for
          files that need to be in both partials and pages directories.
        - category - file type - Pages, Partials, or Images. These are
          string variables containing the corresponding component
          subdirectory name.
        - title - page title for Pages, else ''
        - titleLine - index into lines of the title line
        - titleAnchor - page title anchor for Pages, else ''
        - anchors - asciidoc anchors found in the file
        - includes - asciidoc includes found in the file
        - pageMap - dictionary mapping a page anchor to a source file
          relpath
        - xrefMap - dictionary mapping an anchor within a page to a page
          anchor
        """

        self.lines = None
        self.root = None
        self.component = None
        self.srcpath = None
        self.relpath = None
        self.dstpath = None
        self.dstlink = None
        self.category = None
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = ''
        self.anchors = set()
        self.includes = set()

        self.pageMap = {}
        self.xrefMap = {}

    def findTitle(self):
        """Find category (Pages or Partials) and title, for Pages, in a
        .adoc markup file.

        Heuristic is to search the beginning of the file for a top-level
        asciidoc title, preceded immediately by an anchor for the page.

        Returns (category, title, titleLine, titleAnchor) with '' for a
        Partials title and '' if no title anchor is found. titleLine is -1
        if no title of any level is found."""

        # Chapter title block must be within this many lines of start of file
        maxLines = min(30, len(self.lines))

        # Default, if page title and/or page anchor not found
        titleAnchor = ''

        for lineno, line in enumerate(self.lines[:maxLines]):
            # Look for the first anchor, which must precede the title to
            # apply to it (really, must precede it by exactly one line).
            match = titleAnchorPat.match(line)
            if match is not None:
                titleAnchor = match.group('anchor')
                continue

            # If we find a top-level title, it is a page.
            match = titlePat.match(line)
            if match is not None:
                return (Pages, match.group('title'), lineno, titleAnchor)

            # If we find a second-level or above title, it is a partial
            match = subtitlePat.match(line)
            if match is not None:
                return (Partials, match.group('title'), lineno, titleAnchor)

        # If we do not find a match in the first maxLines lines, assume it
        # is a partial.
        return (Partials, 'NO TITLE FOUND', -1, titleAnchor)

    def populate(self,
                 filename,
                 root,
                 component):
        """Populate data structures given file content and location.

        - filename - file to scan
        - root - absolute path to root under which all source files are
          read
        - component - absolute path to module / component directory under
          which all destination files are written

        Raises RuntimeError if the file cannot be loaded.
        """

        # Load file content
        self.srcpath = os.path.abspath(filename)
        self.lines, _ = loadFile(self.srcpath)
        if self.lines is None:
            raise RuntimeError(f'No such file {self.srcpath}')

        # Miscellaneous relevant paths
        self.root = root
        self.relpath = os.path.relpath(self.srcpath, root)
        self.component = component

        # Determine file category.
        # Only .adoc files are candidates for pages, which is verified by
        # looking at the file header for a top-level title.
        # .svg .jpg .png are always images
        # Anything else is a partial
        (_, fileext) = os.path.splitext(filename)

        # Defaults
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = None

        if fileext in ('.svg', '.jpg', '.png'):
            self.category = Images
        elif fileext == '.adoc':
            (self.category,
             self.title,
             self.titleLine,
             self.titleAnchor) = self.findTitle()
        else:
            self.category = Partials

        # Determine destination path based on category
        # images/ are treated specially since there is only a single
        # directory and the component directory is already named Images.
        if self.category == Partials:
            self.dstpath = Path(self.component) / Partials / self.relpath
        elif self.category == Pages:
            # Save the page in partials/, link from pages/
            self.dstpath = Path(self.component) / Partials / self.relpath
            self.dstlink = Path(self.component) / Pages / self.relpath
        else:
            # Images go under images/, not under images/images/
            # This could fail if there were ever top-level images but as all
            # images used in the spec are required to be specified relative
            # to {images}, it is OK.
            self.dstpath = Path(self.component) / self.relpath


    def rewriteXrefs(self, pageMap = None, xrefMap = None):
        """Rewrite asciidoc <<>> xrefs into Antora xref: xrefs, including
        altering the xref target. self.lines is modified in place.

        - pageMap - map from page anchors to page names
        - xrefMap - map from anchors within a page to the page anchor

        Either map may be omitted (None), in which case an empty map is
        used."""

        # Do not use {} as the default argument value: the same dictionary
        # object would be shared by, and stored in, every DocFile.
        if pageMap is None:
            pageMap = {}
        if xrefMap is None:
            xrefMap = {}

        # Keep the maps most recently used to rewrite this file
        self.pageMap = pageMap
        self.xrefMap = xrefMap

        # Xref markup may be broken across lines, and may or may not include
        # anchor text. Track whether the closing >> is being looked for at
        # start of line, or not.
        withinXref = False

        for lineno, line in enumerate(self.lines):
            if withinXref:
                # Could use line.replace, but that does not return a match
                # count, so we cannot tell if the '>>' is missing.
                (line, count) = xrefClosePat.subn(r']', line, count=1)
                if count == 0:
                    print(f'WARNING: No closing >> found on line {lineno} of {self.relpath}', file=sys.stderr)
                elif line[0] != ' ' and self.lines[lineno-1][-1] not in '[ ':
                    # Add whitespace corresponding to crushed-out newline on
                    # previous line, so title words do not run together.
                    self.lines[lineno-1] += ' '
                withinXref = False

            # Now look for all xrefs starting on this line and remap them,
            # including remapping the anchor.

            # First, complete xrefs with alt-text (<<anchor, text>>)
            (line, count) = xrefTextPat.subn(
                lambda match: replaceAnchorText(match, pageMap, xrefMap),
                line)

            # Next, complete xrefs without alt-text (<<anchor>>)
            (line, count) = xrefOnlyPat.subn(
                lambda match: replaceAnchorOnly(match, pageMap, xrefMap),
                line)

            # Finally, if there is a trailing '<<anchor,' at EOL, remap it
            # and set the flag so the terminating '>>' on the next line will
            # be mapped into an xref closing ']'.
            (line, count) = xrefTrailingPat.subn(
                lambda match: replaceAnchorTrailingText(match, pageMap, xrefMap),
                line)
            if count > 0:
                withinXref = True

            self.lines[lineno] = line

    def __str__(self):
        """Return a multi-line description of this file for debugging."""

        # Use the object's own source path. The previous code referred to an
        # undefined local name 'filename'.
        numLines = len(self.lines) if self.lines is not None else 0
        lines = [
            f'Input file {self.srcpath}: {numLines} lines',
            f'root = {self.root} component = {self.component} relpath = {self.relpath}',
            f'category = {self.category} dstpath = {self.dstpath}',
            f'title = {self.title}',
            f'titleAnchor = {self.titleAnchor}',
        ]
        return '\n'.join(lines)

    def removeDestination(self, path, text, overwrite):
        """Remove a destination file, if it exists and overwrite is true.
        Ensure the destination directory exists.

        path - file pathname
        text - descriptive text for errors
        overwrite - if True, replace existing output file

        Raises RuntimeError if path exists and overwrite is False.
        """

        # lexists, unlike exists, is also True for a dangling symlink, which
        # must be removed or the following os.symlink / open would fail.
        if os.path.lexists(path):
            if overwrite:
                # print(f'Removing {text}: {path}')
                os.remove(path)
            else:
                raise RuntimeError(f'Will not overwrite {text}: {path}')

        dirname = os.path.dirname(path)
        if dirname:
            # print(f'Creating {text} directory {dirname}')
            os.makedirs(dirname, exist_ok=True)

    def rewriteFile(self, overwrite = True, pageHeaders = None):
        """Write source file to component directory. Images are just symlinked
        to the external file. Pages are rewritten to Partials, then
        symlinked to Pages.

        - overwrite - if True, replace existing output files
        - pageHeaders - if not None, a list of strings to inject
          following the chapter heading in each page

        <<>>-style xrefs are assumed to be rewritten prior to calling
        rewriteFile.

        May still need to rewrite custom macros.

        Raises RuntimeError if an output exists and overwrite is False, or
        if a Page has no dstlink set.
        """

        self.removeDestination(self.dstpath, 'destination file', overwrite)

        if self.category == Images:
            # Just symlink destination image to source
            # print(f'Symlinking {self.dstpath} -> {self.srcpath}')
            os.symlink(self.srcpath, self.dstpath)
        elif self.category == Partials:
            self.writeFile(self.dstpath)
        elif self.category == Pages:
            if pageHeaders is not None:
                # Inject page headers immediately following page title.
                # Add blank lines before and after the pageHeaders to avoid
                # coalescing with file content.
                lines = self.lines[0:self.titleLine+1]
                lines += ['\n'] + pageHeaders + ['\n']
                lines = lines + self.lines[self.titleLine+1:]
                self.lines = lines

            self.writeFile(self.dstpath)

            if self.dstlink is None:
                # The exception was previously constructed but never raised
                raise RuntimeError(f'Wrote Page {self.dstpath} to Partials, but no Pages link supplied')
            else:
                self.removeDestination(self.dstlink, 'destination link', overwrite)
                os.symlink(self.dstpath, self.dstlink)

    def writeFile(self, path):
        """Write self.lines[] to file at specified path

        Lines are written as-is, with no line terminator added.
        Raises RuntimeError if the file cannot be opened for writing.
        """

        try:
            fp = open(path, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output file {path}') from err

        # Context manager closes the file even if a write fails
        with fp:
            fp.writelines(self.lines)

def testHarness():
    """Rewrite a small in-memory document and print it before and after.
    Manual test of DocFile.rewriteXrefs, including xrefs split across
    lines."""

    def printFile(label, lines):
        print(label)
        print('------------------')
        for line in lines:
            print(line)

    # Test harness
    docFile = DocFile()
    docFile.lines = [
        '<<ext,ext chapter>> <<ext-label,',
        'ext chapter/label>>',
        '<<core>>, <<core-label, core chapter/label',
        '>>'
    ]

    pageMap = {
        'ext' : 'file/ext.adoc',
        'core' : 'file/core.adoc',
    }
    xrefMap = {
        'ext' : [ 'ext', '' ],
        'ext-label' : [ 'ext', 'LABELLED ext-label' ],
        'core' : [ 'core', 'Core Title' ],
        'core-label': [ 'core', 'Core Label Title' ],
    }

    printFile('Original File', docFile.lines)

    docFile.rewriteXrefs(pageMap, xrefMap)

    printFile('Edited File', docFile.lines)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('-root', action='store', dest='root',
                        default=os.getcwd(),
                        help='Specify root directory under which files are located (default current directory)')
    parser.add_argument('-pageHeaders', action='store', dest='pageHeaders',
                        default=None,
                        help='Specify file whose contents are injected after title of each converted page')
    parser.add_argument('-component', action='store', dest='component',
                        required=True,
                        help='Specify module / component directory in which converted files are written')
    #parser.add_argument('-htmlspec', action='store', dest='htmlspec',
    #                    default=None, required=False,
    #                    help='Specify HTML of generated spec to extract anchor mapping from')
    parser.add_argument('-xrefpath', action='store', dest='xrefpath',
                        default=None, required=False,
                        help='Specify path to xrefMap.py containing map of anchors to chapter anchors')
    parser.add_argument('-pagemappath', action='store', dest='pagemappath',
                        default=None, required=False,
                        help='Specify path to output pageMap.cjs containing map of anchors to chapter anchors')
    parser.add_argument('-filelist', action='store',
                        default=None, required=False,
                        help='Specify file containing a list of filenames to convert, one/line')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='Specify name of a single file to convert')

    args = parser.parse_args()

    args.root = os.path.abspath(args.root)
    args.component = os.path.abspath(args.component)

    if args.pageHeaders is not None:
        args.pageHeaders, _ = loadFile(args.pageHeaders)

    # Change to True to run the manual test harness instead of converting
    if False:
        testHarness()
        sys.exit(0)

    # Initialize dictionaries
    pageInfo = {}
    pageMap = {}

    # The xrefmap is imported from the 'xrefMap' module, if it exists
    try:
        if args.xrefpath is not None:
            sys.path.append(args.xrefpath)
        from xrefMap import xrefMap
    except ImportError:
        print('WARNING: No module xrefMap containing xrefMap dictionary', file=sys.stderr)
        xrefMap = {}

    # If a file containing a list of files was specified, add each one.
    # Could try using os.walk() instead, but that is very slow.
    if args.filelist is not None:
        count = 0
        lines, _ = loadFile(args.filelist)
        if lines is None:
            raise RuntimeError(f'Error reading filelist {args.filelist}')
        for line in lines:
            path = line.rstrip()
            # Test for an empty path first, so that blank lines in the
            # filelist are skipped instead of raising IndexError.
            if path and path[0].isalpha() and path.endswith('.adoc'):
                args.files.append(path)
                count = count + 1
        print(f'Read {count} paths from {args.filelist}')

    for filename in args.files:
        # Create data structure representing the file.
        docFile = DocFile()
        docFile.populate(filename = filename,
                         root = args.root,
                         component = args.component)
        # print(docFile, '\n')

        # Save information about the file under its relpath
        pageInfo[docFile.relpath] = docFile

        # Save mapping from page anchor to its relpath.
        # titleAnchor is None for non-.adoc files and '' for .adoc files
        # in which no anchor was found; neither is a usable map key.
        if docFile.titleAnchor:
            pageMap[docFile.titleAnchor] = docFile.relpath

    # All files have been read and classified.
    # Rewrite them in memory.

    for key in pageInfo:
        # Look for <<>>-style anchors and rewrite them to Antora xref-style
        # anchors using the pageMap (of top-level anchors to page names) and
        # xrefmap (of anchors to top-level anchors).
        docFile = pageInfo[key]

        ## print(f'*** Rewriting {key}')
        ## print(docFile, '\n')

        docFile.rewriteXrefs(pageMap, xrefMap)
        docFile.rewriteFile(overwrite = True, pageHeaders = args.pageHeaders)

    # Write the pageMap to a .cjs file for use in the Antora build's
    # specmacros extensions. The xrefMap is already written in JS form.
    if args.pagemappath is not None:
        try:
            fp = open(args.pagemappath, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output pageMap.cjs file {args.pagemappath}') from err

        # Context manager closes the file even if a write fails
        with fp:
            print('exports.pageMap = {', file=fp)
            for pageAnchor in sorted(pageMap):
                pageName = pageMap[pageAnchor]
                print(f' {undefquote(pageAnchor)} : {undefquote(pageName)},', file=fp)
            print('}', file=fp)

##        if not os.path.exists(args.xrefmap):
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')
##        if args.xrefmap[-3:] != '.py':
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} is not a .py file')
##
##        abspath = os.path.abspath(args.xrefmap)
##        xrefdir = os.path.dirname(os.path.abspath(args.xrefmap))
##        sys.path.append(dir)
##
##        xrefbase = os.path.split(args.xrefmap)[1]
##        xrefbase = os.path.splitext(xrefbase)[0]
##
##            raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')