#!/usr/bin/python3
#
# Copyright 2016-2023 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""Used for automatic reflow of spec sources to satisfy the agreed layout to
minimize git churn. Also used to insert identifying tags on explicit Valid
Usage statements.

Usage: `reflow.py [-noflow] [-tagvu] [-nextvu #] [-overwrite] [-check FAIL|WARN] [-out dir] [-suffix str] files`

- `-noflow` acts as a passthrough, instead of reflowing text. Other
  processing may occur.
- `-tagvu` generates explicit VUID tag for Valid Usage statements which
  do not already have them.
- `-nextvu #` starts VUID tag generation at the specified # instead of
  the value wired into the `reflow.py` script.
- `-overwrite` updates in place (can be risky, make sure there are backups)
- `-check FAIL|WARN` runs some consistency checks on markup. If the checks
  fail and the WARN option is given, the script will simply print a warning
  message. If the checks fail and the FAIL option is given, the script will
  exit with an error code. FAIL is for use with continuous integration
  scripts enforcing the checks.
- `-out` specifies directory to create output file in, default 'out'
- `-suffix` specifies suffix to add to output files, default ''
- `files` are asciidoc source files from the spec to reflow.
"""
# For error and file-loading interfaces only
import argparse
import os
import re
import sys
from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
from pathlib import Path
import doctransformer

# Vulkan-specific - will consolidate into scripts/ like OpenXR soon
sys.path.insert(0, 'xml')

from apiconventions import APIConventions
conventions = APIConventions()

# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.

# Find the pname: or code: patterns in a Valid Usage statement
pnamePat = re.compile(r'pname:(?P<param>\{?\w+\}?)')
codePat = re.compile(r'code:(?P<param>\w+)')

# Text that may not end sentences

# A single letter followed by a period, typically a middle initial.
endInitial = re.compile(r'^[A-Z]\.$')
# An abbreviation, which does not (usually) end a line.
endAbbrev = re.compile(r'(e\.g|i\.e|c\.f|vs)\.$', re.IGNORECASE)

# Explicit Valid Usage list item with one or more leading asterisks
# The re.DOTALL is needed to prevent vuPat.search() from stripping
# the trailing newline.
# NOTE(review): as written this requires exactly one space before the
# asterisks - confirm that matches the bullet indentation used in VU blocks.
vuPat = re.compile(r'^(?P<head> [*]+)( *)(?P<tail>.*)', re.DOTALL)

# VUID with the numeric portion captured in the match object
vuidPat = re.compile(r'VUID-[^-]+-[^-]+-(?P<vuid>[0-9]+)')

# Pattern matching leading nested bullet points
# NOTE(review): same single-leading-space assumption as vuPat - confirm.
nestedVuPat = re.compile(r'^ \*\*')

# pname:{attribute} names which must not be used in generated VUID tags.
# These have complex expressions including ., ->, or [index] which makes
# them unsuitable for VUID tags. Ideally these would be automatically
# discovered.
_attributeExceptionList = (
    'maxinstancecheck',
    'regionsparam',
    'rayGenShaderBindingTableAddress',
    'rayGenShaderBindingTableStride',
    'missShaderBindingTableAddress',
    'missShaderBindingTableStride',
    'hitShaderBindingTableAddress',
    'hitShaderBindingTableStride',
    'callableShaderBindingTableAddress',
    'callableShaderBindingTableStride',
)


class ReflowCallbacks:
    """State and arguments for reflowing.

    Used with DocTransformer to reflow a file."""

    def __init__(self,
                 filename,
                 vuidDict,
                 margin = 76,
                 breakPeriod = True,
                 reflow = True,
                 nextvu = None,
                 maxvu = None,
                 check = True):

        self.filename = filename
        """base name of file being read from."""

        self.check = check
        """Whether consistency checks must be performed."""

        self.margin = margin
        """margin to reflow text to."""

        self.breakPeriod = breakPeriod
        """True if justification should break to a new line after the end of a
        sentence."""

        self.breakInitial = True
        """True if justification should break to a new line after something
        that appears to be an initial in someone's name. **TBD**"""

        self.reflow = reflow
        """True if text should be reflowed, False to pass through unchanged."""

        self.vuPrefix = 'VUID'
        """Prefix of generated Valid Usage tags"""

        self.vuFormat = '{0}-{1}-{2}-{3:0>5d}'
        """Format string for generating Valid Usage tags.
        First argument is vuPrefix, second is command/struct name, third is
        parameter name, fourth is the tag number."""

        self.nextvu = nextvu
        """Integer to start tagging un-numbered Valid Usage statements with,
        or None if no tagging should be done."""

        self.maxvu = maxvu
        """Maximum tag to use for Valid Usage statements. If None while
        nextvu is set, no upper bound is applied."""

        self.vuidDict = vuidDict
        """Dictionary of VUID numbers found, containing a list of (file, VUID)
        on which that number was found. This is used to warn on duplicate
        VUIDs."""

        self.warnCount = 0
        """Count of markup check warnings encountered."""

    def endSentence(self, word):
        """Return True if word ends with a sentence-period, False otherwise.

        Allows for contraction cases which will not end a line:

         - A single letter (if breakInitial is True)
         - Abbreviations: 'c.f.', 'e.g.', 'i.e.', 'vs.' (or mixed-case
           versions)"""
        # word[-1:] rather than word[-1] so that an empty word is handled
        if (word[-1:] != '.' or
            endAbbrev.search(word) or
                (self.breakInitial and endInitial.match(word))):
            return False

        return True

    def vuidAnchor(self, word):
        """Return True if word is a Valid Usage ID Tag anchor."""
        return (word[0:7] == '[[VUID-')

    def visitVUID(self, vuid, line):
        """Record that VUID number `vuid` (a string) was seen on `line` of
        the current file, for later duplicate detection."""
        if vuid not in self.vuidDict:
            self.vuidDict[vuid] = []
        self.vuidDict[vuid].append([self.filename, line])

    def gatherVUIDs(self, para):
        """Gather VUID tags and add them to vuidDict. Used to verify
        no-duplicate VUIDs.

        Only the first VUID found on each line of `para` is recorded."""
        for line in para:
            line = line.rstrip()

            matches = vuidPat.search(line)
            if matches is not None:
                vuid = matches.group('vuid')
                self.visitVUID(vuid, line)

    def addVUID(self, para, state):
        """Generate and add VUID if necessary.

        - para - list of lines making up the paragraph
        - state - transformer state; isVU, hangIndent, apiName and
          lineNumber are read from it

        Returns (paragraph, hangIndent). The paragraph is the input `para`
        unless a tag line was inserted, in which case it is a new list."""
        hangIndent = state.hangIndent

        if not state.isVU or self.nextvu is None:
            return para, hangIndent

        # If:
        #   - this paragraph is in a Valid Usage block,
        #   - VUID tags are being assigned,
        # Try to assign VUIDs

        if nestedVuPat.search(para[0]):
            # Do not assign VUIDs to nested bullet points.
            # These are now allowed VU markup syntax, but will never
            # themselves be VUs, just subsidiary points.
            return para, hangIndent

        # Skip if there is already a VUID assigned
        if self.vuPrefix in para[0]:
            return para, hangIndent

        # If:
        #   - a tag is not already present, and
        #   - the paragraph is a properly marked-up list item
        # Then add a VUID tag starting with the next free ID.

        # Split the first line after the bullet point
        matches = vuPat.search(para[0])
        if matches is None:
            # There are only a few cases of this, and they are all
            # legitimate. Leave detecting this case to another tool
            # or hand inspection.
            return para, hangIndent

        logDiag('addVUID: Matched vuPat on line:', para[0], end='')
        head = matches.group('head')
        tail = matches.group('tail')

        # Find pname: or code: tags in the paragraph for the purposes of VUID
        # tag generation. pname:{attribute}s are prioritized to make sure
        # commonvalidity VUIDs end up being unique. Otherwise, the first pname:
        # or code: tag in the paragraph is used, which may not always be
        # correct, but should be highly reliable.
        paraText = ' '.join(para)
        pnameMatches = pnamePat.findall(paraText)
        codeMatches = codePat.findall(paraText)

        # Prioritize {attribute}s, but not the ones in the exception list
        attributeMatches = [match for match in pnameMatches
                            if match.startswith('{') and
                            match[1:-1] not in _attributeExceptionList]
        nonattributeMatches = [match for match in pnameMatches
                               if not match.startswith('{')]

        if attributeMatches:
            paramName = attributeMatches[0]
        elif nonattributeMatches:
            paramName = nonattributeMatches[0]
        elif codeMatches:
            paramName = codeMatches[0]
        else:
            paramName = 'None'
            logWarn(self.filename,
                    'No param name found for VUID tag on line:',
                    para[0])

        # Transform:
        #
        #   * VU first line
        #
        # To:
        #
        #   * [[VUID]]
        #     VU first line
        #
        tagLine = (head + ' [[' +
                   self.vuFormat.format(self.vuPrefix,
                                        state.apiName,
                                        paramName,
                                        self.nextvu) + ']]\n')

        newLines = [tagLine]
        if tail.strip():
            logDiag('transformParagraph first line matches bullet point -'
                    'single line, assuming hangIndent @ input line',
                    state.lineNumber)
            hangIndent = len(head) + 1
            newLines.append(''.ljust(hangIndent) + tail)

        logDiag('Assigning', self.vuPrefix, state.apiName, self.nextvu,
                ' on line:\n' + para[0], '->\n' + newLines[0] + 'END', '\n' +
                newLines[1] if len(newLines) > 1 else '')

        # Do not actually assign the VUID unless it is in the reserved range.
        # A maxvu of None (e.g. -nextvu given without -maxvu) means there is
        # no upper bound; comparing an int against None would raise TypeError.
        if self.maxvu is not None and self.nextvu > self.maxvu:
            return para, hangIndent

        if self.nextvu == self.maxvu:
            # NOTE(review): the tag is still assigned below, so this message
            # really marks the last available VUID - confirm intent.
            logWarn('Skipping VUID assignment, no more VUIDs available')

        # Record the number only once it is really used, keyed by the same
        # zero-padded text that gatherVUIDs extracts from existing tags, so
        # that duplicate detection compares like with like.
        self.visitVUID('{:0>5d}'.format(self.nextvu), tagLine)
        self.nextvu = self.nextvu + 1

        return newLines + para[1:], hangIndent

    def transformParagraph(self, para, state):
        """Reflow a given paragraph, respecting the paragraph lead and
        hanging indentation levels.

        The algorithm also respects trailing '+' signs that indicate embedded newlines,
        and will not reflow a very long word immediately after a bullet point.

        Just return the paragraph unchanged if the -noflow argument was
        given.

        - para - list of lines making up the paragraph
        - state - transformer state; leadIndent, hangIndent and lineNumber
          are read here, plus whatever addVUID reads

        Returns the list of output lines, each ending in a newline when
        reflowed."""

        # Nothing to gather, tag, or reflow
        if not para:
            return para

        self.gatherVUIDs(para)

        # If this is a VU that is missing a VUID, add it to the paragraph now.
        para, hangIndent = self.addVUID(para, state)

        if not self.reflow:
            return para

        logDiag('transformParagraph lead indent = ', state.leadIndent,
                'hangIndent =', state.hangIndent,
                'para:', para[0], end='')

        # Total words processed (we care about the *first* word vs. others)
        wordCount = 0

        # Tracks the *previous* word processed. It must not be empty.
        prevWord = ' '

        # Track the previous line and paragraph being indented, if any
        outLine = None
        outLineLen = 0
        outPara = []

        # Set when the first word is processed
        bulletPoint = False

        for line in para:
            words = line.rstrip().split()
            lastIndex = len(words) - 1

            for i, word in enumerate(words):
                wordLen = len(word)
                wordCount += 1

                # Trailing ' +' or ' -' must stay on the same line
                endEscape = (i == lastIndex and word in ('+', '-'))

                if wordCount == 1:
                    # The first word of the paragraph is treated specially.
                    # The loop logic becomes trickier if all this code is
                    # done prior to looping over lines and words, so all the
                    # setup logic is done here.

                    outLine = ''.ljust(state.leadIndent) + word
                    outLineLen = state.leadIndent + wordLen

                    # If the paragraph begins with a bullet point, generate
                    # a hanging indent level if there is not one already.
                    if doctransformer.beginBullet.match(para[0]):
                        bulletPoint = True
                        if len(para) > 1:
                            logDiag('transformParagraph first line matches bullet point',
                                    'but indent already hanging @ input line',
                                    state.lineNumber)
                        else:
                            logDiag('transformParagraph first line matches bullet point -'
                                    'single line, assuming hangIndent @ input line',
                                    state.lineNumber)
                            hangIndent = outLineLen + 1
                    else:
                        bulletPoint = False
                else:
                    # Possible actions to take with this word
                    #
                    # addWord - add word to current line
                    # closeLine - append line and start a new (null) one
                    # startLine - add word to a new line

                    # Default behavior if all the tests below fail is to add
                    # this word to the current line, and keep accumulating
                    # that line.
                    (addWord, closeLine, startLine) = (True, False, False)

                    # How long would this line be if the word were added?
                    newLen = outLineLen + 1 + wordLen

                    # Are we on the first word following a bullet point?
                    firstBullet = (wordCount == 2 and bulletPoint)

                    if endEscape:
                        # If the new word ends the input line with ' +',
                        # add it to the current line.

                        (addWord, closeLine, startLine) = (True, True, False)
                    elif self.vuidAnchor(word):
                        # If the new word is a Valid Usage anchor, break the
                        # line afterwards. Note that this should only happen
                        # immediately after a bullet point, but we do not
                        # currently check for this.
                        (addWord, closeLine, startLine) = (True, True, False)

                    elif newLen > self.margin:
                        if firstBullet:
                            # If the word follows a bullet point, add it to
                            # the current line no matter its length.

                            (addWord, closeLine, startLine) = (True, True, False)
                        elif doctransformer.beginBullet.match(word + ' '):
                            # If the word *is* a bullet point, add it to
                            # the current line no matter its length.
                            # This avoids an innocent inline '-' or '*'
                            # turning into a bogus bullet point.

                            (addWord, closeLine, startLine) = (True, True, False)
                        else:
                            # The word overflows, so add it to a new line.

                            (addWord, closeLine, startLine) = (False, True, True)
                    elif (self.breakPeriod and
                          (wordCount > 2 or not firstBullet) and
                          self.endSentence(prevWord)):
                        # If the previous word ends a sentence and
                        # breakPeriod is set, start a new line.
                        # The complicated logic allows for leading bullet
                        # points which are periods (implicitly numbered lists).
                        # @@@ But not yet for explicitly numbered lists.

                        (addWord, closeLine, startLine) = (False, True, True)

                    # Add a word to the current line
                    if addWord:
                        if outLine:
                            outLine += ' ' + word
                            outLineLen = newLen
                        else:
                            # Fall through to startLine case if there is no
                            # current line yet.
                            startLine = True

                    # Add current line to the output paragraph. Force
                    # starting a new line, although we do not yet know if it
                    # will ever have contents.
                    if closeLine:
                        if outLine:
                            outPara.append(outLine + '\n')
                            outLine = None

                    # Start a new line and add a word to it
                    if startLine:
                        outLine = ''.ljust(hangIndent) + word
                        outLineLen = hangIndent + wordLen

                # Track the previous word, for use in breaking at end of
                # a sentence
                prevWord = word

        # Add last line to the output paragraph.
        if outLine:
            outPara.append(outLine + '\n')

        return outPara

    def onEmbeddedVUConditional(self, state):
        """Warn about, and count, a conditional embedded in a Valid Usage
        statement. Does nothing unless checks are enabled."""
        if self.check:
            logWarn('Detected embedded Valid Usage conditional: {}:{}'.format(
                    self.filename, state.lineNumber - 1))
            # Keep track of warning check count
            self.warnCount = self.warnCount + 1


def reflowFile(filename, args):
    """Transform a single file according to the parsed arguments.

    - filename - path of the asciidoc file to read
    - args - argparse namespace; overwrite, outDir, suffix, nowrite, margin,
      noflow, nextvu, maxvu, check, vuidDict and warnCount are read, and
      nextvu / warnCount are updated with the results for this file.

    Returns None. Returns early if loadFile yields no lines or the output
    file cannot be opened."""
    logDiag('reflow: filename', filename)

    # Output file handle and reflow object for this file. There are no race
    # conditions on overwriting the input, but it is not recommended unless
    # you have backing store such as git.

    lines, newline_string = loadFile(filename)
    if lines is None:
        return

    if args.overwrite:
        outFilename = filename
    else:
        outDir = Path(args.outDir).resolve()
        outDir.mkdir(parents=True, exist_ok=True)

        outFilename = str(outDir / (os.path.basename(filename) + args.suffix))

    if args.nowrite:
        fp = None
    else:
        try:
            fp = open(outFilename, 'w', encoding='utf8', newline=newline_string)
        except OSError:
            # Only I/O failures are expected here; anything else (including
            # KeyboardInterrupt) should propagate.
            logWarn('Cannot open output file', outFilename, ':', sys.exc_info()[0])
            return

    callback = ReflowCallbacks(filename,
                               args.vuidDict,
                               margin = args.margin,
                               reflow = not args.noflow,
                               nextvu = args.nextvu,
                               maxvu = args.maxvu,
                               check = args.check)

    transformer = doctransformer.DocTransformer(filename,
                                                outfile = fp,
                                                callback = callback)

    try:
        transformer.transformFile(lines)
    finally:
        # Close the output even if the transform raises
        if fp is not None:
            fp.close()

    # Update the 'nextvu' value
    if args.nextvu != callback.nextvu:
        logWarn('Updated nextvu to', callback.nextvu, 'after file', filename)
        args.nextvu = callback.nextvu

    args.warnCount += callback.warnCount


def reflowAllAdocFiles(folder_to_reflow, args):
    """Reflow every file under folder_to_reflow whose name ends with
    conventions.file_suffix, skipping any sub-folder whose lower-cased name
    is in conventions.spec_no_reflow_dirs.

    Each file is visited exactly once: os.walk already recurses, so skipped
    sub-folders are pruned from the walk instead of recursing by hand."""
    for root, subdirs, files in os.walk(folder_to_reflow):
        for file in files:
            if file.endswith(conventions.file_suffix):
                file_path = os.path.join(root, file)
                reflowFile(file_path, args)

        keptSubdirs = []
        for subdir in subdirs:
            sub_folder = os.path.join(root, subdir)
            print('Sub-folder = %s' % sub_folder)
            if subdir.lower() not in conventions.spec_no_reflow_dirs:
                print(' Parsing = %s' % sub_folder)
                keptSubdirs.append(subdir)
            else:
                print(' Skipping = %s' % sub_folder)

        # In-place assignment tells the (top-down) walk which folders to
        # descend into.
        subdirs[:] = keptSubdirs


def _writeVuidCounts(vuidCounts):
    """Rewrite vuidCounts.py, in the directory containing this script, from
    the vuidCounts dictionary. Warns instead of failing on I/O errors."""
    reflow_count_file_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'vuidCounts.py')
    try:
        with open(reflow_count_file_path, 'w', encoding='utf8') as reflow_count_file:
            print('# Do not edit this file, unless reserving a new VUID range', file=reflow_count_file)
            print('# VUID ranges reserved for branches', file=reflow_count_file)
            print('# Key is branch name, value is [ start, end, nextfree ]', file=reflow_count_file)
            print('# New reservations must be made by MR to main branch', file=reflow_count_file)
            print('vuidCounts = {', file=reflow_count_file)
            # Entries are written in order of the start of each range
            for key in sorted(vuidCounts, key=lambda k: vuidCounts[k][0]):
                counts = vuidCounts[key]
                print(f" '{key}': [ {counts[0]}, {counts[1]}, {counts[2]} ],",
                      file=reflow_count_file)
            print('}', file=reflow_count_file)
    except OSError:
        logWarn('Cannot open output count file vuidCounts.py', ':', sys.exc_info()[0])


def main():
    """Command-line entry point: parse arguments, reflow the requested files
    (or the whole spec folder), then report markup and duplicate-VUID
    problems and save the next free VUID if it changed."""
    parser = argparse.ArgumentParser()

    parser.add_argument('-diag', action='store', dest='diagFile',
                        help='Set the diagnostic file')
    parser.add_argument('-warn', action='store', dest='warnFile',
                        help='Set the warning file')
    parser.add_argument('-log', action='store', dest='logFile',
                        help='Set the log file for both diagnostics and warnings')
    parser.add_argument('-overwrite', action='store_true',
                        help='Overwrite input filenames instead of writing different output filenames')
    parser.add_argument('-out', action='store', dest='outDir',
                        default='out',
                        help='Set the output directory in which updated files are generated (default: out)')
    parser.add_argument('-nowrite', action='store_true',
                        help='Do not write output files, for use with -check')
    parser.add_argument('-check', action='store', dest='check',
                        help='Run markup checks and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-checkVUID', action='store', dest='checkVUID',
                        help='Detect duplicated VUID numbers and warn if WARN option is given, error exit if FAIL option is given')
    parser.add_argument('-tagvu', action='store_true',
                        help='Tag un-tagged Valid Usage statements starting at the value wired into reflow.py')
    parser.add_argument('-nextvu', action='store', dest='nextvu', type=int,
                        default=None,
                        help='Tag un-tagged Valid Usage statements starting at the specified base VUID instead of the value wired into reflow.py')
    parser.add_argument('-maxvu', action='store', dest='maxvu', type=int,
                        default=None,
                        help='Specify maximum VUID instead of the value wired into vuidCounts.py')
    parser.add_argument('-branch', action='store', dest='branch',
                        help='Specify branch to assign VUIDs for')
    parser.add_argument('-noflow', action='store_true', dest='noflow',
                        help='Do not reflow text. Other actions may apply')
    parser.add_argument('-margin', action='store', type=int, dest='margin',
                        default=76,
                        help='Width to reflow text, defaults to 76 characters')
    parser.add_argument('-suffix', action='store', dest='suffix',
                        default='',
                        help='Set the suffix added to updated file names (default: none)')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='a filename to reflow text in')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    setLogFile(True, True, args.logFile)
    setLogFile(True, False, args.diagFile)
    setLogFile(False, True, args.warnFile)

    if args.overwrite:
        logWarn("reflow.py: will overwrite all input files")

    errors = ''
    if args.branch is None:
        (args.branch, errors) = getBranch()
    if args.branch is None:
        # This is not fatal unless VUID assignment is required
        if args.tagvu:
            logErr('Cannot determine current git branch, so cannot assign VUIDs:', errors)

    # Only loaded when VUIDs are assigned from the reserved per-branch range.
    # Both stay None when -nextvu is given explicitly, in which case
    # vuidCounts.py is neither read nor rewritten.
    vuidCounts = None
    startVUID = None

    if args.tagvu and args.nextvu is None:
        # Moved here since vuidCounts is only needed in the internal
        # repository
        from vuidCounts import vuidCounts

        if args.branch not in vuidCounts:
            # NOTE(review): the lookups below assume logErr does not return
            logErr('Branch', args.branch, 'not in vuidCounts, cannot continue')
        maxVUID = vuidCounts[args.branch][1]
        startVUID = vuidCounts[args.branch][2]
        args.nextvu = startVUID
        args.maxvu = maxVUID

    if args.nextvu is not None:
        logWarn('Tagging untagged Valid Usage statements starting at', args.nextvu)

    # Count of markup check warnings encountered
    # This is added to the argparse structure
    args.warnCount = 0

    # Dictionary of VUID numbers found, containing a list of (file, VUID) on
    # which that number was found
    # This is added to the argparse structure
    args.vuidDict = {}

    # If no files are specified, reflow the entire specification chapters folder
    if not args.files:
        folder_to_reflow = conventions.spec_reflow_path
        logWarn('Reflowing all asciidoc files under', folder_to_reflow)
        reflowAllAdocFiles(folder_to_reflow, args)
    else:
        for file in args.files:
            reflowFile(file, args)

    if args.warnCount > 0:
        if args.check == 'FAIL':
            logErr('Failed with', args.warnCount, 'markup errors detected.\n' +
                   'To fix these, you can take actions such as:\n' +
                   ' * Moving conditionals outside VU start / end without changing VU meaning\n' +
                   ' * Refactor conditional text using terminology defined conditionally outside the VU itself\n' +
                   ' * Remove the conditional (allowable when this just affects command / structure / enum names)\n')
        else:
            logWarn('Total warning count for markup issues is', args.warnCount)

    # Look for duplicated VUID numbers
    if args.checkVUID:
        dupVUIDs = 0
        for vuid in sorted(args.vuidDict):
            found = args.vuidDict[vuid]
            if len(found) > 1:
                logWarn('Duplicate VUID number {} found in files:'.format(vuid))
                for (file, vuidLine) in found:
                    logWarn(' {}: {}'.format(file, vuidLine))
                dupVUIDs = dupVUIDs + 1

        if dupVUIDs > 0:
            if args.checkVUID == 'FAIL':
                logErr('Failed with', dupVUIDs, 'duplicated VUID numbers found.\n' +
                       'To fix this, either convert these to commonvalidity VUs if possible, or strip\n' +
                       'the VUIDs from all but one of the duplicates and regenerate new ones.')
            else:
                logWarn('Total number of duplicated VUID numbers is', dupVUIDs)

    if (vuidCounts is not None and
            args.nextvu is not None and args.nextvu != startVUID):
        # Update next free VUID to assign
        vuidCounts[args.branch][2] = args.nextvu
        _writeVuidCounts(vuidCounts)


if __name__ == '__main__':
    main()