# Copyright 2023 The Khronos Group Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""Utilities for automatic transformation of spec sources. Most of the logic
has to do with detecting asciidoc markup or block types that should not be
transformed (tables, code) and ignoring them. It is very likely there are many
asciidoc constructs not yet accounted for in the script, our usage of asciidoc
markup is intentionally somewhat limited.
"""

import re
import sys
from reflib import logDiag, logWarn

# Vulkan-specific - will consolidate into scripts/ like OpenXR soon
sys.path.insert(0, 'xml')

from apiconventions import APIConventions
conventions = APIConventions()

# Start of an asciidoctor conditional
#   ifdef::
#   ifndef::
conditionalStart = re.compile(r'^(ifdef|ifndef)::')

# Markup that always ends a paragraph
#   empty line or whitespace
#   [block options]
#   [[anchor]]
#   //                  comment
#   <<<<                page break
#   :attribute-setting
#   macro-directive::terms
#   +                   standalone list item continuation
#   label::             labelled list - label must be standalone
endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')

# Special case of markup ending a paragraph, used to track the current
# command/structure. This allows for either OpenXR or Vulkan API path
# conventions. Nominally it should use the file suffix defined by the API
# conventions (conventions.file_suffix), except that XR uses '.txt' for
# generated API include files, not '.adoc' like its other includes.
# NOTE(review): the '.' characters in '(../)' and '.adoc' are unescaped, so
# they match any character, not only a literal dot. Left unchanged here to
# avoid altering which include lines are recognized.
includePat = re.compile(
        r'include::(?P<directory_traverse>((../){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+).adoc[\[][\]]')

# Markup that is OK in a contiguous paragraph but otherwise passed through
#   .anything (except .., which indicates a literal block)
#   === Section Titles
#   image::path_to_image[attributes]  (apparently a single colon is OK but less idiomatic)
endParaContinue = re.compile(r'^(\.[^.].*|=+ .*|image:.*\[.*\])$')

# Markup for block delimiters whose contents *should* be reformatted
#   --   (exactly two)  (open block)
#   **** (4 or more)    (sidebar block)
#   ==== (4 or more)    (example block)
#   ____ (4 or more)    (quote block)
blockTransform = re.compile(r'^(--|[*=_]{4,})$')

# Fake block delimiters for "common" VU statements
blockCommonTransform = '// Common Valid Usage\n'

# Markup for block delimiters whose contents should *not* be transformed
#   |=== (3 or more)    (table)
#   ```  (exactly three) (listing block)
#   //// (4 or more)    (comment block)
#   ---- (4 or more)    (listing block)
#   .... (4 or more)    (literal block)
#   ++++ (4 or more)    (passthrough block)
blockPassthrough = re.compile(r'^(\|={3,}|[`]{3}|[\-+./]{4,})$')

# Markup for introducing lists (hanging paragraphs)
#   * bullet
#     ** bullet
#     -- bullet
#   . bullet
#   :: bullet (no longer supported by asciidoctor 2)
#   {empty}:: bullet
#   1. list item
#   <1> source listing callout
beginBullet = re.compile(r'^ *([-*.]+|\{empty\}::|::|[0-9]+[.]|<([0-9]+)>) ')


class TransformState:
    """State machine for transforming documents.

    Represents the state of the transform operation"""
    def __init__(self):
        self.blockStack = [ None ]
        """The last element is a line with the asciidoc block delimiter that is
        currently in effect, such as '--', '----', '****', '====', or '++++'.
        This affects whether or not the block contents should be transformed."""
        self.transformStack = [ True ]
        """The last element is True or False if the current blockStack contents
        should be transformed."""
        self.vuStack = [ False ]
        """the last element is True or False if the current blockStack contents
        are an explicit Valid Usage block."""

        self.para = []
        """list of lines in the paragraph being accumulated.
        When this is non-empty, there is a current paragraph."""

        self.lastTitle = False
        """true if the previous line was a document title line
        (e.g. :leveloffset: 0 - no attempt to track changes to this is made)."""

        self.leadIndent = 0
        """indent level (in spaces) of the first line of a paragraph."""

        self.hangIndent = 0
        """indent level of the remaining lines of a paragraph."""

        self.lineNumber = 0
        """line number being read from the input file."""

        self.defaultApiName = '{refpage}'
        self.apiName = self.defaultApiName
        """String name of an API structure or command for VUID tag generation,
        or {refpage} if one has not been included in this file yet."""

    def incrLineNumber(self):
        """Advance the current input line number by one."""
        self.lineNumber = self.lineNumber + 1

    def isOpenBlockDelimiter(self, line):
        """Returns True if line is an open block delimiter.
        This does not and should not match the listing block delimiter,
        which is used inside refpage blocks both as a listing block and,
        via an extension, as a nested open block."""
        return line.rstrip() == '--'

    def resetPara(self):
        """Reset the paragraph, including its indentation level"""
        self.para = []
        self.leadIndent = 0
        self.hangIndent = 0

    def endBlock(self, line, transform, vuBlock):
        """Begin or end a block delimited by 'line'.

        If 'line' equals the delimiter on top of the block stack, the block
        is ended and its entries are popped from all three stacks. Otherwise
        a new block is begun, and 'transform' and 'vuBlock' are pushed to tag
        whether or not to transform the contents.

        vuBlock is True if the previous line indicates this is a Valid Usage
        block."""
        if self.blockStack[-1] == line:
            logDiag('endBlock line', self.lineNumber,
                    ': popping block end depth:', len(self.blockStack),
                    ':', line, end='')

            # Reset apiName at the end of an open block.
            # Open blocks cannot be nested (at present), so this is safe.
            if self.isOpenBlockDelimiter(line):
                logDiag('reset apiName to empty at line', self.lineNumber)
                self.apiName = self.defaultApiName
            else:
                logDiag('NOT resetting apiName to default at line',
                        self.lineNumber)

            self.blockStack.pop()
            self.transformStack.pop()
            self.vuStack.pop()
        else:
            # Start a block
            self.blockStack.append(line)
            self.transformStack.append(transform)
            self.vuStack.append(vuBlock)

            logDiag('endBlock transform =', transform, ' line', self.lineNumber,
                    ': pushing block start depth', len(self.blockStack),
                    ':', line, end='')

    def addLine(self, line, indent):
        """Add a line to the current paragraph.

        'indent' is the number of leading whitespace characters of 'line',
        as computed by the caller."""
        if not self.para:
            # Begin a new paragraph
            self.para = [line]
            self.leadIndent = indent
            self.hangIndent = indent
        else:
            # Add a line to a paragraph. Increase the hanging indentation
            # level - once.
            if self.hangIndent == self.leadIndent:
                self.hangIndent = indent
            self.para.append(line)


class TransformCallbackState:
    """State given to the transformer callback object, derived from
    TransformState."""
    def __init__(self, state):
        self.isVU = state.vuStack[-1] if state.vuStack else False
        """Whether this paragraph is a VU."""

        self.apiName = state.apiName
        """String name of an API structure or command this paragraph belongs
        to."""

        self.leadIndent = state.leadIndent
        """indent level (in spaces) of the first line of a paragraph."""

        self.hangIndent = state.hangIndent
        """indent level of the remaining lines of a paragraph."""

        self.lineNumber = state.lineNumber
        """line number being read from the input file."""


class DocTransformer:
    """A transformer that recursively goes over all spec files under a path.

    The transformer goes over all spec files under a path and does some basic
    parsing. In particular, it tracks which section the current text belongs
    to, whether it references a VU, etc and processes them in 'paragraph'
    granularity.
    The transformer takes a callback object with the following methods:

    - transformParagraph: Called when a paragraph is parsed. The paragraph
      along with some information (such as whether it is a VU) is passed. The
      function may transform the paragraph as necessary.
    - onEmbeddedVUConditional: Called when an embedded VU conditional is
      encountered.
    """
    def __init__(self,
                 filename,
                 outfile,
                 callback):
        self.filename = filename
        """base name of file being read from."""

        self.outfile = outfile
        """file handle to write to."""

        self.state = TransformState()
        """State of transformation"""

        self.callback = callback
        """The transformation callback object"""

    def printLines(self, lines):
        """Print an array of lines with newlines already present.

        Nothing is written if self.outfile is None, although the diagnostic
        is still logged."""
        if len(lines) > 0:
            logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')

        if self.outfile is not None:
            for line in lines:
                print(line, file=self.outfile, end='')

    def emitPara(self):
        """Emit a paragraph, possibly transforming it depending on the block
        context.

        Resets the paragraph accumulator."""
        if self.state.para:
            transformedPara = self.state.para

            # Only hand the paragraph to the callback when the innermost
            # block allows its contents to be transformed.
            if self.state.transformStack[-1]:
                callbackState = TransformCallbackState(self.state)

                transformedPara = self.callback.transformParagraph(
                        self.state.para,
                        callbackState)

            self.printLines(transformedPara)

        self.state.resetPara()

    def endPara(self, line):
        """'line' ends a paragraph and should itself be emitted.
        line may be None to indicate EOF or other exception."""
        logDiag('endPara line', self.state.lineNumber, ': emitting paragraph')

        # Emit current paragraph, this line, and reset tracker
        self.emitPara()

        if line:
            self.printLines([line])

    def endParaContinue(self, line):
        """'line' ends a paragraph (unless there is already a paragraph being
        accumulated, e.g. len(para) > 0 - currently not implemented)"""
        self.endPara(line)

    def endBlock(self, line, transform = False, vuBlock = False):
        """'line' begins or ends a block.

        If beginning a block, tag whether or not to transform the contents.

        vuBlock is True if the previous line indicates this is a Valid Usage
        block."""
        self.endPara(line)
        self.state.endBlock(line, transform, vuBlock)

    def endParaBlockTransform(self, line, vuBlock):
        """'line' begins or ends a block. The paragraphs in the block *should* be
        reformatted (e.g. a NOTE)."""
        self.endBlock(line, transform = True, vuBlock = vuBlock)

    def endParaBlockPassthrough(self, line):
        """'line' begins or ends a block. The paragraphs in the block should
        *not* be reformatted (e.g. a code listing)."""
        self.endBlock(line, transform = False)

    def addLine(self, line):
        """'line' starts or continues a paragraph.

        Paragraphs may have "hanging indent", e.g.

        ```
          * Bullet point...
            ... continued
        ```

        In this case, when the higher indentation level ends, so does the
        paragraph."""
        logDiag('addLine line', self.state.lineNumber, ':', line, end='')

        # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
        indent = len(line) - len(line.lstrip())

        # A hanging paragraph ends due to a less-indented line.
        if self.state.para and indent < self.state.hangIndent:
            logDiag('addLine: line reduces indentation, emit paragraph')
            self.emitPara()

        # A bullet point (or something that looks like one) always ends the
        # current paragraph.
        if beginBullet.match(line):
            logDiag('addLine: line matches beginBullet, emit paragraph')
            self.emitPara()

        self.state.addLine(line, indent)

    def apiMatch(self, oldname, newname):
        """Returns whether oldname and newname match, up to an API suffix.
        This should use the API map instead of this heuristic, since aliases
        like VkPhysicalDeviceVariablePointerFeaturesKHR ->
        VkPhysicalDeviceVariablePointersFeatures are not recognized."""
        upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        # Strip any trailing run of upper-case letters from both names
        # before comparing them.
        return oldname.rstrip(upper) == newname.rstrip(upper)

    def _trackIncludedApi(self, line):
        """If 'line' is an include:: line starting the definition of a
        structure or command, track that for use in VUID generation.

        Only generated 'api' includes in the 'protos', 'structs' or
        'funcpointers' categories are considered. The first such name seen
        while apiName is still the default is recorded; later ones only
        produce a diagnostic if they do not match it per apiMatch()."""
        matches = includePat.search(line)
        if matches is None:
            return

        if matches.group('generated_type') != 'api':
            return

        if matches.group('category') not in ('protos', 'structs', 'funcpointers'):
            return

        apiName = matches.group('entity_name')

        if self.state.apiName == self.state.defaultApiName:
            self.state.apiName = apiName
        elif not self.apiMatch(self.state.apiName, apiName):
            # This happens when there are multiple API include
            # lines in a single block. The style guideline is to
            # always place the API which others are promoted to
            # first. In virtually all cases, the promoted API
            # will differ solely in the vendor suffix (or
            # absence of it), which is benign.
            logDiag(f'Promoted API name mismatch at line {self.state.lineNumber}: {apiName} does not match {self.state.apiName} (this is OK if it is just a spelling alias)')

    def transformFile(self, lines):
        """Transform lines, and possibly output to the given file.

        'lines' is an iterable of input lines, each expected to carry its
        trailing newline (delimiters are compared against strings such as
        '.Valid Usage\\n')."""

        # The line processed in the previous iteration, or None on the
        # first line. Used to detect a '.Valid Usage' block title.
        prevLine = None

        for line in lines:
            self.state.incrLineNumber()

            # Is this a title line (leading '= ' followed by text)?
            thisTitle = False

            # The logic here is broken. If we are in a non-transformable block and
            # this line *does not* end the block, it should always be
            # accumulated.
            # NOTE(review): a '////' comment block delimiter also matches the
            # '//.*' alternative of endPara, which is tested before
            # blockPassthrough below, so it is handled as a plain
            # paragraph-ending line.

            # Test for a blockCommonTransform delimiter comment first, to avoid
            # treating it solely as a end-Paragraph marker comment.
            if line == blockCommonTransform:
                # Starting or ending a pseudo-block for "common" VU statements.
                self.endParaBlockTransform(line, vuBlock = True)

            elif blockTransform.match(line):
                # Starting or ending a block whose contents may be transformed.
                # Blocks cannot be nested.

                # Is this is an explicit Valid Usage block?
                vuBlock = (prevLine == '.Valid Usage\n')

                self.endParaBlockTransform(line, vuBlock)

            elif endPara.match(line):
                # Ending a paragraph. Emit the current paragraph, if any, and
                # prepare to begin a new paragraph.

                self.endPara(line)

                # If this is an include:: line starting the definition of a
                # structure or command, track that for use in VUID generation.
                self._trackIncludedApi(line)

            elif endParaContinue.match(line):
                # For now, always just end the paragraph.
                # Could check see if len(para) > 0 to accumulate.

                self.endParaContinue(line)

                # If it is a title line, track that
                if line[0:2] == '= ':
                    thisTitle = True

            elif blockPassthrough.match(line):
                # Starting or ending a block whose contents must not be
                # transformed. These are tables, etc. Blocks cannot be nested.
                # Note that the use of a listing block masquerading as an
                # open block, via an extension, will not be formatted even
                # though it should be.
                # Fixing this would require looking at the previous line
                # state for the '[open]' tag, and there are so few cases of
                # this in the spec markup that it is not worth the trouble.

                self.endParaBlockPassthrough(line)
            elif self.state.lastTitle:
                # The previous line was a document title line. This line
                # is the author / credits line and must not be transformed.

                self.endPara(line)
            else:
                # Just accumulate a line to the current paragraph. Watch out for
                # hanging indents / bullet-points and track that indent level.

                self.addLine(line)

                # Commented out now that VU extractor supports this, but may
                # need to refactor through a conventions object enable if
                # OpenXR still needs this.

                # This test looks for disallowed conditionals inside Valid Usage
                # blocks, by checking if (a) this line does not start a new VU
                # (bullet point) and (b) the previous line starts an asciidoctor
                # conditional (ifdef:: or ifndef::).
                # if (self.state.vuStack[-1]
                #     and not beginBullet.match(line)
                #     and prevLine is not None
                #     and conditionalStart.match(prevLine)):
                #        self.callback.onEmbeddedVUConditional(self.state)

            self.state.lastTitle = thisTitle
            prevLine = line

        # Cleanup at end of file
        self.endPara(None)

        # Check for sensible block nesting
        if len(self.state.blockStack) > 1:
            logWarn('file', self.filename,
                    'mismatched asciidoc block delimiters at EOF:',
                    self.state.blockStack[-1])