#!/usr/bin/python3
"""
    Generate an output file from a specification file and a template file.
    See README.md for more details.
"""

import argparse
import re


class Reader:
    """ Simple base class that facilitates reading a file line by line.
        Derived class must implement handle_line() and may implement finish().
    """

    def __init__(self, filename):
        self.filename = filename
        self.line = None    # most recently read line
        self.lineno = -1    # zero-based

    def finish(self):
        """ Called after entire file has been read
        """
        pass

    def handle_line(self):
        """ Called after each line has been read (the line is in self.line).
            Raises AssertionError unless overridden by the derived class.
        """
        raise AssertionError("handle_line() must be implemented by the derived class")

    def read(self):
        """ Read the whole file, calling handle_line() once per line (with
            self.line and self.lineno set) and then finish() once at the end.
        """
        with open(self.filename) as f:
            lines = f.readlines()
        for self.lineno, self.line in enumerate(lines):
            self.handle_line()
        self.finish()

    def context(self):
        """ Error-reporting aid: Return a string describing the location
            of the most recently read line (using a one-based line number)
        """
        return "line " + str(self.lineno + 1) + " of " + self.filename


class Specification(Reader):
    """ Reader for specification file.
        After read(), self.sections maps each section name to its list of
        lines and self.defmacro maps each macro name to its body.
    """

    # Describes %kind state
    UNCONDITIONAL = 0       # No %kind in effect
    CONDITIONAL_OFF = 1     # %kind in effect, lines are to be ignored
    CONDITIONAL_ON = 2      # %kind in effect, lines are to be processed

    # Every directive name recognized in a specification file
    DIRECTIVES = (
        "%define", "%define-kinds", "%else", "%insert", "%insert-indented",
        "%kind", "%/kind", "%section", "%/section",
    )

    # %{name arg1 arg2 ...} -- a macro invocation within a line
    _MACRO_CALL = re.compile(r"(%\{)(\S+?)(?=[\s}])\s*(.*?)\s*(\})")
    # %{N} -- a reference to the Nth (one-based) argument within a macro body
    _MACRO_ARG = re.compile(r"(%\{)(\d+)(\})")

    def __init__(self, filename, kind):
        super().__init__(filename)
        self.sections = dict()          # key is section name, value is array of strings (lines) in the section
        self.section = None             # name of current %section
        self.section_start = None       # first line number of current %section
        self.defmacro = dict()          # key is macro name, value is string (body of macro)
        self.kind = kind
        self.kinds = None               # remember %define-kinds
        self.conditional = self.UNCONDITIONAL
        self.conditional_start = None   # first line number of current %kind

    def finish(self):
        """ Verify that no %section or %kind is still open at end of file
        """
        assert self.section is None, "\"%section " + self.section + \
            "\" not terminated by end of specification file"
        assert self.conditional == self.UNCONDITIONAL, "%kind not terminated by end of specification file"

    def macro_substitution(self):
        """ Performs macro substitution on self.line, and returns the result.
            Each %{name args...} is replaced by the body of macro "name", in
            which each %{N} is replaced by the Nth (one-based) argument.
            Substituted text is not rescanned for further macros.
            Raises AssertionError for an undefined macro or a bad argument number.
        """
        def expand_call(call):
            # lookup macro
            key = call[2]
            assert key in self.defmacro, "Missing definition of macro %{" + key + "} at " + self.context()

            # read macro arguments
            args = re.split(r"\s+", call[3]) if call[3] != "" else []

            def expand_arg(ref):
                argnum = int(ref[2])
                # Argument numbers are one-based; %{0} must not silently
                # select the last argument via negative indexing.
                assert argnum >= 1, "Macro argument number must be positive (at " + self.context() + ")"
                assert argnum <= len(args), "Macro argument number " + str(argnum) + " exceeds " + \
                    str(len(args)) + " supplied arguments at " + self.context()
                return args[argnum - 1]

            # substitute arguments into the macro body
            return self._MACRO_ARG.sub(expand_arg, self.defmacro[key])

        return self._MACRO_CALL.sub(expand_call, self.line)

    def match_kind(self, patterns_string):
        """ Utility routine for %kind directive: Is self.kind found within patterns_string?
            patterns_string is a whitespace-separated list of patterns, each of which is
            "prefix*" (wildcard), "kind+" (that kind or any later one in %define-kinds),
            or an ordinary kind name.  Returns True if any pattern matches.
        """
        for pattern in re.split(r"\s+", patterns_string.strip()):
            if pattern.endswith("*"):
                # A wildcard pattern: Ends in *, so see if it's a prefix of self.kind.
                if self.kind.startswith(pattern[:-1]):
                    return True
            elif pattern.endswith("+"):
                # A lowest version pattern: Ends in + and we check if self.kind is equal
                # to the kind in the pattern or to any kind which is to the right of the
                # kind in the pattern in self.kinds.
                lowest = pattern[:-1]
                assert self.kinds is not None, (
                    "Kind \"" + pattern + "\" at " + self.context() +
                    " requires a preceding %define-kinds"
                )
                assert lowest in self.kinds, (
                    "Kind \"" + pattern + "\" at " + self.context() +
                    " wasn't defined in %define-kinds"
                )
                if self.kind in self.kinds[self.kinds.index(lowest):]:
                    return True
            else:
                # An ordinary pattern: See if it matches self.kind.
                if self.kinds is not None and pattern not in self.kinds:
                    # TODO: Something similar for the wildcard case above
                    print("WARNING: kind \"" + pattern + "\" at " + self.context() +
                          " would have been rejected by %define-kinds")
                if pattern == self.kind:
                    return True
        return False

    def handle_line(self):
        """ Most of the work occurs here.  Having read a line, we act on it immediately:
            skip a comment, process a directive, add a line to a section or to a
            macro definition, etc.
            Raises AssertionError for an unknown, malformed or misplaced directive.
        """

        # Common typos: /%directive, \%directive
        matchbad = re.search(r"^[/\\]%(\S*)", self.line)
        if matchbad and "%/" + matchbad[1] in self.DIRECTIVES:
            print("WARNING: Probable misspelled directive at " + self.context())

        # Directive?  (A line starting with %{ is a macro invocation, not a directive.)
        if self.line.startswith("%") and not self.line.startswith("%{"):
            # Check for comment
            if self.line.startswith("%%"):
                return

            # Validate directive name
            directive = re.search(r"^(%\S*)", self.line)[1]
            if directive not in self.DIRECTIVES:
                raise AssertionError("Unknown directive \"" + directive + "\" on " + self.context())

            # Check for insert
            match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line)
            if match:
                assert self.section is not None, directive + " outside %section at " + self.context()
                count = match[1] or "0"
                key = match[2]
                assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + self.context()
                assert key in self.sections, "Unknown section \"" + key + "\" on " + self.context()
                assert key != self.section, "Cannot insert section \"" + key + "\" into itself on " + self.context()
                if self.conditional == self.CONDITIONAL_OFF:
                    return
                indent = " " * int(count)
                # Blank lines are copied as-is so that no trailing whitespace is introduced
                self.sections[self.section].extend(
                    (indent + line if line.rstrip("\n") else line)
                    for line in self.sections[key])
                return

            # Check for start of section
            match = re.search(r"^%section\s+(\S+)\s*$", self.line)
            if match:
                assert self.section is None, "Nested %section is forbidden at " + self.context()
                self.section_start = self.lineno
                if self.conditional == self.CONDITIONAL_OFF:
                    # Remember that a section is open, but don't record it
                    self.section = ""
                    return
                key = match[1]
                assert key not in self.sections, "Duplicate definition of \"" + key + "\" on " + self.context()
                self.sections[key] = []
                self.section = key
                # Non-directive lines will be added to self.sections[key] as they are read
                # until we see %/section
                return

            # Check for end of section
            if re.search(r"^%/section\s*$", self.line):
                assert self.section is not None, "%/section with no matching %section on " + self.context()
                assert self.conditional_start is None or self.conditional_start < self.section_start, \
                    "%kind not terminated by end of %section on " + self.context()
                self.section = None
                self.section_start = None
                return

            # Check for start of kind
            match = re.search(r"^%kind\s+((\S+)(\s+\S+)*)\s*$", self.line)
            if match:
                assert self.conditional == self.UNCONDITIONAL, \
                    "Nested %kind is forbidden at " + self.context()
                if self.match_kind(match[1]):
                    self.conditional = self.CONDITIONAL_ON
                else:
                    self.conditional = self.CONDITIONAL_OFF
                self.conditional_start = self.lineno
                return

            # Check for complement of kind (else)
            if re.search(r"^%else\s*$", self.line):
                assert self.conditional != self.UNCONDITIONAL, "%else without matching %kind on " + self.context()
                assert self.section_start is None or self.section_start < self.conditional_start, \
                    "%section not terminated by %else on " + self.context()
                if self.conditional == self.CONDITIONAL_ON:
                    self.conditional = self.CONDITIONAL_OFF
                else:
                    assert self.conditional == self.CONDITIONAL_OFF
                    self.conditional = self.CONDITIONAL_ON
                # Note that we permit
                #   %kind foo
                #   abc
                #   %else
                #   def
                #   %else
                #   ghi
                #   %/kind
                # which is equivalent to
                #   %kind foo
                #   abc
                #   ghi
                #   %else
                #   def
                #   %/kind
                # Probably not very useful, but easier to allow than to forbid.
                return

            # Check for end of kind
            if re.search(r"^%/kind\s*$", self.line):
                assert self.conditional != self.UNCONDITIONAL, "%/kind without matching %kind on " + self.context()
                assert self.section_start is None or self.section_start < self.conditional_start, \
                    "%section not terminated by end of %kind on " + self.context()
                self.conditional = self.UNCONDITIONAL
                self.conditional_start = None
                return

            # Check for kinds definition
            match = re.search(r"^%define-kinds\s+(\S.*?)\s*$", self.line)
            if match:
                assert self.conditional == self.UNCONDITIONAL, "%define-kinds within %kind is forbidden at " + \
                    self.context()
                kinds = re.split(r"\s+", match[1])
                assert self.kind in kinds, "kind \"" + self.kind + "\" is not listed on " + self.context()
                assert self.kinds is None, "Second %define-kinds directive at " + self.context()
                self.kinds = kinds
                return

            # Check for define
            match = re.search(r"^%define\s+(\S+)(.*)$", self.line)
            if match:
                if self.conditional == self.CONDITIONAL_OFF:
                    return
                key = match[1]
                assert key not in self.defmacro, "Duplicate definition of \"" + key + "\" on " + self.context()
                # The body is whatever follows the first whitespace character after the name
                body = re.search(r"\s(.*)$", match[2])
                self.defmacro[key] = body[1] if body else ""
                return

            # Malformed directive -- the name matched, but the syntax didn't
            raise AssertionError("Malformed directive \"" + directive + "\" on " + self.context())

        # Ordinary line: ignored inside a non-matching %kind; treated as a
        # comment outside any %section; otherwise added to the current section.
        if self.conditional == self.CONDITIONAL_OFF or self.section is None:
            return
        self.sections[self.section].append(self.macro_substitution())


class Template(Reader):
    """ Reader for template file.
        After read(), self.lines holds the lines of the generated output.
    """

    def __init__(self, filename, specification):
        super().__init__(filename)
        self.lines = []
        self.specification = specification

    def handle_line(self):
        """ Most of the work occurs here.  Having read a line, we act on it immediately:
            skip a comment, process a directive, accumulate a line.
            Raises AssertionError for an unknown directive, section or bad indent count.
        """

        # Directive?
        if self.line.startswith("%"):
            # Check for comment
            if self.line.startswith("%%"):
                return

            # Check for insertion
            match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", self.line)
            if match:
                count = match[1] or "0"
                key = match[2]
                # Use the specification handed to the constructor, not a module global
                sections = self.specification.sections
                assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + self.context()
                assert key in sections, "Unknown section \"" + key + "\" on " + self.context()
                indent = " " * int(count)
                for line in sections[key]:
                    if re.search("TODO", line, re.IGNORECASE):
                        print("WARNING: \"TODO\" at " + self.context())
                    # Blank lines are copied as-is so that no trailing whitespace is introduced
                    self.lines.append(indent + line if line.rstrip("\n") else line)
                return

            # Bad directive
            match = re.search(r"^(%\S*)", self.line)
            raise AssertionError("Unknown directive \"" + match[1] + "\" on " + self.context())

        # Literal text
        if re.search("TODO", self.line, re.IGNORECASE):
            print("WARNING: \"TODO\" at " + self.context())
        self.lines.append(self.line)


def main():
    """ Command-line entry point: read the specification and the template,
        then write the generated output file.
    """
    parser = argparse.ArgumentParser(description="Create an output file by inserting sections "
                                     "from a specification file into a template file")
    parser.add_argument("-k", "--kind", required=True,
                        help="token identifying kind of file to generate (per \"kind\" directive)")
    parser.add_argument("-o", "--output", required=True,
                        help="path to generated output file")
    parser.add_argument("-s", "--specification", required=True,
                        help="path to input specification file")
    parser.add_argument("-t", "--template", required=True,
                        help="path to input template file")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()
    if args.verbose:
        print(args)

    # Read the specification
    specification = Specification(args.specification, args.kind)
    specification.read()
    if args.verbose:
        print(specification.defmacro)

    # Read the template
    template = Template(args.template, specification)
    template.read()

    # Write the output
    with open(args.output, "w") as f:
        f.writelines(template.lines)


if __name__ == "__main__":
    main()

# TODO: Write test cases for malformed specification and template files
# TODO: Find a cleaner way to handle conditionals (%kind) or nesting in general;
#       maybe add support for more nesting
# TODO: Could we do away with the distinction between a specification file and a
#       template file and add a %include directive?