1#!/usr/bin/python3
2""" Generate an output file from a specification file and a template file.
3    See README.md for more details.
4"""
5
6import argparse
7import re
8
class Reader:
  """ Simple base class that facilitates reading a file one line at a time.
      Derived class must implement handle_line() and may implement finish().

      read() loads the whole file, then for each line stores it in self.line
      (with its zero-based index in self.lineno) and calls handle_line();
      finish() is called once after the last line.
  """
  def __init__(self, filename):
    """ filename: path of the file that read() will open
    """
    self.filename = filename
    self.line = None # most recently read line
    self.lineno = -1 # zero-based; stays -1 until a line has been read

  def finish(self):
    """ Called after entire file has been read.  Does nothing by default.
    """

  def handle_line(self):
    """ Called after each line has been read; the line is in self.line.
        Raises NotImplementedError unless overridden by the derived class.
    """
    # An explicit raise (rather than "assert False") still fires under "python -O".
    raise NotImplementedError(type(self).__name__ + " must implement handle_line()")

  def read(self):
    """ Read self.filename, calling handle_line() once per line and then finish().
    """
    # Load everything first so the file is already closed while lines are handled.
    with open(self.filename) as f:
      lines = f.readlines()
    for self.lineno, self.line in enumerate(lines):
      self.handle_line()
    self.finish()

  def context(self):
    """ Error-reporting aid: Return a string describing the location
        of the most recently read line (using a one-based line number)
    """
    return f"line {self.lineno + 1} of {self.filename}"
37
class Specification(Reader):
  """ Reader for specification file.

      While the file is read, %section ... %/section blocks are collected into
      self.sections and %define macros into self.defmacro.  Lines inside a
      %kind ... %else ... %/kind conditional are processed or ignored depending
      on whether self.kind matches the patterns given to %kind.

      Malformed input is reported by raising AssertionError with a message
      that names the offending location.
  """

  # Describes %kind state
  UNCONDITIONAL = 0   # No %kind in effect
  CONDITIONAL_OFF = 1 # %kind in effect, lines are to be ignored
  CONDITIONAL_ON = 2  # %kind in effect, lines are to be processed

  # Every directive name accepted in a specification file
  DIRECTIVES = (
      "%define", "%define-kinds", "%else", "%insert", "%insert-indented",
      "%kind", "%/kind", "%section", "%/section",
  )

  def __init__(self, filename, kind):
    """ filename: path of the specification file
        kind: the kind being generated; %kind patterns are matched against it
    """
    super().__init__(filename)
    self.sections = dict() # key is section name, value is array of strings (lines) in the section
    self.section = None # name of current %section
    self.section_start = None # first line number of current %section
    self.defmacro = dict() # key is macro name, value is string (body of macro)
    self.kind = kind
    self.kinds = None # remember %define-kinds
    self.conditional = self.UNCONDITIONAL
    self.conditional_start = None # first line number of current %kind

  def finish(self):
    """ Check that no %section or %kind is still open at end of file.
    """
    assert self.section is None, "\"%section " + self.section + \
      "\" not terminated by end of specification file"
    assert self.conditional == self.UNCONDITIONAL, "%kind not terminated by end of specification file"

  def macro_substitution(self):
    """ Performs macro substitution on self.line, and returns the result.

        Each "%{name arg1 arg2 ...}" in the line is replaced by the body of
        macro "name", in which every "%{N}" has been replaced by the N-th
        (one-based) argument.  Substituted text is not rescanned for macros.
        Fails an assertion if the macro is undefined, or if its body refers
        to argument number zero or to more arguments than were supplied.
    """
    LINESEARCH = r"(%\{)(\S+?)(?=[\s}])\s*(.*?)\s*(\})"
    BODYSEARCH = r"(%\{)(\d+)(\})"

    def expand_macro(match):
      # lookup macro
      key = match[2]
      assert key in self.defmacro, "Missing definition of macro %{" + key + "} at " + self.context()

      # read the macro arguments (whitespace-separated), if any
      args = re.split(r"\s+", match[3]) if match[3] != "" else []

      def expand_argument(bodymatch):
        argnum = int(bodymatch[2])
        # Arguments are numbered from one.  Without this check %{0} would be
        # turned into index -1 and silently pick up the last argument.
        assert argnum >= 1, "Macro argument number must be positive (at " + self.context() + ")"
        assert argnum <= len(args), "Macro argument number " + str(argnum) + " exceeds " + \
          str(len(args)) + " supplied arguments at " + self.context()
        return args[argnum - 1]

      # substitute the arguments into the macro body
      return re.sub(BODYSEARCH, expand_argument, self.defmacro[key])

    # A function replacement is inserted verbatim (no backslash processing).
    return re.sub(LINESEARCH, expand_macro, self.line)

  def match_kind(self, patterns_string):
    """ Utility routine for %kind directive: Is self.kind found within patterns_string?

        patterns_string holds whitespace-separated patterns, each one of
          foo*  matches any kind that starts with "foo"
          foo+  matches "foo" and every kind listed after it in %define-kinds
          foo   matches the kind "foo" exactly
        Returns True as soon as one pattern matches, otherwise False.
    """
    for pattern in re.split(r"\s+", patterns_string.strip()):
      if pattern.endswith("*"):
        # A wildcard pattern: Ends in *, so see if it's a prefix of self.kind.
        if self.kind.startswith(pattern[:-1]):
          return True
      elif pattern.endswith("+"):
        # A lowest version pattern: Ends in + and we check if self.kind is equal
        # to the kind in the pattern or to any kind which is to the right of the
        # kind in the pattern in self.kinds.
        # The ordering comes from %define-kinds, so it must have been seen already.
        assert self.kinds is not None, (
            "Kind \"" + pattern + "\" at " + self.context() +
            " requires a preceding %define-kinds"
        )
        lowest = pattern[:-1]
        assert lowest in self.kinds, (
            "Kind \"" + pattern + "\" at " + self.context() +
            " wasn't defined in %define-kinds"
        )
        if self.kind in self.kinds[self.kinds.index(lowest):]:
          return True
      else:
        # An ordinary pattern: See if it matches self.kind.
        if self.kinds is not None and pattern not in self.kinds:
          # TODO: Something similar for the wildcard case above
          print("WARNING: kind \"" + pattern + "\" at " + self.context() +
                " would have been rejected by %define-kinds")
        if pattern == self.kind:
          return True
    return False

  def handle_line(self):
    """ Most of the work occurs here.  Having read a line, we act on it immediately:
        skip a comment, process a directive, add a line to a section, etc.

        A line starting with "%" (but not "%{", which is a macro use) must be a
        "%%" comment or one of DIRECTIVES.  Any other line is macro-expanded and
        appended to the current section, unless it lies outside every %section
        or inside a %kind that is switched off, in which case it is ignored.
    """
    line = self.line
    where = self.context()
    switched_off = self.conditional == self.CONDITIONAL_OFF

    # Common typos: /%directive, \%directive
    matchbad = re.search(r"^[/\\]%(\S*)", line)
    if matchbad and "%/" + matchbad[1] in self.DIRECTIVES:
      print("WARNING: Probable misspelled directive at " + where)

    # Directive?
    if line.startswith("%") and not line.startswith("%{"):
      # Check for comment
      if line.startswith("%%"):
        return

      # Validate directive name
      directive = re.search(r"^(%\S*)", line)[1]
      if directive not in self.DIRECTIVES:
        raise AssertionError("Unknown directive \"" + directive + "\" on " + where)

      # Check for insert
      match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", line)
      if match:
        assert self.section is not None, directive + " outside %section at " + where
        count = match[1] or "0"
        key = match[2]
        assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + where
        assert key in self.sections, "Unknown section \"" + key + "\" on " + where
        assert key != self.section, "Cannot insert section \"" + key + "\" into itself on " + where
        # The directive is validated even when switched off, but only acted on when on.
        if switched_off:
          return
        indent = " " * int(count)
        # Blank lines are copied as-is so that no trailing whitespace is introduced.
        self.sections[self.section].extend(
            (indent + inserted if inserted.rstrip("\n") else inserted)
            for inserted in self.sections[key])
        return

      # Check for start of section
      match = re.search(r"^%section\s+(\S+)\s*$", line)
      if match:
        assert self.section is None, "Nested %section is forbidden at " + where
        self.section_start = self.lineno
        if switched_off:
          # Remember that a section is open, but under a placeholder name so
          # that nothing is recorded in self.sections.
          self.section = ""
          return
        key = match[1]
        assert key not in self.sections, "Duplicate definition of \"" + key + "\" on " + where
        self.sections[key] = []
        self.section = key
        # Non-directive lines will be added to self.sections[key] as they are read
        # until we see %/section
        return

      # Check for end of section
      if re.search(r"^%/section\s*$", line):
        assert self.section is not None, "%/section with no matching %section on " + where
        # A %kind opened inside this section must have been closed inside it.
        assert self.conditional_start is None or self.conditional_start < self.section_start, \
            "%kind not terminated by end of %section on " + where
        self.section = None
        self.section_start = None
        return

      # Check for start of kind
      match = re.search(r"^%kind\s+((\S+)(\s+\S+)*)\s*$", line)
      if match:
        assert self.conditional == self.UNCONDITIONAL, \
            "Nested %kind is forbidden at " + where
        if self.match_kind(match[1]):
          self.conditional = self.CONDITIONAL_ON
        else:
          self.conditional = self.CONDITIONAL_OFF
        self.conditional_start = self.lineno
        return

      # Check for complement of kind (else)
      if re.search(r"^%else\s*$", line):
        assert self.conditional != self.UNCONDITIONAL, "%else without matching %kind on " + where
        # A %section opened inside this %kind must have been closed before %else.
        assert self.section_start is None or self.section_start < self.conditional_start, \
            "%section not terminated by %else on " + where
        if self.conditional == self.CONDITIONAL_ON:
          self.conditional = self.CONDITIONAL_OFF
        else:
          self.conditional = self.CONDITIONAL_ON
        # Note that we permit
        #   %kind foo
        #   abc
        #   %else
        #   def
        #   %else
        #   ghi
        #   %/kind
        # which is equivalent to
        #   %kind foo
        #   abc
        #   ghi
        #   %else
        #   def
        #   %/kind
        # Probably not very useful, but easier to allow than to forbid.
        return

      # Check for end of kind
      if re.search(r"^%/kind\s*$", line):
        assert self.conditional != self.UNCONDITIONAL, "%/kind without matching %kind on " + where
        assert self.section_start is None or self.section_start < self.conditional_start, \
            "%section not terminated by end of %kind on " + where
        self.conditional = self.UNCONDITIONAL
        self.conditional_start = None
        return

      # Check for kinds definition
      match = re.search(r"^%define-kinds\s+(\S.*?)\s*$", line)
      if match:
        assert self.conditional == self.UNCONDITIONAL, "%define-kinds within %kind is forbidden at " + \
          where
        kinds = re.split(r"\s+", match[1])
        assert self.kind in kinds, "kind \"" + self.kind + "\" is not listed on " + where
        assert self.kinds is None, "Second %define-kinds directive at " + where
        self.kinds = kinds
        return

      # Check for define
      match = re.search(r"^%define\s+(\S+)(.*)$", line)
      if match:
        if switched_off:
          return
        key = match[1]
        assert key not in self.defmacro, "Duplicate definition of \"" + key + "\" on " + where
        # The body is whatever follows the single whitespace character after the name.
        body = re.search(r"\s(.*)$", match[2])
        self.defmacro[key] = body[1] if body else ""
        return

      # Malformed directive -- the name matched, but the syntax didn't
      raise AssertionError("Malformed directive \"" + directive + "\" on " + where)

    # Ordinary line: ignored when switched off or outside every %section
    # (the latter is treated as a comment), otherwise added to the section.
    if switched_off or self.section is None:
      return
    self.sections[self.section].append(self.macro_substitution())
289
class Template(Reader):
  """ Reader for template file.

      Template lines are copied to self.lines, except that "%%" comment lines
      are dropped and each %insert / %insert-indented directive is replaced by
      the lines of the named section of the specification.
  """

  def __init__(self, filename, specification):
    """ filename: path of the template file
        specification: object whose "sections" attribute maps a section name
                       to its list of lines
    """
    super().__init__(filename)
    self.lines = []
    self.specification = specification

  def handle_line(self):
    """ Most of the work occurs here.  Having read a line, we act on it immediately:
        skip a comment, process a directive, accumulate a line.
        Raises AssertionError for an unknown directive, a bad indentation
        count or an unknown section name.
    """
    line = self.line

    # Directive?
    if line.startswith("%"):
      # Check for comment
      if line.startswith("%%"):
        return

      # Check for insertion
      match = re.search(r"^%insert(?:-indented\s+(\S+))?\s+(\S+)\s*$", line)
      if match:
        count = match[1] or "0"
        key = match[2]
        # Use the specification this reader was constructed with, not whatever
        # module-level variable happens to be called "specification".
        sections = self.specification.sections
        assert re.match(r"^\d+$", count), "Bad count \"" + count + "\" on " + self.context()
        assert key in sections, "Unknown section \"" + key + "\" on " + self.context()
        indent = " " * int(count)
        for inserted in sections[key]:
          if re.search("TODO", inserted, re.IGNORECASE):
            print("WARNING: \"TODO\" at " + self.context())
          # Blank lines are copied as-is so that no trailing whitespace is introduced.
          self.lines.append(indent + inserted if inserted.rstrip("\n") else inserted)
        return

      # Bad directive
      match = re.search(r"^(%\S*)", line)
      raise AssertionError("Unknown directive \"" + match[1] + "\" on " + self.context())

    # Literal text
    if re.search("TODO", line, re.IGNORECASE):
      print("WARNING: \"TODO\" at " + self.context())
    self.lines.append(line)
332
if __name__ == "__main__":
  # Command-line driver: parse the options, read the specification, expand the
  # template against it, and write the result to the output file.
  cli = argparse.ArgumentParser(description="Create an output file by inserting sections "
                                "from a specification file into a template file")
  # Every option except --verbose is mandatory.
  for short_flag, long_flag, description in (
      ("-k", "--kind", "token identifying kind of file to generate (per \"kind\" directive)"),
      ("-o", "--output", "path to generated output file"),
      ("-s", "--specification", "path to input specification file"),
      ("-t", "--template", "path to input template file"),
  ):
    cli.add_argument(short_flag, long_flag, required=True, help=description)
  cli.add_argument("-v", "--verbose", action="store_true")
  args = cli.parse_args()
  if args.verbose:
    print(args)

  # Read the specification
  specification = Specification(args.specification, args.kind)
  specification.read()
  if args.verbose:
    print(specification.defmacro)

  # Read the template
  template = Template(args.template, specification)
  template.read()

  # Write the output; each collected line already carries its own line ending
  with open(args.output, "w") as output_file:
    output_file.write("".join(template.lines))
362
363# TODO: Write test cases for malformed specification and template files
364# TODO: Find a cleaner way to handle conditionals (%kind) or nesting in general;
365#       maybe add support for more nesting
366# TODO: Could we do away with the distinction between a specification file and a
367#       template file and add a %include directive?
368