1"""Provides the MacroChecker class."""
2
3# Copyright (c) 2018-2019 Collabora, Ltd.
4#
5# SPDX-License-Identifier: Apache-2.0
6#
7# Author(s):    Ryan Pavlik <ryan.pavlik@collabora.com>
8
9from io import StringIO
10import re
11
12
class MacroChecker(object):
    """Perform and track checking of one or more files in an API spec.

    This does not necessarily need to be subclassed per-API: it is sufficiently
    parameterized in the constructor for expected usage.

    Each processed file (or test string) is wrapped in an instance of the
    ``macro_checker_file_type`` given to the constructor and kept in
    ``self.files``; cross-file state (links, ref pages, includes, headings)
    is accumulated on this object.
    """

    class _FileStreamMaker(object):
        """Stream factory that opens a named file as UTF-8 text on demand."""

        def __init__(self, filename):
            self.filename = filename

        def make_stream(self):
            """Return a newly opened read-only text stream for the file.

            Nothing in this class closes the stream; that is left to
            whoever calls make_stream().
            """
            return open(self.filename, 'r', encoding='utf-8')

    class _StringStreamMaker(object):
        """Stream factory that serves an in-memory string."""

        def __init__(self, string):
            self.string = string

        def make_stream(self):
            """Return a fresh StringIO positioned at the start of the string."""
            return StringIO(self.string)

    def __init__(self, enabled_messages, entity_db,
                 macro_checker_file_type, root_path):
        """Construct an object that tracks checking one or more files in an API spec.

        enabled_messages -- a set of MessageId that should be enabled.
        entity_db -- an object of a EntityDatabase subclass for this API.
        macro_checker_file_type -- Type to instantiate to create the right
                                   MacroCheckerFile subclass for this API.
        root_path -- A Path object for the root of this repository.
        """
        self.enabled_messages = enabled_messages
        self.entity_db = entity_db
        self.macro_checker_file_type = macro_checker_file_type
        self.root_path = root_path

        # One macro_checker_file_type instance per processed file/string,
        # in processing order.
        self.files = []

        # Entity names for which addRefPage() has been called.
        self.refpages = set()

        # keys: entity names. values: list of contexts (MessageContext),
        # one per addLinkToEntity() call for that entity.
        self.links = {}
        # keys: entity names. These three are not filled in by this class;
        # presumably the per-file checkers populate them with MessageContext
        # values -- confirm against the MacroCheckerFile implementation.
        self.apiIncludes = {}
        self.validityIncludes = {}
        self.headings = {}

        # Regexes that are members because they depend on the name prefix.

        # apiPrefix, followed by some word characters or * as many times as desired,
        # NOT followed by >> and NOT preceded by one of the characters in that first character class.
        # (which distinguish "names being used somewhere other than prose").
        self.suspected_missing_macro_re = re.compile(
            r'\b(?<![-=:/[\.`+,])(?P<entity_name>{}[\w*]+)\b(?!>>)'.format(
                self.entity_db.case_insensitive_name_prefix_pattern)
        )
        # name_prefix is a literal string (unlike the *_pattern attribute used
        # above), so escape it before splicing it into the regex.
        self.heading_command_re = re.compile(
            r'=+ (?P<command>{}[\w]+)'.format(
                re.escape(self.entity_db.name_prefix))
        )

        macros_pattern = '|'.join(re.escape(macro)
                                  for macro in self.entity_db.macros)
        # the "formatting" group is to strip matching */**/_/__
        # surrounding an entire macro.
        self.macro_re = re.compile(
            r'(?P<formatting>\**|_*)(?P<macro>{}):(?P<entity_name>[\w*]+((?P<subscript>[\[][^\]]*[\]]))?)(?P=formatting)'.format(macros_pattern))

    def haveLinkTarget(self, entity):
        """Report if we have parsed an API include (or heading) for an entity.

        Returns None if there is no entity with that name (findEntity() gave
        a falsy result), True if the name is a key of apiIncludes or headings,
        and False otherwise.
        """
        if not self.findEntity(entity):
            return None
        if entity in self.apiIncludes:
            return True
        return entity in self.headings

    def hasFixes(self):
        """Report if any files have auto-fixes."""
        return any(f.hasFixes() for f in self.files)

    def addLinkToEntity(self, entity, context):
        """Record seeing a link to an entity's docs from a context.

        Contexts are appended to a per-entity list in self.links.
        """
        self.links.setdefault(entity, []).append(context)

    def seenRefPage(self, entity):
        """Check if a ref-page markup block has been seen for an entity."""
        return entity in self.refpages

    def addRefPage(self, entity):
        """Record seeing a ref-page markup block for an entity."""
        self.refpages.add(entity)

    def findMacroAndEntity(self, macro, entity):
        """Look up EntityData by macro and entity pair.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findMacroAndEntity(macro, entity)

    def findEntity(self, entity):
        """Look up EntityData by entity name (case-sensitive).

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findEntity(entity)

    def findEntityCaseInsensitive(self, entity):
        """Look up EntityData by entity name (case-insensitive).

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findEntityCaseInsensitive(entity)

    def getMemberNames(self, commandOrStruct):
        """Given a command or struct name, retrieve the names of each member/param.

        Returns an empty list if the entity is not found or doesn't have members/params.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.getMemberNames(commandOrStruct)

    def likelyRecognizedEntity(self, entity_name):
        """Guess (based on name prefix alone) if an entity is likely to be recognized.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.likelyRecognizedEntity(entity_name)

    def isLinkedMacro(self, macro):
        """Identify if a macro is considered a "linked" macro.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.isLinkedMacro(macro)

    def processFile(self, filename):
        """Parse an .adoc file belonging to the spec and check it for errors.

        The file checker is constructed with a stream maker that opens
        filename as UTF-8 text, processed, and then appended to self.files.
        """
        f = self.macro_checker_file_type(self, filename, self.enabled_messages,
                                         self._FileStreamMaker(filename))
        f.process()
        self.files.append(f)

    def processString(self, s):
        """Process a string as if it were a spec file.

        Used for testing purposes.

        Returns the file checker object created for the string (which is
        also appended to self.files).
        """
        if "\n" in s.rstrip():
            # remove leading spaces from each line to allow easier
            # block-quoting in tests
            s = "\n".join(line.lstrip() for line in s.split("\n"))
            # fabricate a "filename" that will display better.
            filename = "string{}\n****START OF STRING****\n{}\n****END OF STRING****\n".format(
                len(self.files), s.rstrip())
        else:
            filename = "string{}: {}".format(
                len(self.files), s.rstrip())

        f = self.macro_checker_file_type(self, filename, self.enabled_messages,
                                         self._StringStreamMaker(s))
        f.process()
        self.files.append(f)
        return f

    def numDiagnostics(self):
        """Return the total number of diagnostics (warnings and errors) over all the files processed."""
        return sum(f.numDiagnostics() for f in self.files)

    def numErrors(self):
        """Return the total number of errors over all the files processed."""
        return sum(f.numErrors() for f in self.files)

    def getMissingUnreferencedApiIncludes(self):
        """Return the unreferenced entity names that we expected to see an API include or link target for, but did not.

        Counterpart to getBrokenLinks(): This method returns the entity names
        that were not used in a linking macro (and thus wouldn't create a broken link),
        but were nevertheless expected and not seen.

        The result is a lazy generator over entity_db.generating_entities,
        evaluated against the checker state at iteration time.
        """
        return (entity for entity in self.entity_db.generating_entities
                if (not self.haveLinkTarget(entity)) and entity not in self.links)

    def getBrokenLinks(self):
        """Return the entity names and usage contexts that we expected to see an API include or link target for, but did not.

        Counterpart to getMissingUnreferencedApiIncludes(): This method returns only the
        entity names that were used in a linking macro (and thus create a broken link),
        but were not seen. The values of the dictionary are a list of MessageContext objects
        for each linking macro usage for this entity name.
        """
        return {entity: contexts for entity, contexts in self.links.items()
                if self.entity_db.entityGenerates(entity) and not self.haveLinkTarget(entity)}

    def getMissingRefPages(self):
        """Return the entities that we expected, but did not see, a ref page block for.

        The result is a generator (not a list) yielding names in sorted order.

        The heuristics here are rather crude: we expect a ref page for every generating entry.
        """
        return (entity for entity in sorted(self.entity_db.generating_entities)
                if entity not in self.refpages)
221