1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parser for Android's version script information."""
17from __future__ import annotations
18
19from dataclasses import dataclass, field
20import logging
21import re
22from typing import (
23    Dict,
24    Iterable,
25    Iterator,
26    List,
27    Mapping,
28    NewType,
29    Optional,
30    TextIO,
31    Tuple,
32    Union,
33)
34
35
# Maps an API level name found in a tag (e.g. a codename) to its number.
ApiMap = Mapping[str, int]
# Distinct string types for architecture names and for tags.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Every architecture name that may be used as a tag.
ALL_ARCHITECTURES = tuple(
    Arch(arch_name)
    for arch_name in ('arm', 'arm64', 'riscv64', 'x86', 'x86_64')
)

# Tags without an API level that are accepted as-is; any architecture name
# is also a supported tag.
# TODO: it would be nice to dedupe with 'has_*_tag' property methods
SUPPORTED_TAGS = ALL_ARCHITECTURES + tuple(
    Tag(tag_name)
    for tag_name in (
        'apex',
        'llndk',
        'platform-only',
        'systemapi',
        'var',
        'weak',
    )
)

# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000
61
62
def logger() -> logging.Logger:
    """Look up and return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
66
67
@dataclass
class Tags:
    """The set of tags attached to a symbol or to a version block.

    Supports membership tests (``'apex' in tags``) and iteration over the
    individual tags.
    """

    # The tags themselves, kept in the order they were given.
    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Builds a Tags object out of plain strings.

        The strings are used as-is: API levels are not decoded.
        """
        return Tags(tuple(map(Tag, strs)))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        """Returns True if exactly this tag is present."""
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        """Yields each tag in order."""
        for tag in self.tags:
            yield tag

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if an apex, llndk or systemapi tag is present."""
        if self.has_apex_tags:
            return True
        if self.has_llndk_tags:
            return True
        return self.has_systemapi_tags

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if the 'apex' tag is present."""
        return 'apex' in self

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if the 'systemapi' tag is present."""
        return 'systemapi' in self

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if 'llndk' or any 'llndk=<value>' tag is present."""
        return any(
            tag == 'llndk' or tag.startswith('llndk=') for tag in self.tags
        )

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if the 'platform-only' tag is present."""
        return 'platform-only' in self

    def copy_introduced_from(self, tags: Tags) -> None:
        """Adopts introduced= and introduced-*= tags from another Tags.

        A tag is appended only when this object does not already hold a tag
        with the same key, so values that are already present are kept.
        """
        for candidate in tags:
            if not candidate.startswith(('introduced=', 'introduced-')):
                continue
            key, _ = split_tag(candidate)
            key_prefix = key + '='
            if any(own.startswith(key_prefix) for own in self.tags):
                # An equivalent tag is already set here; keep it.
                continue
            self.tags += (candidate,)
123
124
@dataclass
class Symbol:
    """One symbol entry read from a symbol file."""

    # The symbol's name.
    name: str
    # The tags attached to the symbol.
    tags: Tags
131
132
@dataclass
class Version:
    """One version block read from a symbol file."""

    # Name of the version block.
    name: str
    # Name of the version this one is based on, or None if there is none.
    base: Optional[str]
    # Tags attached to the version block.
    tags: Tags
    # Symbols collected for this version block.
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True for private (platform only) version blocks.

        A block is private when its name ends in _PRIVATE or _PLATFORM.
        """
        return self.name.endswith(('_PRIVATE', '_PLATFORM'))
146
147
def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the tags found on this line as a Tags object.

    Tags are the whitespace-separated words following the first '#' on the
    line. Each one is passed through decode_api_level_tag, so any error it
    raises propagates to the caller.
    """
    comment = line.strip().partition('#')[2]
    decoded = [
        decode_api_level_tag(Tag(word), api_map) for word in comment.split()
    ]
    return Tags(tuple(decoded))
155
156
def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag carries an API level that may need decoding.

    That is the case for llndk-deprecated=, llndk=, introduced=,
    introduced-* and versioned= tags.
    """
    api_level_prefixes = (
        'llndk-deprecated=',
        'llndk=',
        'introduced=',
        'introduced-',
        'versioned=',
    )
    return tag.startswith(api_level_prefixes)
170
171
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Turns the API level argument of a tag into an API level number.

    Three spellings are understood: a plain integer, the word "current"
    (which maps to FUTURE_API_LEVEL), and a name such as the codename "O" of
    an unreleased API, which is looked up in api_map.

    Raises:
        KeyError: The name is neither an integer, "current", nor in api_map.
    """
    if api == "current":
        return FUTURE_API_LEVEL

    try:
        numeric_level = int(api)
    except ValueError:
        # Not a number: fall through to the name lookup below.
        pass
    else:
        return numeric_level

    return api_map[api]
188
189
def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Returns the tag with its API level name replaced by a number.

    Tags that carry no API level are returned unchanged, provided they are
    listed in SUPPORTED_TAGS.

    Raises:
        ParseError: The tag is not supported, or it names a version that
            cannot be decoded.
    """
    if is_api_level_tag(tag):
        name, value = split_tag(tag)
        try:
            level = decode_api_level(value, api_map)
        except KeyError as ex:
            raise ParseError(f'Unknown version name in tag: {tag}') from ex
        return Tag(name + '=' + str(level))

    if tag in SUPPORTED_TAGS:
        return tag

    raise ParseError(f'Unsupported tag: {tag}')
208
209
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Splits a key=value tag at its first '='.

    Raises:
        ValueError: The tag contains no '=', so it is not a key/value tag.

    Returns: Tuple of (key, value). Both components are strings; the value
    keeps any further '=' characters.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
222
223
def get_tag_value(tag: Tag) -> str:
    """Returns the part of a key=value tag that follows the first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _, value = split_tag(tag)
    return value
233
class Filter:
    """A condition that tells whether a version or a symbol should be
    omitted or not.

    The decision is based on the target architecture and API level together
    with the llndk, apex, systemapi and ndk mode flags.
    """

    def __init__(
        self,
        arch: Arch,
        api: int,
        llndk: bool = False,
        apex: bool = False,
        systemapi: bool = False,
        ndk: bool = True,
    ) -> None:
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        self.ndk = ndk

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if an object carrying these tags should be omitted.

        These are the rules that version tagging and symbol tagging have in
        common.
        """
        if self.llndk:
            # LLNDK mode filters much like the default mode below, except
            # that the API level check is based on llndk= rather than
            # introduced=.
            if tags.has_mode_tags and not tags.has_llndk_tags:
                return True
            return not (
                symbol_in_arch(tags, self.arch)
                and symbol_in_llndk_api(tags, self.arch, self.api)
            )

        if tags.has_mode_tags:
            # Mode tags only exclude the object from the other modes: it is
            # kept when one of its mode tags matches an enabled mode of this
            # filter, and omitted otherwise.
            mode_matches = (
                (self.apex and tags.has_apex_tags)
                or (self.systemapi and tags.has_systemapi_tags)
            )
            return not mode_matches

        # No mode tags at all: default behavior, because all NDK symbols are
        # implicitly available to the other modes. Only the architecture and
        # the API level decide.
        return not (
            symbol_in_arch(tags, self.arch)
            and symbol_in_api(tags, self.arch, self.api)
        )

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have
        in the stub library: sections that contain entirely future symbols
        or only symbols for certain architectures. Private and platform-only
        sections are always omitted.
        """
        if version.is_private or version.tags.has_platform_only_tags:
            return True
        if self.llndk:
            # Include all versions when targeting LLNDK because LLNDK symbols
            # are self-versioned. Empty version blocks are handled
            # separately.
            return False
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        # Symbols without mode tags are NDK symbols. They are usually
        # included, but have to go when NDK symbols are explicitly
        # filtered out.
        is_ndk_symbol = not symbol.tags.has_mode_tags
        if is_ndk_symbol and not self.ndk:
            return True

        return self._should_omit_tags(symbol.tags)
304
def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture.

    A symbol without any architecture tag is available everywhere. Once it
    has at least one architecture tag, it is only available for the
    architectures it is tagged with.
    """
    present = tuple(tags)
    if arch in present:
        return True
    return not any(tag in ALL_ARCHITECTURES for tag in present)
318
def symbol_in_llndk_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given LLNDK API level.

    The first llndk=<level> tag decides when there is one. Without such a
    tag the old behavior is kept: NDK symbols in <= 34 are LLNDK symbols.
    """
    llndk_tag = next((tag for tag in tags if tag.startswith('llndk=')), None)
    if llndk_tag is None:
        return symbol_in_api(tags, arch, 34)
    return api >= int(get_tag_value(llndk_tag))
327
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level.

    A 'future' tag makes the symbol available only at FUTURE_API_LEVEL.
    Otherwise the symbol is available from its introduced level onwards,
    where an introduced-<arch>= tag for the given architecture overrides
    the common introduced= tag.
    """
    arch_prefix = 'introduced-' + arch + '='
    common_tag = None
    arch_tag = None
    for tag in tags:
        if tag == 'future':
            return api == FUTURE_API_LEVEL
        if tag.startswith(arch_prefix):
            arch_tag = tag
        elif tag.startswith('introduced='):
            common_tag = tag

    # The arch-specific tag, when present, wins over the common one.
    introduced_tag = common_tag if arch_tag is None else arch_tag
    if introduced_tag is None:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    return api >= int(get_tag_value(introduced_tag))
348
349
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This implements the `versioned=API` tag, which should be needed very
    rarely: its only real use is to repair versioning mistakes that have
    already shipped.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None)
    if versioned_tag is None:
        # Without a "versioned" tag, the symbol has been versioned for as
        # long as it has been introduced.
        return True
    return api >= int(get_tag_value(versioned_tag))
368
369
class ParseError(RuntimeError):
    """Raised when a symbol file cannot be parsed."""
372
373
class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined.

    Attributes:
        multiply_defined_symbols: List of the offending symbol names, in the
            order they were passed in.
    """
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # Materialize the names once. Building the message consumes the
        # iterable, so storing a one-shot iterable (e.g. a generator) would
        # leave the attribute exhausted.
        symbols = list(multiply_defined_symbols)
        joined = ', '.join(symbols)
        super().__init__(
            f'Version script contains multiple definitions for: {joined}')
        self.multiply_defined_symbols = symbols
381
382
class SymbolFileParser:
    """Parses NDK symbol files.

    The parser reads the input line by line. `current_line` always holds the
    line most recently returned by `next_line`.
    """
    def __init__(self, input_file: TextIO, api_map: ApiMap, filt: Filter) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the whole symbol file into a list of Version objects.

        Raises:
            ParseError: A top level line does not open a version block, or a
                check on the parsed versions failed.
            MultiplyDefinedSymbolError: A symbol name is defined more than
                once among the symbols that are not filtered out.
        """
        versions: List[Version] = []
        while self.next_line():
            assert self.current_line is not None
            if '{' not in self.current_line:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')
            versions.append(self.parse_version())

        self.check_no_duplicate_symbols(versions)
        self.check_llndk_introduced(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error
        message would otherwise be a not necessarily obvious
        "error: redefinition of 'foo'" from stub.c, so it's better for us to
        catch this situation and raise a better error.

        Versions and symbols that the filter omits are not considered.
        """
        kept_names = (
            symbol.name
            for version in versions
            if not self.filter.should_omit_version(version)
            for symbol in version.symbols
            if not self.filter.should_omit_symbol(symbol)
        )

        seen = set()
        duplicates = set()
        for symbol_name in kept_names:
            if symbol_name in seen:
                duplicates.add(symbol_name)
            else:
                seen.add(symbol_name)

        if duplicates:
            raise MultiplyDefinedSymbolError(sorted(duplicates))

    def check_llndk_introduced(self, versions: Iterable[Version]) -> None:
        """Raises errors when llndk= is missing for new llndk symbols.

        Only active when the filter targets LLNDK. llndk symbols that are
        not available at API level 34 (i.e. introduced >= 35) must be tagged
        explicitly with llndk=yyyymm.
        """
        if not self.filter.llndk:
            return

        arch = self.filter.arch
        for version in versions:
            for symbol in version.symbols:
                if not symbol.tags.has_llndk_tags:
                    continue
                if symbol_in_api(symbol.tags, arch, 34):
                    continue
                if not any(tag.startswith('llndk=') for tag in symbol.tags):
                    raise ParseError(
                        f'{symbol.name}: missing version. `llndk=yyyymm`')

    def parse_version(self) -> Version:
        """Parses the version section that starts on the current line.

        Raises:
            ParseError: The section is malformed or the input ends before
                the section is closed.
        """
        assert self.current_line is not None
        header = self.current_line
        name = header.split('{')[0].strip()
        tags = get_tags(header, self.api_map)
        symbols: List[Symbol] = []
        scope_is_global = {'local': False, 'global': True}
        in_global_scope = True
        in_cpp_block = False
        while self.next_line():
            line = self.current_line
            if '}' in line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                trailer = line.partition('}')[2].partition('#')[0].strip()
                if not trailer.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if not in_cpp_block:
                    base = trailer.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
                # This brace only closed the 'extern "C++"' block.
                in_cpp_block = False
            elif 'extern "C++" {' in line:
                in_cpp_block = True
            elif not in_cpp_block and ':' in line:
                visibility = line.split(':')[0].strip()
                if visibility not in scope_is_global:
                    raise ParseError('Unknown visiblity label: ' + visibility)
                in_global_scope = scope_is_global[visibility]
            elif in_global_scope and not in_cpp_block:
                symbol = self.parse_symbol()
                # Symbols inherit the version's introduced tags unless they
                # carry their own.
                symbol.tags.copy_introduced_from(tags)
                symbols.append(symbol)
            # Anything else is in a hidden scope or in an 'extern "C++"'
            # block and is ignored.
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses the symbol on the current line into a Symbol object.

        Raises:
            ParseError: The line has no terminating ';' or contains a
                wildcard.
        """
        assert self.current_line is not None
        line = self.current_line
        if ';' not in line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + line)
        if '*' in line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name = line.strip().partition(';')[0]
        return Symbol(name, get_tags(line, self.api_map))

    def next_line(self) -> str:
        """Advances to the next non-empty non-comment line and returns it.

        The line is also stored in `current_line`. A return value of ''
        indicates EOF.
        """
        line = self.input_file.readline()
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            # Keep reading until a line with real content shows up.
            # readline() returning '' means EOF and ends the iteration.
            for line in iter(self.input_file.readline, ''):
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    break
            else:
                line = ''
        self.current_line = line
        return self.current_line
527