#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Parser for Android's version script information."""
from __future__ import annotations

from dataclasses import dataclass, field
import logging
import re
from typing import (
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    NewType,
    Optional,
    TextIO,
    Tuple,
    Union,
)


ApiMap = Mapping[str, int]
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


ALL_ARCHITECTURES = (
    Arch('arm'),
    Arch('arm64'),
    Arch('riscv64'),
    Arch('x86'),
    Arch('x86_64'),
)

# TODO: it would be nice to dedupe with 'has_*_tag' property methods
SUPPORTED_TAGS = ALL_ARCHITECTURES + (
    Tag('apex'),
    Tag('llndk'),
    Tag('platform-only'),
    Tag('systemapi'),
    Tag('var'),
    Tag('weak'),
)

# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000


def logger() -> logging.Logger:
    """Return the main logger for this module."""
    return logging.getLogger(__name__)


@dataclass
class Tags:
    """Container class for the tags attached to a symbol or version."""

    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Constructs tags from a collection of strings.

        Does not decode API levels.
        """
        return cls(tuple(Tag(s) for s in strs))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        yield from self.tags

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if any mode tags (apex, llndk, etc) are set."""
        return (self.has_apex_tags or self.has_llndk_tags
                or self.has_systemapi_tags)

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if any APEX tags are set."""
        return 'apex' in self.tags

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if any systemapi tags are set."""
        return 'systemapi' in self.tags

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if any LL-NDK tags are set.

        Both the bare `llndk` tag and the `llndk=<value>` form count.
        """
        return any(
            tag == 'llndk' or tag.startswith('llndk=') for tag in self.tags)

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if any platform-only tags are set."""
        return 'platform-only' in self.tags

    def copy_introduced_from(self, tags: Tags) -> None:
        """Copies introduced= or introduced-*= tags.

        A tag is only copied when this object does not already carry a tag
        with the same key, so tags already present here take precedence over
        the ones in `tags`.

        Raises:
            ValueError: An `introduced-` tag in `tags` has no `=value` part.
        """
        for tag in tags:
            if not tag.startswith(('introduced=', 'introduced-')):
                continue
            name, _ = split_tag(tag)
            key_prefix = name + '='
            if not any(own.startswith(key_prefix) for own in self.tags):
                self.tags += (tag,)


@dataclass
class Symbol:
    """A symbol definition from a symbol file."""

    name: str
    tags: Tags


@dataclass
class Version:
    """A version block of a symbol file."""

    name: str
    base: Optional[str]
    tags: Tags
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True if this version block is private (platform only)."""
        return self.name.endswith(('_PRIVATE', '_PLATFORM'))


def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the tags found in the trailing `#` comment of this line.

    Tags are whitespace separated. API level tags are decoded with `api_map`.

    Raises:
        ParseError: A tag is unsupported or names an unknown version.
    """
    _, _, all_tags = line.strip().partition('#')
    return Tags(tuple(
        decode_api_level_tag(Tag(e), api_map)
        for e in re.split(r'\s+', all_tags) if e.strip()
    ))


def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag has an API level that may need decoding."""
    return tag.startswith((
        'llndk-deprecated=',
        'llndk=',
        'introduced=',
        'introduced-',
        'versioned=',
    ))


def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Decodes the API level argument into the API level number.

    For the average case, this just decodes the integer value from the string,
    but for unreleased APIs we need to translate from the API codename (like
    "O") to the future API level for that codename.

    Raises:
        KeyError: `api` is not an integer, not "current", and not in `api_map`.
    """
    try:
        return int(api)
    except ValueError:
        # Not a plain number; fall through to the codename handling below.
        pass

    if api == "current":
        return FUTURE_API_LEVEL

    return api_map[api]


def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Decodes API level code name in a tag.

    Tags that do not carry an API level are returned unchanged after being
    checked against SUPPORTED_TAGS.

    Raises:
        ParseError: The tag is unsupported, or an unknown version name was
            found in a tag.
    """
    if not is_api_level_tag(tag):
        if tag not in SUPPORTED_TAGS:
            raise ParseError(f'Unsupported tag: {tag}')

        return tag

    name, value = split_tag(tag)
    try:
        decoded = decode_api_level(value, api_map)
    except KeyError as ex:
        raise ParseError(f'Unknown version name in tag: {tag}') from ex
    return Tag(f'{name}={decoded}')


def split_tag(tag: Tag) -> Tuple[str, str]:
    """Returns a key/value tuple of the tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    if '=' not in tag:
        raise ValueError('Not a key/value tag: ' + tag)
    key, _, value = tag.partition('=')
    return key, value


def get_tag_value(tag: Tag) -> str:
    """Returns the value of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    return split_tag(tag)[1]


class Filter:
    """A filter encapsulates a condition that tells whether a version or a
    symbol should be omitted or not
    """

    def __init__(self, arch: Arch, api: int, llndk: bool = False,
                 apex: bool = False, systemapi: bool = False,
                 ndk: bool = True) -> None:
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        self.ndk = ndk

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if the tagged object should be omitted.

        This defines the rules shared between version tagging and symbol
        tagging.
        """
        # LLNDK mode/tags follow similar filtering, except that API level
        # checking is based on llndk= instead of introduced=.
        if self.llndk:
            if tags.has_mode_tags and not tags.has_llndk_tags:
                return True
            if not symbol_in_arch(tags, self.arch):
                return True
            if not symbol_in_llndk_api(tags, self.arch, self.api):
                return True
            return False
        # An object carrying mode tags is kept only when one of its tags
        # matches an enabled mode (apex/systemapi). Objects with no mode tag
        # fall through to the default arch/API checks because all NDK symbols
        # are implicitly available to APEX and LLNDK.
        if tags.has_mode_tags:
            if self.apex and tags.has_apex_tags:
                return False
            if self.systemapi and tags.has_systemapi_tags:
                return False
            return True
        if not symbol_in_arch(tags, self.arch):
            return True
        if not symbol_in_api(tags, self.arch, self.api):
            return True
        return False

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have in
        the stub library. Sections that contain entirely future symbols or only
        symbols for certain architectures.
        """
        if version.is_private:
            return True
        if version.tags.has_platform_only_tags:
            return True
        # Include all versions when targeting LLNDK because LLNDK symbols are
        # self-versioned. Empty version block will be handled separately.
        if self.llndk:
            return False
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        if not symbol.tags.has_mode_tags and not self.ndk:
            # Symbols that don't have mode tags are NDK. They are usually
            # included, but have to be omitted if NDK symbols are explicitly
            # filtered-out
            return True

        return self._should_omit_tags(symbol.tags)


def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture."""
    has_arch_tags = False
    for tag in tags:
        if tag == arch:
            return True
        if tag in ALL_ARCHITECTURES:
            has_arch_tags = True

    # If there were no arch tags, the symbol is available for all
    # architectures. If there were any arch tags, the symbol is only available
    # for the tagged architectures.
    return not has_arch_tags


def symbol_in_llndk_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given LLNDK API level."""
    # `tags` is walked here and again by symbol_in_api(); snapshot it so a
    # one-shot iterable is not already exhausted on the second pass.
    tags = tuple(tags)
    # Check llndk= first.
    for tag in tags:
        if tag.startswith('llndk='):
            return api >= int(get_tag_value(tag))
    # If not, we keep old behavior: NDK symbols in <= 34 are LLNDK symbols.
    return symbol_in_api(tags, arch, 34)


def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level."""
    introduced_tag = None
    arch_specific = False
    for tag in tags:
        # If there is an arch-specific tag, it should override the common one.
        if tag.startswith('introduced=') and not arch_specific:
            introduced_tag = tag
        elif tag.startswith('introduced-' + arch + '='):
            introduced_tag = tag
            arch_specific = True
        elif tag == 'future':
            return api == FUTURE_API_LEVEL

    if introduced_tag is None:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    return api >= int(get_tag_value(introduced_tag))


def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    for tag in tags:
        if tag.startswith('versioned='):
            return api >= int(get_tag_value(tag))
    # If there is no "versioned" tag, the tag has been versioned for as long as
    # it was introduced.
    return True


class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file."""


class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined."""

    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # Materialise first: the names are consumed once to build the message
        # and must still be available afterwards through the attribute, even
        # when the caller hands in a one-shot iterator.
        symbols: List[str] = list(multiply_defined_symbols)
        super().__init__(
            'Version script contains multiple definitions for: {}'.format(
                ', '.join(symbols)))
        self.multiply_defined_symbols = symbols


class SymbolFileParser:
    """Parses NDK symbol files."""

    def __init__(self, input_file: TextIO, api_map: ApiMap,
                 filt: Filter) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file is malformed, or an LLNDK symbol is missing
                its `llndk=` version.
            MultiplyDefinedSymbolError: A symbol that passes the filter is
                defined more than once.
        """
        versions: List[Version] = []
        while self.next_line():
            assert self.current_line is not None
            if '{' in self.current_line:
                versions.append(self.parse_version())
            else:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')

        self.check_no_duplicate_symbols(versions)
        self.check_llndk_introduced(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        will be a not necessarily obvious "error: redefinition of 'foo'" from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Versions and symbols that the filter omits are not considered.
        """
        symbol_names: set[str] = set()
        multiply_defined_symbols: set[str] = set()
        for version in versions:
            if self.filter.should_omit_version(version):
                continue

            for symbol in version.symbols:
                if self.filter.should_omit_symbol(symbol):
                    continue

                if symbol.name in symbol_names:
                    multiply_defined_symbols.add(symbol.name)
                symbol_names.add(symbol.name)
        if multiply_defined_symbols:
            raise MultiplyDefinedSymbolError(sorted(multiply_defined_symbols))

    def check_llndk_introduced(self, versions: Iterable[Version]) -> None:
        """Raises errors when llndk= is missing for new llndk symbols.

        Only runs when the filter is in LLNDK mode.
        """
        if not self.filter.llndk:
            return

        arch = self.filter.arch
        for version in versions:
            # llndk symbols >= introduced=35 should be tagged
            # explicitly with llndk=yyyymm.
            for symbol in version.symbols:
                if not symbol.tags.has_llndk_tags:
                    continue
                if symbol_in_api(symbol.tags, arch, 34):
                    continue
                if not any(tag.startswith('llndk=') for tag in symbol.tags):
                    raise ParseError(
                        f'{symbol.name}: missing version. `llndk=yyyymm`')

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects `current_line` to be the line that opens the version block.

        Raises:
            ParseError: The block is unterminated, has an unknown visibility
                label, or contains a malformed symbol line.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line():
            assert self.current_line is not None
            if '}' in self.current_line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = self.current_line.partition('}')[2]
                base = base.partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This brace closes the 'extern "C++"' block, not the
                    # version block itself.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in self.current_line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in self.current_line:
                visibility = self.current_line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbol = self.parse_symbol()
                # Symbols inherit the version's introduced tags unless they
                # set their own.
                symbol.tags.copy_introduced_from(tags)
                symbols.append(symbol)
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        Raises:
            ParseError: The line has no `;` terminator or contains a wildcard.
        """
        assert self.current_line is not None
        if ';' not in self.current_line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + self.current_line)
        if '*' in self.current_line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name, _, _ = self.current_line.strip().partition(';')
        tags = get_tags(self.current_line, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The line is also stored in `current_line`.

        A return value of '' indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            if not line:
                # We want to skip empty lines, but '' indicates EOF.
                break
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
        self.current_line = line
        return line