1#!/usr/bin/env python3 2# 3# Copyright (C) 2019 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""ELF file checker. 18 19This command ensures all undefined symbols in an ELF file can be resolved to 20global (or weak) symbols defined in shared objects specified in DT_NEEDED 21entries. 22""" 23 24from __future__ import print_function 25 26import argparse 27import collections 28import os 29import os.path 30import re 31import struct 32import subprocess 33import sys 34 35 36_ELF_MAGIC = b'\x7fELF' 37 38 39# Known machines 40_EM_386 = 3 41_EM_ARM = 40 42_EM_X86_64 = 62 43_EM_AARCH64 = 183 44 45_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64} 46 47 48# ELF header struct 49_ELF_HEADER_STRUCT = ( 50 ('ei_magic', '4s'), 51 ('ei_class', 'B'), 52 ('ei_data', 'B'), 53 ('ei_version', 'B'), 54 ('ei_osabi', 'B'), 55 ('ei_pad', '8s'), 56 ('e_type', 'H'), 57 ('e_machine', 'H'), 58 ('e_version', 'I'), 59) 60 61_ELF_HEADER_STRUCT_FMT = ''.join(_fmt for _, _fmt in _ELF_HEADER_STRUCT) 62 63 64ELFHeader = collections.namedtuple( 65 'ELFHeader', [_name for _name, _ in _ELF_HEADER_STRUCT]) 66 67 68ELF = collections.namedtuple( 69 'ELF', 70 ('alignments', 'dt_soname', 'dt_needed', 'imported', 'exported', 'header')) 71 72 73def _get_os_name(): 74 """Get the host OS name.""" 75 if sys.platform.startswith('linux'): 76 return 'linux' 77 if sys.platform.startswith('darwin'): 78 return 'darwin' 79 raise ValueError(sys.platform + ' is not supported') 80 81 82def _get_build_top(): 83 """Find the build top of the source tree ($ANDROID_BUILD_TOP).""" 84 prev_path = None 85 curr_path = os.path.abspath(os.getcwd()) 86 while prev_path != curr_path: 87 if os.path.exists(os.path.join(curr_path, '.repo')): 88 return curr_path 89 prev_path = curr_path 90 curr_path = os.path.dirname(curr_path) 91 return None 92 93 94def _select_latest_llvm_version(versions): 95 """Select the latest LLVM prebuilts version from a set of versions.""" 96 pattern = re.compile('clang-r([0-9]+)([a-z]?)') 97 found_rev = 0 98 found_ver = None 99 for curr_ver in versions: 100 match = pattern.match(curr_ver) 101 if not match: 102 continue 103 curr_rev = int(match.group(1)) 104 if not found_ver or curr_rev > found_rev or ( 105 curr_rev == found_rev and curr_ver > found_ver): 106 found_rev = curr_rev 107 found_ver = curr_ver 108 return found_ver 109 110 111def _get_latest_llvm_version(llvm_dir): 112 """Find the latest LLVM prebuilts version from `llvm_dir`.""" 113 return _select_latest_llvm_version(os.listdir(llvm_dir)) 114 115 116def _get_llvm_dir(): 117 """Find the path to LLVM prebuilts.""" 118 build_top = _get_build_top() 119 120 llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE') 121 if not llvm_prebuilts_base: 122 llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host') 123 124 llvm_dir = os.path.join( 125 build_top, llvm_prebuilts_base, _get_os_name() + '-x86') 126 127 if not os.path.exists(llvm_dir): 128 return None 129 130 llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION') 131 if not llvm_prebuilts_version: 132 llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir) 133 134 llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version) 135 136 if not os.path.exists(llvm_dir): 137 return None 138 139 return llvm_dir 140 141 142def _get_llvm_readobj(): 143 """Find the path to llvm-readobj executable.""" 144 llvm_dir = _get_llvm_dir() 145 llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj') 146 return llvm_readobj if os.path.exists(llvm_readobj) else 'llvm-readobj' 147 148 149class ELFError(ValueError): 150 """Generic ELF parse error""" 151 pass 152 153 154class ELFInvalidMagicError(ELFError): 155 """Invalid ELF magic word error""" 156 def __init__(self): 157 super(ELFInvalidMagicError, self).__init__('bad ELF magic') 158 159 160class ELFParser(object): 161 """ELF file parser""" 162 163 @classmethod 164 def _read_elf_header(cls, elf_file_path): 165 """Read the ELF magic word from the beginning of the file.""" 166 with open(elf_file_path, 'rb') as elf_file: 167 buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT)) 168 try: 169 return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf)) 170 except struct.error: 171 return None 172 173 174 @classmethod 175 def open(cls, elf_file_path, llvm_readobj): 176 """Open and parse the ELF file.""" 177 # Parse the ELF header to check the magic word. 178 header = cls._read_elf_header(elf_file_path) 179 if not header or header.ei_magic != _ELF_MAGIC: 180 raise ELFInvalidMagicError() 181 182 # Run llvm-readobj and parse the output. 183 return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj) 184 185 186 @classmethod 187 def _find_prefix(cls, pattern, lines_it): 188 """Iterate `lines_it` until finding a string that starts with `pattern`.""" 189 for line in lines_it: 190 if line.startswith(pattern): 191 return True 192 return False 193 194 195 @classmethod 196 def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj): 197 """Run llvm-readobj and parse the output.""" 198 cmd = [llvm_readobj, '--program-headers', '--dynamic-table', 199 '--dyn-symbols', elf_file_path] 200 out = subprocess.check_output(cmd, text=True) 201 lines = out.splitlines() 202 return cls._parse_llvm_readobj(elf_file_path, header, lines) 203 204 205 @classmethod 206 def _parse_llvm_readobj(cls, elf_file_path, header, lines): 207 """Parse the output of llvm-readobj.""" 208 lines_it = iter(lines) 209 alignments = cls._parse_program_headers(lines_it) 210 dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it) 211 imported, exported = cls._parse_dynamic_symbols(lines_it) 212 return ELF(alignments, dt_soname, dt_needed, imported, exported, header) 213 214 215 _PROGRAM_HEADERS_START_PATTERN = 'ProgramHeaders [' 216 _PROGRAM_HEADERS_END_PATTERN = ']' 217 _PROGRAM_HEADER_START_PATTERN = 'ProgramHeader {' 218 _PROGRAM_HEADER_TYPE_PATTERN = re.compile('^\\s+Type:\\s+(.*)$') 219 _PROGRAM_HEADER_ALIGN_PATTERN = re.compile('^\\s+Alignment:\\s+(.*)$') 220 _PROGRAM_HEADER_END_PATTERN = '}' 221 222 223 @classmethod 224 def _parse_program_headers(cls, lines_it): 225 """Parse the dynamic table section.""" 226 alignments = [] 227 228 if not cls._find_prefix(cls._PROGRAM_HEADERS_START_PATTERN, lines_it): 229 raise ELFError() 230 231 for line in lines_it: 232 # Parse each program header 233 if line.strip() == cls._PROGRAM_HEADER_START_PATTERN: 234 p_align = None 235 p_type = None 236 for line in lines_it: 237 if line.strip() == cls._PROGRAM_HEADER_END_PATTERN: 238 if not p_align: 239 raise ELFError("Could not parse alignment from program header!") 240 if not p_type: 241 raise ELFError("Could not parse type from program header!") 242 243 if p_type.startswith("PT_LOAD "): 244 alignments.append(int(p_align)) 245 break 246 247 match = cls._PROGRAM_HEADER_TYPE_PATTERN.match(line) 248 if match: 249 p_type = match.group(1) 250 251 match = cls._PROGRAM_HEADER_ALIGN_PATTERN.match(line) 252 if match: 253 p_align = match.group(1) 254 255 if line == cls._PROGRAM_HEADERS_END_PATTERN: 256 break 257 258 return alignments 259 260 261 _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection [' 262 263 _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile( 264 '^ 0x[0-9a-fA-F]+\\s+NEEDED\\s+Shared library: \\[(.*)\\]$') 265 266 _DYNAMIC_SECTION_SONAME_PATTERN = re.compile( 267 '^ 0x[0-9a-fA-F]+\\s+SONAME\\s+Library soname: \\[(.*)\\]$') 268 269 _DYNAMIC_SECTION_END_PATTERN = ']' 270 271 272 @classmethod 273 def _parse_dynamic_table(cls, elf_file_path, lines_it): 274 """Parse the dynamic table section.""" 275 dt_soname = os.path.basename(elf_file_path) 276 dt_needed = [] 277 278 dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it) 279 if not dynamic: 280 return (dt_soname, dt_needed) 281 282 for line in lines_it: 283 if line == cls._DYNAMIC_SECTION_END_PATTERN: 284 break 285 286 match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line) 287 if match: 288 dt_needed.append(match.group(1)) 289 continue 290 291 match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line) 292 if match: 293 dt_soname = match.group(1) 294 continue 295 296 return (dt_soname, dt_needed) 297 298 299 _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols [' 300 _DYNAMIC_SYMBOLS_END_PATTERN = ']' 301 302 _SYMBOL_ENTRY_START_PATTERN = ' Symbol {' 303 _SYMBOL_ENTRY_PATTERN = re.compile('^ ([A-Za-z0-9_]+): (.*)$') 304 _SYMBOL_ENTRY_PAREN_PATTERN = re.compile( 305 '\\s+\\((?:(?:\\d+)|(?:0x[0-9a-fA-F]+))\\)$') 306 _SYMBOL_ENTRY_END_PATTERN = ' }' 307 308 309 @staticmethod 310 def _parse_symbol_name(name_with_version): 311 """Split `name_with_version` into name and version. This function may split 312 at last occurrence of `@@` or `@`.""" 313 pos = name_with_version.rfind('@') 314 if pos == -1: 315 name = name_with_version 316 version = '' 317 else: 318 if pos > 0 and name_with_version[pos - 1] == '@': 319 name = name_with_version[0:pos - 1] 320 else: 321 name = name_with_version[0:pos] 322 version = name_with_version[pos + 1:] 323 return (name, version) 324 325 326 @classmethod 327 def _parse_dynamic_symbols(cls, lines_it): 328 """Parse dynamic symbol table and collect imported and exported symbols.""" 329 imported = collections.defaultdict(set) 330 exported = collections.defaultdict(set) 331 332 for symbol in cls._parse_dynamic_symbols_internal(lines_it): 333 name, version = cls._parse_symbol_name(symbol['Name']) 334 if name: 335 if symbol['Section'] == 'Undefined': 336 if symbol['Binding'] != 'Weak': 337 imported[name].add(version) 338 else: 339 if symbol['Binding'] != 'Local': 340 exported[name].add(version) 341 342 # Freeze the returned imported/exported dict. 343 return (dict(imported), dict(exported)) 344 345 346 @classmethod 347 def _parse_dynamic_symbols_internal(cls, lines_it): 348 """Parse symbols entries and yield each symbols.""" 349 350 if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it): 351 return 352 353 for line in lines_it: 354 if line == cls._DYNAMIC_SYMBOLS_END_PATTERN: 355 return 356 357 if line == cls._SYMBOL_ENTRY_START_PATTERN: 358 symbol = {} 359 continue 360 361 if line == cls._SYMBOL_ENTRY_END_PATTERN: 362 yield symbol 363 symbol = None 364 continue 365 366 match = cls._SYMBOL_ENTRY_PATTERN.match(line) 367 if match: 368 key = match.group(1) 369 value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2)) 370 symbol[key] = value 371 continue 372 373 374class Checker(object): 375 """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.""" 376 377 def __init__(self, llvm_readobj): 378 self._file_path = '' 379 self._file_under_test = None 380 self._shared_libs = [] 381 382 self._llvm_readobj = llvm_readobj 383 384 385 if sys.stderr.isatty(): 386 _ERROR_TAG = '\033[0;1;31merror:\033[m' # Red error 387 _NOTE_TAG = '\033[0;1;30mnote:\033[m' # Black note 388 else: 389 _ERROR_TAG = 'error:' # Red error 390 _NOTE_TAG = 'note:' # Black note 391 392 393 def _error(self, *args): 394 """Emit an error to stderr.""" 395 print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr) 396 397 398 def _note(self, *args): 399 """Emit a note to stderr.""" 400 print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr) 401 402 403 def _load_elf_file(self, path, skip_bad_elf_magic): 404 """Load an ELF file from the `path`.""" 405 try: 406 return ELFParser.open(path, self._llvm_readobj) 407 except (IOError, OSError): 408 self._error('Failed to open "{}".'.format(path)) 409 sys.exit(2) 410 except ELFInvalidMagicError: 411 if skip_bad_elf_magic: 412 sys.exit(0) 413 else: 414 self._error('File "{}" must have a valid ELF magic word.'.format(path)) 415 sys.exit(2) 416 except: 417 self._error('An unknown error occurred while opening "{}".'.format(path)) 418 raise 419 420 421 def load_file_under_test(self, path, skip_bad_elf_magic, 422 skip_unknown_elf_machine): 423 """Load file-under-test (either an executable or a shared lib).""" 424 self._file_path = path 425 self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic) 426 427 if skip_unknown_elf_machine and \ 428 self._file_under_test.header.e_machine not in _KNOWN_MACHINES: 429 sys.exit(0) 430 431 432 def load_shared_libs(self, shared_lib_paths): 433 """Load shared libraries.""" 434 for path in shared_lib_paths: 435 self._shared_libs.append(self._load_elf_file(path, False)) 436 437 438 def check_dt_soname(self, soname): 439 """Check whether DT_SONAME matches installation file name.""" 440 if self._file_under_test.dt_soname != soname: 441 self._error('DT_SONAME "{}" must be equal to the file name "{}".' 442 .format(self._file_under_test.dt_soname, soname)) 443 sys.exit(2) 444 445 446 def check_dt_needed(self, system_shared_lib_names): 447 """Check whether all DT_NEEDED entries are specified in the build 448 system.""" 449 450 missing_shared_libs = False 451 452 # Collect the DT_SONAMEs from shared libs specified in the build system. 453 specified_sonames = {lib.dt_soname for lib in self._shared_libs} 454 455 # Chech whether all DT_NEEDED entries are specified. 456 for lib in self._file_under_test.dt_needed: 457 if lib not in specified_sonames: 458 self._error(f'DT_NEEDED "{lib}" is not specified in shared_libs.') 459 missing_shared_libs = True 460 461 if missing_shared_libs: 462 dt_needed = sorted(set(self._file_under_test.dt_needed)) 463 modules = [re.sub('\\.so$', '', lib) for lib in dt_needed] 464 465 # Remove system shared libraries from the suggestion since they are added 466 # by default. 467 modules = [name for name in modules 468 if name not in system_shared_lib_names] 469 470 self._note() 471 self._note('Fix suggestions:') 472 self._note( 473 ' Android.bp: shared_libs: [' + 474 ', '.join('"' + module + '"' for module in modules) + '],') 475 self._note( 476 ' Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules)) 477 478 self._note() 479 self._note('If the fix above doesn\'t work, bypass this check with:') 480 self._note(' Android.bp: check_elf_files: false,') 481 self._note(' Android.mk: LOCAL_CHECK_ELF_FILES := false') 482 483 sys.exit(2) 484 485 def check_max_page_size(self, max_page_size): 486 for alignment in self._file_under_test.alignments: 487 if alignment % max_page_size != 0: 488 self._error(f'Load segment has alignment {alignment} but ' 489 f'{max_page_size} required.') 490 self._note() 491 self._note('Fix suggestions:') 492 self._note(f' use linker flag "-Wl,-z,max-page-size={max_page_size}" ' 493 f'when compiling this lib') 494 self._note() 495 self._note('If the fix above doesn\'t work, bypass this check with:') 496 self._note(' Android.bp: ignore_max_page_size: true,') 497 self._note(' Android.mk: LOCAL_IGNORE_MAX_PAGE_SIZE := true') 498 self._note(' Device mk: PRODUCT_CHECK_PREBUILT_MAX_PAGE_SIZE := false') 499 500 # TODO: instead of exiting immediately, we may want to collect the 501 # errors from all checks and emit them at once 502 sys.exit(2) 503 504 @staticmethod 505 def _find_symbol(lib, name, version): 506 """Check whether the symbol name and version matches a definition in 507 lib.""" 508 try: 509 lib_sym_vers = lib.exported[name] 510 except KeyError: 511 return False 512 if version == '': # Symbol version is not requested 513 return True 514 return version in lib_sym_vers 515 516 517 @classmethod 518 def _find_symbol_from_libs(cls, libs, name, version): 519 """Check whether the symbol name and version is defined in one of the 520 shared libraries in libs.""" 521 for lib in libs: 522 if cls._find_symbol(lib, name, version): 523 return lib 524 return None 525 526 527 def check_symbols(self): 528 """Check whether all undefined symbols are resolved to a definition.""" 529 all_elf_files = [self._file_under_test] + self._shared_libs 530 missing_symbols = [] 531 for sym, imported_vers in self._file_under_test.imported.items(): 532 for imported_ver in imported_vers: 533 lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver) 534 if not lib: 535 missing_symbols.append((sym, imported_ver)) 536 537 if missing_symbols: 538 for sym, ver in sorted(missing_symbols): 539 if ver: 540 sym += '@' + ver 541 self._error(f'Unresolved symbol: {sym}') 542 543 self._note() 544 self._note('Some dependencies might be changed, thus the symbol(s) ' 545 'above cannot be resolved.') 546 self._note(f'Please re-build the prebuilt file: "{self._file_path}".') 547 548 self._note() 549 self._note('If this is a new prebuilt file and it is designed to have ' 550 'unresolved symbols, add one of the following properties:') 551 self._note(' Android.bp: allow_undefined_symbols: true,') 552 self._note(' Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true') 553 554 sys.exit(2) 555 556 557def _parse_args(): 558 """Parse command line options.""" 559 parser = argparse.ArgumentParser() 560 561 # Input file 562 parser.add_argument('file', 563 help='Path to the input file to be checked') 564 parser.add_argument('--soname', 565 help='Shared object name of the input file') 566 567 # Shared library dependencies 568 parser.add_argument('--shared-lib', action='append', default=[], 569 help='Path to shared library dependencies') 570 571 # System Shared library names 572 parser.add_argument('--system-shared-lib', action='append', default=[], 573 help='System shared libraries to be hidden from fix ' 574 'suggestions') 575 576 # Check options 577 parser.add_argument('--skip-bad-elf-magic', action='store_true', 578 help='Ignore the input file without the ELF magic word') 579 parser.add_argument('--skip-unknown-elf-machine', action='store_true', 580 help='Ignore the input file with unknown machine ID') 581 parser.add_argument('--allow-undefined-symbols', action='store_true', 582 help='Ignore unresolved undefined symbols') 583 parser.add_argument('--max-page-size', action='store', type=int, 584 help='Required page size alignment support') 585 586 # Other options 587 parser.add_argument('--llvm-readobj', 588 help='Path to the llvm-readobj executable') 589 590 return parser.parse_args() 591 592 593def main(): 594 """Main function""" 595 args = _parse_args() 596 597 llvm_readobj = args.llvm_readobj 598 if not llvm_readobj: 599 llvm_readobj = _get_llvm_readobj() 600 601 # Load ELF files 602 checker = Checker(llvm_readobj) 603 checker.load_file_under_test( 604 args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine) 605 checker.load_shared_libs(args.shared_lib) 606 607 # Run checks 608 if args.soname: 609 checker.check_dt_soname(args.soname) 610 611 checker.check_dt_needed(args.system_shared_lib) 612 613 if args.max_page_size: 614 checker.check_max_page_size(args.max_page_size) 615 616 if not args.allow_undefined_symbols: 617 checker.check_symbols() 618 619 620if __name__ == '__main__': 621 main() 622