1#!/usr/bin/env python3
2#
3# Copyright (C) 2019 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""ELF file checker.
18
19This command ensures all undefined symbols in an ELF file can be resolved to
20global (or weak) symbols defined in shared objects specified in DT_NEEDED
21entries.
22"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# Magic bytes expected at the very start of an ELF file.
_ELF_MAGIC = b'\x7fELF'


# `e_machine` values that this checker treats as known.
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_KNOWN_MACHINES = {
  _EM_386,
  _EM_ARM,
  _EM_X86_64,
  _EM_AARCH64,
}


# Leading ELF header fields as (field name, `struct` format code) pairs, in
# the order in which they are unpacked.
_ELF_HEADER_STRUCT = (
  ('ei_magic', '4s'),
  ('ei_class', 'B'),
  ('ei_data', 'B'),
  ('ei_version', 'B'),
  ('ei_osabi', 'B'),
  ('ei_pad', '8s'),
  ('e_type', 'H'),
  ('e_machine', 'H'),
  ('e_version', 'I'),
)

# Single `struct` format string covering every field listed above.
_ELF_HEADER_STRUCT_FMT = ''.join(_field[1] for _field in _ELF_HEADER_STRUCT)


# Unpacked header record; attribute names mirror `_ELF_HEADER_STRUCT`.
ELFHeader = collections.namedtuple(
  'ELFHeader', [_field[0] for _field in _ELF_HEADER_STRUCT])


# Everything the checker keeps about one parsed ELF file.
ELF = collections.namedtuple(
  'ELF',
  [
    'alignments',
    'dt_soname',
    'dt_needed',
    'imported',
    'exported',
    'header',
  ])
71
72
def _get_os_name():
  """Return 'linux' or 'darwin' depending on the host platform.

  Raises:
    ValueError: `sys.platform` starts with neither 'linux' nor 'darwin'.
  """
  for os_name in ('linux', 'darwin'):
    if sys.platform.startswith(os_name):
      return os_name
  raise ValueError(sys.platform + ' is not supported')
80
81
def _get_build_top():
  """Locate the top of the source tree by searching upwards for `.repo`.

  The search starts at the current working directory and moves to the parent
  directory until the filesystem root has been examined.

  Returns:
    The nearest enclosing directory that contains a `.repo` entry, or None if
    no such directory exists.
  """
  candidate = os.path.abspath(os.getcwd())
  while True:
    if os.path.exists(os.path.join(candidate, '.repo')):
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # The root is its own parent: nothing is left to examine.
      return None
    candidate = parent
92
93
def _select_latest_llvm_version(versions):
  """Pick the newest `clang-r<revision>[<letter>]` name from `versions`.

  Names that do not start with `clang-r<digits>` are ignored. Candidates are
  ranked by numeric revision first and by the full name second.

  Returns:
    The winning name, or None when no name in `versions` qualifies.
  """
  name_re = re.compile('clang-r([0-9]+)([a-z]?)')
  ranked = []
  for name in versions:
    hit = name_re.match(name)
    if hit:
      ranked.append((int(hit.group(1)), name))
  return max(ranked)[1] if ranked else None
109
110
def _get_latest_llvm_version(llvm_dir):
  """Return the newest LLVM prebuilts version listed in `llvm_dir`.

  The directory entries are handed to `_select_latest_llvm_version`, so the
  result is None when no entry qualifies.
  """
  entries = os.listdir(llvm_dir)
  return _select_latest_llvm_version(entries)
114
115
def _get_llvm_dir():
  """Find the path to LLVM prebuilts.

  The directory is `<build top>/<base>/<os>-x86/<version>`, where `<base>`
  comes from $LLVM_PREBUILTS_BASE (default: `prebuilts/clang/host`) and
  `<version>` comes from $LLVM_PREBUILTS_VERSION (default: the latest
  `clang-r*` entry found in the directory).

  Returns:
    The path to the versioned LLVM prebuilts directory, or None if it cannot
    be determined or does not exist.

  Raises:
    ValueError: the host platform is not supported by `_get_os_name`.
  """
  llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
  if not llvm_prebuilts_base:
    llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

  build_top = _get_build_top()
  if build_top is None:
    # Without a source tree a relative base cannot be anchored anywhere. An
    # absolute base needs no anchor (os.path.join discards the prefix anyway).
    if not os.path.isabs(llvm_prebuilts_base):
      return None
    build_top = ''

  llvm_dir = os.path.join(
    build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

  if not os.path.exists(llvm_dir):
    return None

  llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
  if not llvm_prebuilts_version:
    llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
  if not llvm_prebuilts_version:
    # The directory holds no `clang-r*` entry, so there is nothing to select.
    return None

  llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

  if not os.path.exists(llvm_dir):
    return None

  return llvm_dir
140
141
def _get_llvm_readobj():
  """Find the path to llvm-readobj executable.

  Returns:
    The path to `bin/llvm-readobj` inside the LLVM prebuilts directory when
    that file exists; otherwise the bare command name 'llvm-readobj'.
  """
  llvm_dir = _get_llvm_dir()
  # `_get_llvm_dir` returns None when no prebuilts directory is available;
  # joining None would raise TypeError instead of using the fallback below.
  if llvm_dir:
    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    if os.path.exists(llvm_readobj):
      return llvm_readobj
  return 'llvm-readobj'
147
148
class ELFError(ValueError):
  """Base class for errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
  """Raised when a file does not carry a valid ELF magic word."""

  def __init__(self):
    # The message is fixed; callers construct this error without arguments.
    super().__init__('bad ELF magic')
158
159
class ELFParser(object):
  """ELF file parser.

  The ELF header is read directly from the file; everything else (program
  headers, dynamic table, dynamic symbols) is extracted from the text output
  of `llvm-readobj`.
  """

  @classmethod
  def _read_elf_header(cls, elf_file_path):
    """Read the leading ELF header fields from the beginning of the file.

    Returns:
      An `ELFHeader`, or None if the file is too short to hold the header.
    """
    with open(elf_file_path, 'rb') as elf_file:
      buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
    try:
      return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
    except struct.error:
      return None


  @classmethod
  def open(cls, elf_file_path, llvm_readobj):
    """Open and parse the ELF file.

    Args:
      elf_file_path: path to the ELF file.
      llvm_readobj: path (or command name) of the llvm-readobj executable.

    Returns:
      An `ELF` tuple describing the file.

    Raises:
      ELFInvalidMagicError: the file does not start with the ELF magic word.
    """
    # Parse the ELF header to check the magic word.
    header = cls._read_elf_header(elf_file_path)
    if not header or header.ei_magic != _ELF_MAGIC:
      raise ELFInvalidMagicError()

    # Run llvm-readobj and parse the output.
    return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)


  @classmethod
  def _find_prefix(cls, pattern, lines_it):
    """Iterate `lines_it` until finding a string that starts with `pattern`.

    The matching line is consumed. Returns True if a match was found and False
    if the iterator was exhausted first.
    """
    for line in lines_it:
      if line.startswith(pattern):
        return True
    return False


  @classmethod
  def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
    """Run llvm-readobj and parse the output.

    Raises:
      subprocess.CalledProcessError: llvm-readobj exited with a non-zero
        status.
    """
    cmd = [llvm_readobj, '--program-headers', '--dynamic-table',
           '--dyn-symbols', elf_file_path]
    out = subprocess.check_output(cmd, text=True)
    lines = out.splitlines()
    return cls._parse_llvm_readobj(elf_file_path, header, lines)


  @classmethod
  def _parse_llvm_readobj(cls, elf_file_path, header, lines):
    """Parse the output of llvm-readobj.

    The three sections are parsed from one shared iterator, so they must
    appear in this order: program headers, dynamic table, dynamic symbols.
    """
    lines_it = iter(lines)
    alignments = cls._parse_program_headers(lines_it)
    dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it)
    imported, exported = cls._parse_dynamic_symbols(lines_it)
    return ELF(alignments, dt_soname, dt_needed, imported, exported, header)


  # Markers and field patterns of the `ProgramHeaders [ ... ]` section.
  _PROGRAM_HEADERS_START_PATTERN = 'ProgramHeaders ['
  _PROGRAM_HEADERS_END_PATTERN = ']'
  _PROGRAM_HEADER_START_PATTERN = 'ProgramHeader {'
  _PROGRAM_HEADER_TYPE_PATTERN = re.compile(r'^\s+Type:\s+(.*)$')
  _PROGRAM_HEADER_ALIGN_PATTERN = re.compile(r'^\s+Alignment:\s+(.*)$')
  _PROGRAM_HEADER_END_PATTERN = '}'


  @classmethod
  def _parse_program_headers(cls, lines_it):
    """Parse the program headers section.

    Returns:
      A list with the alignment (as int) of every PT_LOAD program header, in
      the order in which the headers appear.

    Raises:
      ELFError: the section is missing, or a program header lacks its type or
        alignment field.
    """
    alignments = []

    if not cls._find_prefix(cls._PROGRAM_HEADERS_START_PATTERN, lines_it):
      raise ELFError('Could not find program headers!')

    for line in lines_it:
      if line == cls._PROGRAM_HEADERS_END_PATTERN:
        break

      if line.strip() != cls._PROGRAM_HEADER_START_PATTERN:
        continue

      # Parse each program header
      p_align = None
      p_type = None
      for entry in lines_it:
        if entry.strip() == cls._PROGRAM_HEADER_END_PATTERN:
          if not p_align:
            raise ELFError("Could not parse alignment from program header!")
          if not p_type:
            raise ELFError("Could not parse type from program header!")

          # Only loadable segments are relevant to the page size check.
          if p_type.startswith("PT_LOAD "):
            alignments.append(int(p_align))
          break

        match = cls._PROGRAM_HEADER_TYPE_PATTERN.match(entry)
        if match:
          p_type = match.group(1)

        match = cls._PROGRAM_HEADER_ALIGN_PATTERN.match(entry)
        if match:
          p_align = match.group(1)

    return alignments


  # Markers and entry patterns of the `DynamicSection [ ... ]` section.
  _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

  _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

  _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

  _DYNAMIC_SECTION_END_PATTERN = ']'


  @classmethod
  def _parse_dynamic_table(cls, elf_file_path, lines_it):
    """Parse the dynamic table section.

    Returns:
      A `(dt_soname, dt_needed)` tuple. `dt_soname` falls back to the base
      name of `elf_file_path` when the section or its SONAME entry is
      missing; `dt_needed` lists the NEEDED entries in order of appearance.
    """
    dt_soname = os.path.basename(elf_file_path)
    dt_needed = []

    dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
    if not dynamic:
      return (dt_soname, dt_needed)

    for line in lines_it:
      if line == cls._DYNAMIC_SECTION_END_PATTERN:
        break

      match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
      if match:
        dt_needed.append(match.group(1))
        continue

      match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
      if match:
        dt_soname = match.group(1)
        continue

    return (dt_soname, dt_needed)


  # Markers and entry patterns of the `DynamicSymbols [ ... ]` section.
  _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
  _DYNAMIC_SYMBOLS_END_PATTERN = ']'

  _SYMBOL_ENTRY_START_PATTERN = '  Symbol {'
  _SYMBOL_ENTRY_PATTERN = re.compile(r'^    ([A-Za-z0-9_]+): (.*)$')
  # Trailing ` (123)` or ` (0x7B)` annotation that follows a field value.
  _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
    r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
  _SYMBOL_ENTRY_END_PATTERN = '  }'


  @staticmethod
  def _parse_symbol_name(name_with_version):
    """Split `name_with_version` into name and version. This function may split
    at last occurrence of `@@` or `@`.

    Returns:
      A `(name, version)` tuple; `version` is '' when there is no `@`.
    """
    pos = name_with_version.rfind('@')
    if pos == -1:
      return (name_with_version, '')

    version = name_with_version[pos + 1:]
    # `pos` is the last `@`; drop one more character for the `@@` separator.
    if pos > 0 and name_with_version[pos - 1] == '@':
      name = name_with_version[0:pos - 1]
    else:
      name = name_with_version[0:pos]
    return (name, version)


  @classmethod
  def _parse_dynamic_symbols(cls, lines_it):
    """Parse dynamic symbol table and collect imported and exported symbols.

    Returns:
      An `(imported, exported)` tuple of dicts mapping each symbol name to
      the set of its versions. Undefined symbols are imported unless their
      binding is `Weak`; defined symbols are exported unless their binding is
      `Local`. Symbols with an empty name are skipped.
    """
    imported = collections.defaultdict(set)
    exported = collections.defaultdict(set)

    for symbol in cls._parse_dynamic_symbols_internal(lines_it):
      name, version = cls._parse_symbol_name(symbol['Name'])
      if name:
        if symbol['Section'] == 'Undefined':
          if symbol['Binding'] != 'Weak':
            imported[name].add(version)
        else:
          if symbol['Binding'] != 'Local':
            exported[name].add(version)

    # Freeze the returned imported/exported dict.
    return (dict(imported), dict(exported))


  @classmethod
  def _parse_dynamic_symbols_internal(cls, lines_it):
    """Parse symbol entries and yield each symbol.

    Every yielded symbol is a dict mapping a field name to its value with the
    trailing parenthesized number removed. Nothing is yielded when the
    section is missing.
    """

    if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
      return

    # Entry currently being collected; None while outside `Symbol { ... }`.
    symbol = None

    for line in lines_it:
      if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
        return

      if line == cls._SYMBOL_ENTRY_START_PATTERN:
        symbol = {}
        continue

      if line == cls._SYMBOL_ENTRY_END_PATTERN:
        # A closing line without a matching opening line carries no symbol.
        if symbol is not None:
          yield symbol
        symbol = None
        continue

      if symbol is None:
        # Fields outside a symbol entry do not belong to any symbol; skip them
        # like every other unrecognized line.
        continue

      match = cls._SYMBOL_ENTRY_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
        symbol[key] = value
372
373
class Checker(object):
  """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

  Load the file under test and its shared library dependencies first, then
  call the individual `check_*` methods. A failed check prints diagnostics to
  stderr and terminates the process with exit status 2.
  """

  def __init__(self, llvm_readobj):
    """Create a checker that parses ELF files with `llvm_readobj`."""
    self._file_path = ''
    self._file_under_test = None
    self._shared_libs = []

    self._llvm_readobj = llvm_readobj


  # Message tags; ANSI colors are used only when stderr is a terminal.
  if sys.stderr.isatty():
    _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
    _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
  else:
    _ERROR_TAG = 'error:'
    _NOTE_TAG = 'note:'


  def _error(self, *args):
    """Emit an error to stderr."""
    print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)


  def _note(self, *args):
    """Emit a note to stderr."""
    print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)


  def _load_elf_file(self, path, skip_bad_elf_magic):
    """Load an ELF file from the `path`.

    Exits with status 2 if the file cannot be opened or has no valid ELF
    magic word; in the latter case the process exits with status 0 instead
    when `skip_bad_elf_magic` is true. Any other error is reported and
    re-raised.
    """
    try:
      return ELFParser.open(path, self._llvm_readobj)
    except (IOError, OSError):
      self._error('Failed to open "{}".'.format(path))
      sys.exit(2)
    except ELFInvalidMagicError:
      if skip_bad_elf_magic:
        sys.exit(0)
      else:
        self._error('File "{}" must have a valid ELF magic word.'.format(path))
        sys.exit(2)
    except Exception:
      # Deliberately not a bare `except:` so that KeyboardInterrupt and
      # SystemExit are not reported as an unknown error.
      self._error('An unknown error occurred while opening "{}".'.format(path))
      raise


  def load_file_under_test(self, path, skip_bad_elf_magic,
                           skip_unknown_elf_machine):
    """Load file-under-test (either an executable or a shared lib).

    Exits with status 0 when `skip_unknown_elf_machine` is true and the
    machine ID of the file is not one of the known machines.
    """
    self._file_path = path
    self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

    if skip_unknown_elf_machine and \
        self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
      sys.exit(0)


  def load_shared_libs(self, shared_lib_paths):
    """Load shared libraries.

    Unlike the file under test, a shared library without a valid ELF magic
    word is always treated as an error.
    """
    for path in shared_lib_paths:
      self._shared_libs.append(self._load_elf_file(path, False))


  def check_dt_soname(self, soname):
    """Check whether DT_SONAME matches installation file name.

    Exits with status 2 on mismatch.
    """
    if self._file_under_test.dt_soname != soname:
      self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                  .format(self._file_under_test.dt_soname, soname))
      sys.exit(2)


  def check_dt_needed(self, system_shared_lib_names):
    """Check whether all DT_NEEDED entries are specified in the build
    system.

    Args:
      system_shared_lib_names: module names left out of the fix suggestion.

    Exits with status 2 if a DT_NEEDED entry does not match the DT_SONAME of
    any loaded shared library.
    """

    missing_shared_libs = False

    # Collect the DT_SONAMEs from shared libs specified in the build system.
    specified_sonames = {lib.dt_soname for lib in self._shared_libs}

    # Check whether all DT_NEEDED entries are specified.
    for lib in self._file_under_test.dt_needed:
      if lib not in specified_sonames:
        self._error(f'DT_NEEDED "{lib}" is not specified in shared_libs.')
        missing_shared_libs = True

    if missing_shared_libs:
      dt_needed = sorted(set(self._file_under_test.dt_needed))
      modules = [re.sub(r'\.so$', '', lib) for lib in dt_needed]

      # Remove system shared libraries from the suggestion since they are added
      # by default.
      modules = [name for name in modules
                 if name not in system_shared_lib_names]

      self._note()
      self._note('Fix suggestions:')
      self._note(
        '  Android.bp: shared_libs: [' +
        ', '.join('"' + module + '"' for module in modules) + '],')
      self._note(
        '  Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

      self._note()
      self._note('If the fix above doesn\'t work, bypass this check with:')
      self._note('  Android.bp: check_elf_files: false,')
      self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')

      sys.exit(2)

  def check_max_page_size(self, max_page_size):
    """Check whether every load segment alignment is a multiple of
    `max_page_size`.

    Exits with status 2 at the first alignment that is not.
    """
    for alignment in self._file_under_test.alignments:
      if alignment % max_page_size != 0:
        self._error(f'Load segment has alignment {alignment} but '
                    f'{max_page_size} required.')
        self._note()
        self._note('Fix suggestions:')
        self._note(f'  use linker flag "-Wl,-z,max-page-size={max_page_size}" '
                   f'when compiling this lib')
        self._note()
        self._note('If the fix above doesn\'t work, bypass this check with:')
        self._note('  Android.bp: ignore_max_page_size: true,')
        self._note('  Android.mk: LOCAL_IGNORE_MAX_PAGE_SIZE := true')
        self._note('  Device mk: PRODUCT_CHECK_PREBUILT_MAX_PAGE_SIZE := false')

        # TODO: instead of exiting immediately, we may want to collect the
        # errors from all checks and emit them at once
        sys.exit(2)

  @staticmethod
  def _find_symbol(lib, name, version):
    """Check whether the symbol name and version matches a definition in
    lib.

    An empty `version` matches any exported version of `name`.
    """
    try:
      lib_sym_vers = lib.exported[name]
    except KeyError:
      return False
    if version == '':  # Symbol version is not requested
      return True
    return version in lib_sym_vers


  @classmethod
  def _find_symbol_from_libs(cls, libs, name, version):
    """Check whether the symbol name and version is defined in one of the
    shared libraries in libs.

    Returns:
      The first library in `libs` that defines the symbol, or None.
    """
    for lib in libs:
      if cls._find_symbol(lib, name, version):
        return lib
    return None


  def check_symbols(self):
    """Check whether all undefined symbols are resolved to a definition.

    Definitions are looked up in the file under test itself and in the loaded
    shared libraries. Exits with status 2 if any symbol stays unresolved.
    """
    all_elf_files = [self._file_under_test] + self._shared_libs
    missing_symbols = []
    for sym, imported_vers in self._file_under_test.imported.items():
      for imported_ver in imported_vers:
        lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver)
        if not lib:
          missing_symbols.append((sym, imported_ver))

    if missing_symbols:
      for sym, ver in sorted(missing_symbols):
        if ver:
          sym += '@' + ver
        self._error(f'Unresolved symbol: {sym}')

      self._note()
      self._note('Some dependencies might be changed, thus the symbol(s) '
                 'above cannot be resolved.')
      self._note(f'Please re-build the prebuilt file: "{self._file_path}".')

      self._note()
      self._note('If this is a new prebuilt file and it is designed to have '
                 'unresolved symbols, add one of the following properties:')
      self._note('  Android.bp: allow_undefined_symbols: true,')
      self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

      sys.exit(2)
555
556
def _parse_args():
  """Define the command line interface and parse the process arguments.

  Returns:
    The `argparse.Namespace` produced by parsing `sys.argv`.
  """
  parser = argparse.ArgumentParser()
  add = parser.add_argument

  # The file to check and the name it is expected to carry.
  add('file', help='Path to the input file to be checked')
  add('--soname', help='Shared object name of the input file')

  # Repeatable options: every occurrence appends one value to the list.
  add('--shared-lib', action='append', default=[],
      help='Path to shared library dependencies')
  add('--system-shared-lib', action='append', default=[],
      help='System shared libraries to be hidden from fix '
      'suggestions')

  # Switches that relax or skip individual checks.
  switches = (
    ('--skip-bad-elf-magic',
     'Ignore the input file without the ELF magic word'),
    ('--skip-unknown-elf-machine',
     'Ignore the input file with unknown machine ID'),
    ('--allow-undefined-symbols',
     'Ignore unresolved undefined symbols'),
  )
  for flag, description in switches:
    add(flag, action='store_true', help=description)

  add('--max-page-size', action='store', type=int,
      help='Required page size alignment support')

  # Tool location override.
  add('--llvm-readobj', help='Path to the llvm-readobj executable')

  return parser.parse_args()
591
592
def main():
  """Parse the command line, load the ELF files and run the enabled checks."""
  args = _parse_args()

  # An explicit --llvm-readobj wins; otherwise locate the tool automatically.
  checker = Checker(args.llvm_readobj or _get_llvm_readobj())

  # Load the file under test first, then its declared dependencies.
  checker.load_file_under_test(
    args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
  checker.load_shared_libs(args.shared_lib)

  # The DT_SONAME check only runs when an expected name was given.
  if args.soname:
    checker.check_dt_soname(args.soname)

  checker.check_dt_needed(args.system_shared_lib)

  # The page size check only runs when a non-zero size was given.
  if args.max_page_size:
    checker.check_max_page_size(args.max_page_size)

  if not args.allow_undefined_symbols:
    checker.check_symbols()


if __name__ == '__main__':
  main()
622