1#
2# Copyright (C) 2018 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16r"""This file contains an ELF vtable abi dumper.
17
18Example usage:
19    from vts.utils.python.library import vtable_dumper
20    with vtable_dumper.VtableDumper(file) as dumper:
21        print('\n\n'.join(str(vtable) for vtable in dumper.DumpVtables()))
22"""
23
24import bisect
25
26from vts.utils.python.library import elf_parser
27from vts.utils.python.library.elf import consts
28
29
class VtableError(Exception):
    """Error raised by VtableDumper when vtables cannot be dumped."""
33
34
class VtableEntry(object):
    """This class contains an entry in Vtable.

    The names attribute contains all the possible symbol names for this entry
    due to symbol aliasing.

    Entries compare by offset, so sorting a list of entries orders them by
    their position inside the vtable.

    Attributes:
        offset: Offset with respect to vtable.
        names: A list of possible symbol names of the entry.
        value: Value of the entry.
        is_undefined: If entry has a symbol, whether symbol is undefined or not.
    """

    def __init__(self, offset, names, value, is_undefined):
        self.offset = offset
        self.names = names
        self.value = value
        self.is_undefined = is_undefined

    def __lt__(self, other):
        """Orders entries by their offset within the vtable.

        Args:
            other: The object to compare with; anything exposing an offset
                   attribute is accepted.

        Returns:
            True if this entry's offset is smaller than other's offset.
            NotImplemented if other has no offset attribute, which lets
            Python report the unsupported comparison as a TypeError.
        """
        if not hasattr(other, 'offset'):
            return NotImplemented
        return self.offset < other.offset

    def __repr__(self):
        """Returns a debugging representation listing all four attributes."""
        return ('VtableEntry(offset={!r}, names={!r}, value={!r}, '
                'is_undefined={!r})'
                .format(self.offset,
                        self.names,
                        self.value,
                        self.is_undefined))
56
57
class Vtable(object):
    """This class contains a vtable and its entries.

    Vtables compare by begin_addr, so a list of Vtable can be sorted and
    binary-searched by address.

    Attributes:
        name: Symbol name of vtable.
        begin_addr: Begin address of vtable.
        end_addr: End Address of vtable.
        entries: A list of VtableEntry.
    """

    def __init__(self, name, begin_addr, end_addr):
        self.name = name
        self.begin_addr = begin_addr
        self.end_addr = end_addr
        self.entries = []

    def __lt__(self, other):
        """Compares begin_addr with another Vtable or with a raw address.

        Args:
            other: A Vtable, whose begin_addr is used as the key, or any
                   other value, which is used as the key directly.

        Returns:
            True if this vtable's begin_addr is smaller than the key.
        """
        key = other.begin_addr if isinstance(other, Vtable) else other
        return self.begin_addr < key

    def __str__(self):
        """Formats a header line followed by one line per entry.

        The header shows the name, the entry count, begin_addr and the size
        (end_addr - begin_addr); each entry line shows its offset,
        is_undefined flag, value and names.
        """
        lines = ['vtable {} {} entries begin_addr={:#x} size={:#x}'
                 .format(self.name,
                         len(self.entries),
                         self.begin_addr,
                         self.end_addr - self.begin_addr)]
        # Collect the lines and join once instead of growing a string with
        # repeated concatenation.
        lines.extend('{:#x} {} {:#x} {}'
                     .format(entry.offset,
                             entry.is_undefined,
                             entry.value,
                             entry.names)
                     for entry in self.entries)
        return '\n'.join(lines)
94
95
96class VtableDumper(elf_parser.ElfParser):
97    """This class wraps around a ElfParser and dumps vtables from an ELF file.
98    """
99
    def __init__(self, file_path, begin_offset=0):
        """Creates a VtableDumper to open and dump an ELF file's vtable.

        No state is set up here; both arguments are forwarded unchanged to
        the ElfParser initializer.

        Args:
            file_path: The path to the file.
            begin_offset: The offset of the ELF object in the file.
                          Defaults to 0.

        Raises:
            ElfError: File is not a valid ELF.
        """
        super(VtableDumper, self).__init__(file_path, begin_offset)
111
112    def DumpVtables(self):
113        """Scans the relocation section and dump exported vtables.
114
115        Returns:
116            A list of Vtable.
117
118        Raises:
119            VtableError: Fails to dump vtable.
120            ElfError: ELF decoding fails.
121        """
122        # Determine absolute and relative relocation type from e_machine.
123        machine = self.Ehdr.e_machine
124        rel_type = {
125            consts.EM_ARM: (consts.R_ARM_ABS32, consts.R_ARM_RELATIVE),
126            consts.EM_AARCH64: (consts.R_AARCH64_ABS64, consts.R_AARCH64_RELATIVE),
127            consts.EM_386: (consts.R_386_32, consts.R_386_RELATIVE),
128            consts.EM_X86_64: (consts.R_X86_64_64, consts.R_X86_64_RELATIVE),
129            consts.EM_RISCV: (consts.R_RISCV_64, consts.R_RISCV_RELATIVE),
130        }
131        if machine in rel_type:
132            rel_abs_type, rel_relative_type = rel_type[machine]
133        else:
134            raise VtableError('Unexpected machine type: {}'.format(machine))
135        # Initialize vtable ranges.
136        vtables = self._PrepareVtables()
137        inv_table = self._FunctionSymbolInverseTable()
138        # Scan relocation sections.
139        for rel_sh in self._RelocationSections():
140            is_rela = rel_sh.sh_type in (consts.SHT_RELA,
141                                         consts.SHT_ANDROID_RELA)
142            is_relr = rel_sh.sh_type in (consts.SHT_RELR,
143                                         consts.SHT_ANDROID_RELR)
144            symtab = self.Shdr[rel_sh.sh_link]
145            strtab = self.Shdr[symtab.sh_link]
146            for reloc in self.GetRelocations(rel_sh):
147                # RELR is relative and has no type.
148                is_absolute_type = (not is_relr and
149                                    reloc.GetType() == rel_abs_type)
150                is_relative_type = (is_relr or
151                                    reloc.GetType() == rel_relative_type)
152                if not is_absolute_type and not is_relative_type:
153                    continue
154                # If relocation target is a vtable entry, find the vtable.
155                vtable = self._LocateVtable(vtables, reloc.r_offset)
156                if not vtable:
157                    continue
158                # *_RELA sections have explicit addend.
159                # *_REL and *_RELR sections have implicit addend.
160                if is_rela:
161                    addend = reloc.r_addend
162                else:
163                    addend = self._ReadRelocationAddend(reloc)
164                if is_absolute_type:
165                    # Absolute relocations uses symbol value + addend.
166                    sym = self.GetRelocationSymbol(symtab, reloc)
167                    reloc_value = sym.st_value + addend
168                    sym_is_undefined = (sym.st_shndx == consts.SHN_UNDEF)
169                    if reloc_value in inv_table:
170                        entry_names = inv_table[reloc_value]
171                    else:
172                        sym_name = self.GetString(strtab, sym.st_name)
173                        entry_names = [sym_name]
174                elif is_relative_type:
175                    # Relative relocations don't have symbol table entry,
176                    # instead it uses a vaddr offset which is stored
177                    # in the addend value.
178                    reloc_value = addend
179                    sym_is_undefined = False
180                    if reloc_value in inv_table:
181                        entry_names = inv_table[reloc_value]
182                    else:
183                        entry_names = []
184                vtable.entries.append(VtableEntry(
185                    reloc.r_offset - vtable.begin_addr,
186                    entry_names, reloc_value, sym_is_undefined))
187        # Sort the vtable entries.
188        for vtable in vtables:
189            vtable.entries.sort()
190        return vtables
191
192    def _PrepareVtables(self):
193        """Collects vtable symbols from symbol table / dynamic symbol table.
194
195        Returns:
196            A list of Vtable.
197
198        Raises:
199            ElfError: ELF decoding fails.
200        """
201        vtables = []
202        vtable_names = set()
203        symtab_names = ('.symtab', '.dynsym')
204        for symtab_name in symtab_names:
205            # Object files may have one section of each type
206            symtab = self.GetSectionByName(symtab_name)
207            if not symtab:
208                continue
209            strtab = self.Shdr[symtab.sh_link]
210            for sym in self.GetSymbols(symtab):
211                if sym.st_shndx == consts.SHN_UNDEF:
212                    continue
213                sym_name = self.GetString(strtab, sym.st_name)
214                if sym_name.startswith('_ZTV') and sym_name not in vtable_names:
215                    vtable_begin = sym.st_value
216                    vtable_end = sym.st_value + sym.st_size
217                    vtable = Vtable(sym_name, vtable_begin, vtable_end)
218                    vtables.append(vtable)
219                    vtable_names.add(sym_name)
220        # Sort the vtables with Vtable.begin_addr so that we can use binary
221        # search to speed up _LocateVtable()'s query.
222        vtables.sort()
223        return vtables
224
225    def _FunctionSymbolInverseTable(self):
226        """Returns an address to symbol name inverse lookup table.
227
228        For symbols in .symtab and .dynsym that are not undefined,
229        construct an address to symbol name lookup table.
230
231        Returns:
232            A dictionary of {address: [symbol names]}.
233
234        Raises:
235            ElfError: ELF decoding fails.
236        """
237        inv_table = dict()
238        symtab_names = ('.symtab', '.dynsym')
239        for symtab_name in symtab_names:
240            # Object files may have one section of each type
241            symtab = self.GetSectionByName(symtab_name)
242            if not symtab:
243                continue
244            strtab = self.Shdr[symtab.sh_link]
245            for sym in self.GetSymbols(symtab):
246                if (sym.GetType() in (consts.STT_OBJECT, consts.STT_FUNC)
247                        and sym.st_shndx != consts.SHN_UNDEF):
248                    sym_name = self.GetString(strtab, sym.st_name)
249                    if sym.st_value in inv_table:
250                        inv_table[sym.st_value].append(sym_name)
251                    else:
252                        inv_table[sym.st_value] = [sym_name]
253        for key in inv_table:
254            inv_table[key] = sorted(set(inv_table[key]))
255        return inv_table
256
257    def _LocateVtable(self, vtables, offset):
258        """Searches for the vtable that contains the offset.
259
260        Args:
261            vtables: A list of Vtable to search from.
262            offset: The offset value to search for.
263
264        Returns:
265            The vtable whose begin_addr <= offset and offset < end_addr.
266            None if no such vtable cound be found.
267        """
268        search_key = Vtable("", offset, offset)
269        idx = bisect.bisect(vtables, search_key)
270        if idx <= 0:
271            return None
272        vtable = vtables[idx-1]
273        if vtable.begin_addr <= offset and offset < vtable.end_addr:
274            return vtable
275        return None
276
277    def _ReadRelocationAddend(self, reloc):
278        """Reads the addend value from the location to be modified.
279
280        Args:
281            reloc: A Elf_Rel containing the relocation.
282
283        Returns:
284            An integer, the addend value.
285
286        Raises:
287            VtableError: reloc is not a valid relocation.
288            ElfError: ELF decoding fails.
289        """
290        for sh in self.Shdr:
291            sh_begin = sh.sh_addr
292            sh_end = sh.sh_addr + sh.sh_size
293            if sh_begin <= reloc.r_offset and reloc.r_offset < sh_end:
294                if sh.sh_type == consts.SHT_NOBITS:
295                    return 0
296                offset = reloc.r_offset - sh.sh_addr + sh.sh_offset
297                addend = self._SeekReadStruct(offset, self.Elf_Addr)
298                return addend.value
299        raise VtableError('Invalid relocation: '
300                          'Cannot find relocation target section '
301                          'r_offset = {:#x}, r_info = {:#x}'
302                          .format(reloc.r_offset, reloc.r_info))
303
304    def _RelocationSections(self):
305        """Yields section headers that contain relocation data."""
306        sh_rel_types = (consts.SHT_REL, consts.SHT_RELA, consts.SHT_RELR,
307                        consts.SHT_ANDROID_REL, consts.SHT_ANDROID_RELA,
308                        consts.SHT_ANDROID_RELR)
309        for sh in self.Shdr:
310            if sh.sh_type in sh_rel_types:
311                yield sh
312