1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""binary_cache_builder.py: read perf.data, collect binaries needed by
19    it, and put them in binary_cache.
20"""
21
22from collections import defaultdict
23import logging
24import os
25import os.path
26from pathlib import Path
27import shutil
28import sys
29from typing import Dict, List, Optional, Tuple, Union
30
31from simpleperf_report_lib import ReportLib
32from simpleperf_utils import (
33    AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
34    ReadElf, str_to_bytes)
35
36
def is_jit_symfile(dso_name):
    """Return True if the last '/'-separated component of dso_name starts
    with 'TemporaryFile'.

    Such names are treated by this script as JIT symfiles and are skipped
    when collecting binaries.
    """
    basename = dso_name.rsplit('/', 1)[-1]
    return basename.startswith('TemporaryFile')
39
40
class BinaryCache:
    """ Maps binary paths recorded in perf.data to file locations under binary_dir. """

    def __init__(self, binary_dir: Path):
        # Root directory under which every cached binary is placed.
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """ Given a binary path in perf.data, return its corresponding path in the cache.

            device_path: '/'-separated path of the binary as recorded in perf.data.
            build_id: build id of the binary, or an empty value if it has none.
            Returns a path below self.binary_dir.
        """
        if build_id:
            filename = device_path.rsplit('/', 1)[-1]
            # Add build id to make the filename unique. The first two characters are
            # dropped (presumably a '0x' prefix -- TODO confirm against callers).
            return self.binary_dir / build_id[2:] / filename

        # For elf file without build id, we can only follow its path on device. Otherwise,
        # simpleperf can't find it. However, we don't prefer this way. Because:
        # 1) It doesn't work for native libs loaded directly from apk
        #    (android:extractNativeLibs="false").
        # 2) It may exceed path limit on windows.
        #
        # Strip *all* leading slashes: if one were left, the path would still be absolute
        # and os.path.join() would discard binary_dir, escaping the cache directory.
        relative_path = device_path.lstrip('/').replace('/', os.sep)
        return Path(os.path.join(self.binary_dir, relative_path))
62
63
class BinarySource:
    """ Source to find debug binaries.

        Base class: subclasses are expected to override collect_binaries().
    """

    def __init__(self, readelf: ReadElf):
        # Helper used to read build ids (and, in subclasses, other ELF info).
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ pull binaries needed in perf.data to binary_cache.
            binaries: maps from binary path to its build_id in perf.data.
            binary_cache: cache that decides where each binary is stored.

            Raises NotImplementedError unless overridden by a subclass.
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception keep working.
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """ Return the build id reported by readelf for the file at path. """
        return self.readelf.get_build_id(path)
78
79
class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        super().__init__(readelf)
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Pull every binary in binaries, then the kernel symbols, into binary_cache.
            Does nothing when no device is available.
            binaries: maps from binary path to its build_id in perf.data.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """ Pull one binary to its location in binary_cache, if its path looks pullable. """
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
           is no need to pull the binary from device. Otherwise, pull it.
           A failed pull is only logged as a warning.
        """
        if binary_cache_file.is_file() and (
                not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return

        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        try:
            os.makedirs(binary_cache_file.parent, exist_ok=True)
            if binary_cache_file.is_file():
                # Stale file with a different build id; remove it before pulling.
                binary_cache_file.unlink()
            success = self.pull_file_from_device(path, binary_cache_file)
        except FileNotFoundError:
            # It happens on windows when the filename or extension is too long.
            success = False
        if not success:
            logging.warning('failed to pull %s from device', path)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """ Pull device_path to host_path. Return True on success.

            Tries a direct `adb pull` first, then falls back to copying the file to
            /data/local/tmp on the device and pulling the copy.
        """
        # Always hand adb a plain string, for both pull attempts.
        host_path_str = str(host_path)
        if self.adb.run(['pull', device_path, host_path_str]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path.rsplit('/', 1)[-1]
        tmp_path = '/data/local/tmp/' + filename
        if not self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']):
            return False
        try:
            return bool(self.adb.run(['pull', tmp_path, host_path_str]))
        finally:
            # Remove the temporary copy whether or not the pull succeeded, so a
            # failed pull doesn't leave files behind on the device.
            self.adb.run(['shell', 'rm', tmp_path])

    def pull_kernel_symbols(self, file_path: Path):
        """ Replace file_path with a fresh copy of /proc/kallsyms.

            The existing file is always removed; a new one is pulled only if adb
            can switch to root.
        """
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            # Presumably needed so kallsyms exposes real addresses -- TODO confirm.
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])
141
142
class BinarySourceFromLibDirs(BinarySource):
    """ Collect binaries from lib dirs. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The attributes below are (re)populated by each collect_binaries() call.
        # filename -> list of (device path, build id) having that filename.
        self.filename_map: Optional[Dict[str, List[Tuple[str, str]]]] = None
        # build id -> device path.
        self.build_id_map: Optional[Dict[str, str]] = None
        self.binary_cache: Optional[BinaryCache] = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Copy binaries found in self.lib_dirs that match entries of binaries
            into binary_cache.
            binaries: maps from binary path to its build_id in perf.data.
        """
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        # Search all files in lib_dirs, and copy matching files to binary_cache.
        for lib_dir in self.lib_dirs:
            if self.is_platform_symbols_dir(lib_dir):
                self.search_platform_symbols_dir(lib_dir)
            else:
                self.search_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from filename to binaries having the name. """
        self.filename_map = defaultdict(list)
        for path, build_id in binaries.items():
            filename = path.rsplit('/', 1)[-1]
            self.filename_map[filename].append((path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from build id to binary path.
            Binaries without a build id are left out.
        """
        self.build_id_map = {}
        for path, build_id in binaries.items():
            if build_id:
                self.build_id_map[build_id] = path

    def is_platform_symbols_dir(self, lib_dir: Path):
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
        # Check the cheap name test first so other dirs aren't listed needlessly.
        if lib_dir.name != 'symbols':
            return False
        return any(entry.name == 'system' for entry in lib_dir.iterdir())

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries. Reading build ids for
            all of them takes a long time. So we only read build ids for binaries
            having names exist in filename_map.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                binaries = self.filename_map.get(filename)
                if not binaries:
                    continue
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                for path, expected_build_id in binaries:
                    if expected_build_id == build_id:
                        self.copy_to_binary_cache(file_path, build_id, path)

    def search_dir(self, lib_dir: Path):
        """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
            build ids for all binaries in it. But users may give debug binaries with a name
            different from the one recorded in perf.data. So we should only rely on build id
            if it is available.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                if build_id:
                    # For elf file with build id, use build id to match.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(file_path, build_id, device_path)
                elif self.readelf.is_elf_file(file_path):
                    # For elf file without build id, use filename to match.
                    for path, expected_build_id in self.filename_map.get(filename, []):
                        if not expected_build_id:
                            self.copy_to_binary_cache(file_path, '', path)
                            break

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """ Copy from_path to the cache location of device_path, unless the file
            already in the cache is at least as useful (see need_to_copy).
        """
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if not self.need_to_copy(from_path, to_path, expected_build_id):
            # The existing file in binary_cache can provide more information, so no need to copy.
            return
        # exist_ok avoids a check-then-create race.
        os.makedirs(to_path.parent, exist_ok=True)
        logging.info('copy to binary_cache: %s to %s', from_path, to_path)
        shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
        """ Return True if to_path is missing, has a different build id than
            expected_build_id, or is more stripped than from_path.
        """
        if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
            return True
        return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped.

        0: has a .debug_line section; 1: has .symtab but no .debug_line; 2: neither.
        """
        sections = self.readelf.get_sections(path)
        if '.debug_line' in sections:
            return 0
        if '.symtab' in sections:
            return 1
        return 2
248
249
class BinaryCacheBuilder:
    """Collect all binaries needed by perf.data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        # The cache always lives in ./binary_cache, relative to the working directory.
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps from binary path in perf.data to its build id ('' semantics are
        # whatever ReportLib.GetBuildIdForPath returns for binaries without one).
        self.binaries = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """Build the cache for perf_data_path.

        Binaries are first copied from symfs_dirs, then pulled from the device, and
        finally a build_id_list file is written. Returns False (before touching the
        device) if any entry of symfs_dirs is not a directory, True otherwise.
        """
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """read perf.data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries = {}
        # dso names already handled, so each one is queried for its build id only once.
        seen_dso_names = set()
        lib = ReportLib()
        try:
            lib.SetRecordFile(perf_data_path)
            lib.SetLogSeverity('error')
            while lib.GetNextSample() is not None:
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                symbols.extend(callchain.entries[i].symbol for i in range(callchain.nr))

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if dso_name in seen_dso_names:
                        continue
                    seen_dso_names.add(dso_name)
                    if is_jit_symfile(dso_name):
                        continue
                    if dso_name == '[kernel.kallsyms]':
                        # The kernel is recorded under the name 'vmlinux'.
                        binaries['vmlinux'] = lib.GetBuildIdForPath(dso_name)
                    elif dso_name not in binaries:
                        binaries[dso_name] = lib.GetBuildIdForPath(dso_name)
        finally:
            # Release the report lib even if reading the record file fails.
            lib.Close()
        self.binaries = binaries

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """Copy matching binaries from symfs_dirs into the cache.

        Returns False, without copying anything, if any entry is not an existing
        directory. An empty symfs_dirs is a successful no-op.
        """
        if not symfs_dirs:
            return True
        lib_dirs: List[Path] = []
        for symfs_dir in symfs_dirs:
            lib_dir = Path(symfs_dir)
            if not lib_dir.is_dir():
                logging.error("can't find dir %s", lib_dir)
                return False
            lib_dirs.append(lib_dir)
        lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
        lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """Pull the collected binaries from the device into the cache."""
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
            path.

            Each line has the form '<build_id>=<path relative to the cache dir>'.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if build_id:
                        relative_path = path.relative_to(self.binary_cache_dir)
                        line = f'{build_id}={relative_path}\n'
                        fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """Return the cache location for device_path.

        The build id collected from perf.data is used when known; paths that were
        not collected are treated as having no build id.
        """
        # Use '' rather than None for unknown paths, matching the declared str type.
        build_id = self.binaries.get(device_path) or ''
        return self.binary_cache.get_path_in_cache(device_path, build_id)
331
332
def main() -> bool:
    """Parse the command line and build the binary cache.

    Returns the result of BinaryCacheBuilder.build_binary_cache().
    """
    arg_parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    arg_parser.add_argument(
        '-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
        The path of profiling data.""")
    arg_parser.add_argument(
        '-lib', '--native_lib_dir', type=extant_dir, nargs='+', action='append', help="""
        Path to find debug version of native shared libraries used in the app.""")
    arg_parser.add_argument(
        '--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    arg_parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    options = arg_parser.parse_args()

    # --ndk_path uses nargs=1, so when given it arrives as a one-element list.
    ndk_path = options.ndk_path[0] if options.ndk_path else None
    # -lib uses nargs='+' with action='append'; flatten the nested lists.
    lib_dirs = flatten_arg_list(options.native_lib_dir)

    cache_builder = BinaryCacheBuilder(ndk_path, options.disable_adb_root)
    return cache_builder.build_binary_cache(options.perf_data_path, lib_dirs)
348
349
if __name__ == '__main__':
    # Exit status is 0 when main() reports success and 1 otherwise.
    succeeded = main()
    sys.exit(0 if succeeded else 1)
352