#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""binary_cache_builder.py: read perf.data, collect binaries needed by
    it, and put them in binary_cache.
"""

from collections import defaultdict
import logging
import os
import os.path
from pathlib import Path
import shutil
import sys
from typing import Dict, List, Optional, Tuple, Union

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
    AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
    ReadElf, str_to_bytes)


def is_jit_symfile(dso_name: str) -> bool:
    """Return True if the last '/'-separated component of dso_name starts with
    'TemporaryFile'. Such entries are skipped when collecting binaries.
    """
    return dso_name.split('/')[-1].startswith('TemporaryFile')


class BinaryCache:
    """Maps binary paths recorded in perf.data to locations under binary_dir."""

    def __init__(self, binary_dir: Path):
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """ Given a binary path in perf.data, return its corresponding path in the cache.

        With a build id, the result is <binary_dir>/<build_id[2:]>/<filename>.
        Without one (empty or None), the result mirrors device_path under binary_dir.
        """
        if build_id:
            filename = device_path.split('/')[-1]
            # Add build id to make the filename unique.
            # NOTE(review): [2:] drops the first two characters of build_id, presumably a
            # '0x' prefix -- confirm against ReportLib.GetBuildIdForPath.
            return self.binary_dir / build_id[2:] / filename

        # For elf file without build id, we can only follow its path on device. Otherwise,
        # simpleperf can't find it. However, we don't prefer this way. Because:
        # 1) It doesn't work for native libs loaded directly from apk
        #    (android:extractNativeLibs="false").
        # 2) It may exceed path limit on windows.
        if device_path.startswith('/'):
            device_path = device_path[1:]
        device_path = device_path.replace('/', os.sep)
        return Path(os.path.join(self.binary_dir, device_path))


class BinarySource:
    """ Source to find debug binaries. """

    def __init__(self, readelf: ReadElf):
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ pull binaries needed in perf.data to binary_cache.
            binaries: maps from binary path to its build_id in perf.data.

        Subclasses must override this method.
        """
        # NotImplementedError is still an Exception, so existing handlers keep working.
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """Return the build id that readelf reports for the file at path."""
        return self.readelf.get_build_id(path)


class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        super().__init__(readelf)
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """Pull every binary in `binaries`, then kallsyms, into binary_cache.

        Does nothing when no device is available.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """Pull one binary into the cache unless its path can't name a file on device."""
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
           is no need to pull the binary from device. Otherwise, pull it.
        """
        if binary_cache_file.is_file() and (
            not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return

        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        target_dir = binary_cache_file.parent
        try:
            os.makedirs(target_dir, exist_ok=True)
            # Remove a stale file (wrong build id) before pulling the new one.
            if binary_cache_file.is_file():
                binary_cache_file.unlink()
            success = self.pull_file_from_device(path, binary_cache_file)
        except FileNotFoundError:
            # It happens on windows when the filename or extension is too long.
            success = False
        if not success:
            logging.warning('failed to pull %s from device', path)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """Pull device_path to host_path. Return True on success, False otherwise.

        Tries a direct `adb pull` first, then falls back to copying the file to
        /data/local/tmp on the device and pulling it from there.
        """
        host_path_str = str(host_path)
        if self.adb.run(['pull', device_path, host_path_str]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path[device_path.rfind('/')+1:]
        tmp_path = '/data/local/tmp/' + filename
        if not self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']):
            return False
        pulled = bool(self.adb.run(['pull', tmp_path, host_path_str]))
        # Remove the temporary copy whether or not the pull worked, so a failed pull
        # doesn't leave files behind on the device.
        self.adb.run(['shell', 'rm', tmp_path])
        return pulled

    def pull_kernel_symbols(self, file_path: Path):
        """Replace file_path with /proc/kallsyms from the device, if adb can run as root.

        Any existing file at file_path is removed first, even if root is unavailable.
        """
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])


class BinarySourceFromLibDirs(BinarySource):
    """ Collect binaries from lib dirs. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The three attributes below are populated by collect_binaries().
        self.filename_map = None
        self.build_id_map = None
        self.binary_cache = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """Copy files from lib_dirs that match entries in `binaries` into binary_cache."""
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        # Search all files in lib_dirs, and copy matching files to build_cache.
        for lib_dir in self.lib_dirs:
            if self.is_platform_symbols_dir(lib_dir):
                self.search_platform_symbols_dir(lib_dir)
            else:
                self.search_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from filename to binaries having the name. """
        self.filename_map = defaultdict(list)
        for path, build_id in binaries.items():
            # rfind returns -1 when there is no '/', so the whole path is the filename.
            filename = path[path.rfind('/') + 1:]
            self.filename_map[filename].append((path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from build id to binary path. """
        self.build_id_map = {
            build_id: path for path, build_id in binaries.items() if build_id}

    def is_platform_symbols_dir(self, lib_dir: Path):
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
        # Check the name first so other dirs are not listed needlessly.
        if lib_dir.name != 'symbols':
            return False
        return any(entry.name == 'system' for entry in lib_dir.iterdir())

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries. Reading build ids for
            all of them takes a long time. So we only read build ids for binaries
            having names exist in filename_map.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                # .get() avoids inserting empty lists into the defaultdict.
                binaries = self.filename_map.get(filename)
                if not binaries:
                    continue
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                for path, expected_build_id in binaries:
                    if expected_build_id == build_id:
                        self.copy_to_binary_cache(file_path, build_id, path)

    def search_dir(self, lib_dir: Path):
        """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
            build ids for all binaries in it. But users may give debug binaries with a name
            different from the one recorded in perf.data. So we should only rely on build id
            if it is available.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                if build_id:
                    # For elf file with build id, use build id to match.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(file_path, build_id, device_path)
                elif self.readelf.is_elf_file(file_path):
                    # For elf file without build id, use filename to match.
                    for path, expected_build_id in self.filename_map.get(filename, []):
                        if not expected_build_id:
                            self.copy_to_binary_cache(file_path, '', path)
                            # Only the first recorded binary without a build id is served.
                            break

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """Copy from_path to the cache location of device_path, unless the file already
        in the cache is at least as good (see need_to_copy).
        """
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if not self.need_to_copy(from_path, to_path, expected_build_id):
            # The existing file in binary_cache can provide more information, so no need to copy.
            return
        # exist_ok avoids the race between checking for the directory and creating it.
        to_path.parent.mkdir(parents=True, exist_ok=True)
        logging.info('copy to binary_cache: %s to %s', from_path, to_path)
        shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
        """Return True if to_path is missing, has a different build id, or is more
        stripped than from_path.
        """
        if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
            return True
        return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped."""
        sections = self.readelf.get_sections(path)
        if '.debug_line' in sections:
            return 0
        if '.symtab' in sections:
            return 1
        return 2


class BinaryCacheBuilder:
    """Collect all binaries needed by perf.data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps binary name to build id; filled by collect_used_binaries().
        self.binaries: Dict[str, str] = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """Populate the binary_cache directory for perf_data_path.

        Binaries are copied from symfs_dirs first, then pulled from the device, and
        finally the build_id_list index is written. Returns False if one of
        symfs_dirs is not a directory, True otherwise.
        """
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """read perf.data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries = {}
        # '[kernel.kallsyms]' is stored under the key 'vmlinux', so a membership test on
        # dso_name never matches it. This flag makes its build id get looked up once.
        kernel_recorded = False
        lib = ReportLib()
        try:
            lib.SetRecordFile(perf_data_path)
            lib.SetLogSeverity('error')
            while lib.GetNextSample() is not None:
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if dso_name == '[kernel.kallsyms]':
                        if not kernel_recorded:
                            binaries['vmlinux'] = lib.GetBuildIdForPath(dso_name)
                            kernel_recorded = True
                        continue
                    if dso_name in binaries or is_jit_symfile(dso_name):
                        continue
                    binaries[dso_name] = lib.GetBuildIdForPath(dso_name)
        finally:
            # Close the report lib even if reading a sample raises.
            lib.Close()
        self.binaries = binaries

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """Copy matching binaries from symfs_dirs into the cache.

        Returns False (after logging an error) if an entry is not a directory.
        An empty or None symfs_dirs is a no-op that returns True.
        """
        if symfs_dirs:
            lib_dirs: List[Path] = []
            for symfs_dir in symfs_dirs:
                if isinstance(symfs_dir, str):
                    symfs_dir = Path(symfs_dir)
                if not symfs_dir.is_dir():
                    logging.error("can't find dir %s", symfs_dir)
                    return False
                lib_dirs.append(symfs_dir)
            lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
            lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """Pull the collected binaries from the connected device into the cache."""
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
            path.

        Each line is '<build_id>=<path relative to binary_cache_dir>'.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if build_id:
                        relative_path = path.relative_to(self.binary_cache_dir)
                        line = f'{build_id}={relative_path}\n'
                        fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """Return the cache path for device_path, using its recorded build id if any.

        The returned path is computed, not checked for existence.
        """
        build_id = self.binaries.get(device_path)
        return self.binary_cache.get_path_in_cache(device_path, build_id)


def main() -> bool:
    """Parse command line arguments and build the binary cache. Return True on success."""
    parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
        The path of profiling data.""")
    parser.add_argument('-lib', '--native_lib_dir', type=extant_dir, nargs='+', help="""
        Path to find debug version of native shared libraries used in the app.""", action='append')
    parser.add_argument('--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    args = parser.parse_args()
    # nargs=1 yields a one-element list when the option is given.
    ndk_path = None if not args.ndk_path else args.ndk_path[0]
    builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
    symfs_dirs = flatten_arg_list(args.native_lib_dir)
    return builder.build_binary_cache(args.perf_data_path, symfs_dirs)


if __name__ == '__main__':
    sys.exit(0 if main() else 1)