1#!/usr/bin/env python3 2# 3# Copyright (C) 2016 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""utils.py: export utility functions. 19""" 20 21from __future__ import annotations 22import argparse 23from concurrent.futures import Future, ThreadPoolExecutor 24from dataclasses import dataclass 25import logging 26import os 27import os.path 28from pathlib import Path 29import re 30import shutil 31import subprocess 32import sys 33import time 34from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union, TextIO 35 36 37NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option." 38 39 40def get_script_dir() -> str: 41 return os.path.dirname(os.path.realpath(__file__)) 42 43 44def is_windows() -> bool: 45 return sys.platform == 'win32' or sys.platform == 'cygwin' 46 47 48def is_darwin() -> bool: 49 return sys.platform == 'darwin' 50 51 52def get_platform() -> str: 53 if is_windows(): 54 return 'windows' 55 if is_darwin(): 56 return 'darwin' 57 return 'linux' 58 59 60def str_to_bytes(str_value: str) -> bytes: 61 # In python 3, str are wide strings whereas the C api expects 8 bit strings, 62 # hence we have to convert. For now using utf-8 as the encoding. 
63 return str_value.encode('utf-8') 64 65 66def bytes_to_str(bytes_value: Optional[bytes]) -> str: 67 if not bytes_value: 68 return '' 69 return bytes_value.decode('utf-8') 70 71 72def get_target_binary_path(arch: str, binary_name: str) -> str: 73 if arch == 'aarch64': 74 arch = 'arm64' 75 arch_dir = os.path.join(get_script_dir(), "bin", "android", arch) 76 if not os.path.isdir(arch_dir): 77 log_fatal("can't find arch directory: %s" % arch_dir) 78 binary_path = os.path.join(arch_dir, binary_name) 79 if not os.path.isfile(binary_path): 80 log_fatal("can't find binary: %s" % binary_path) 81 return binary_path 82 83 84def get_host_binary_path(binary_name: str) -> str: 85 dirname = os.path.join(get_script_dir(), 'bin') 86 if is_windows(): 87 if binary_name.endswith('.so'): 88 binary_name = binary_name[0:-3] + '.dll' 89 elif '.' not in binary_name: 90 binary_name += '.exe' 91 dirname = os.path.join(dirname, 'windows') 92 elif sys.platform == 'darwin': # OSX 93 if binary_name.endswith('.so'): 94 binary_name = binary_name[0:-3] + '.dylib' 95 dirname = os.path.join(dirname, 'darwin') 96 else: 97 dirname = os.path.join(dirname, 'linux') 98 dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86') 99 binary_path = os.path.join(dirname, binary_name) 100 if not os.path.isfile(binary_path): 101 log_fatal("can't find binary: %s" % binary_path) 102 return binary_path 103 104 105def is_executable_available(executable: str, option='--help') -> bool: 106 """ Run an executable to see if it exists. """ 107 try: 108 subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE, 109 stderr=subprocess.PIPE) 110 subproc.communicate() 111 return subproc.returncode == 0 112 except OSError: 113 return False 114 115 116class ToolFinder: 117 """ Find tools in ndk or sdk. 
""" 118 DEFAULT_SDK_PATH = { 119 'darwin': 'Library/Android/sdk', 120 'linux': 'Android/Sdk', 121 'windows': 'AppData/Local/Android/sdk', 122 } 123 124 EXPECTED_TOOLS = { 125 'adb': { 126 'is_binutils': False, 127 'test_option': 'version', 128 'path_in_sdk': 'platform-tools/adb', 129 }, 130 'llvm-objdump': { 131 'is_binutils': False, 132 'path_in_ndk': 133 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform, 134 }, 135 'llvm-readelf': { 136 'is_binutils': False, 137 'path_in_ndk': 138 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform, 139 }, 140 'llvm-symbolizer': { 141 'is_binutils': False, 142 'path_in_ndk': 143 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform, 144 }, 145 'llvm-strip': { 146 'is_binutils': False, 147 'path_in_ndk': 148 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform, 149 }, 150 } 151 152 @classmethod 153 def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None 154 ) -> Iterator[Tuple[Optional[str], Optional[str]]]: 155 # Use the given ndk path. 156 if ndk_path and os.path.isdir(ndk_path): 157 ndk_path = os.path.abspath(ndk_path) 158 yield ndk_path, cls.find_sdk_path(ndk_path) 159 # Find ndk in the parent directory containing simpleperf scripts. 160 ndk_path = os.path.dirname(os.path.abspath(get_script_dir())) 161 yield ndk_path, cls.find_sdk_path(ndk_path) 162 # Find ndk in the default sdk installation path. 163 if is_windows(): 164 home = os.environ.get('HOMEDRIVE') + os.environ.get('HOMEPATH') 165 else: 166 home = os.environ.get('HOME') 167 if home: 168 platform = get_platform() 169 sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep)) 170 if os.path.isdir(sdk_path): 171 path = os.path.join(sdk_path, 'ndk') 172 if os.path.isdir(path): 173 # Android Studio can install multiple ndk versions in 'ndk'. 174 # Find the newest one. 
175 ndk_version = None 176 for name in os.listdir(path): 177 if not ndk_version or ndk_version < name: 178 ndk_version = name 179 if ndk_version: 180 yield os.path.join(path, ndk_version), sdk_path 181 ndk_path = os.path.join(sdk_path, 'ndk-bundle') 182 if os.path.isdir(ndk_path): 183 yield ndk_path, sdk_path 184 185 @classmethod 186 def find_sdk_path(cls, ndk_path: str) -> Optional[str]: 187 path = ndk_path 188 for _ in range(2): 189 path = os.path.dirname(path) 190 if os.path.isdir(os.path.join(path, 'platform-tools')): 191 return path 192 return None 193 194 @classmethod 195 def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str 196 ) -> Tuple[str, str]: 197 if not arch: 198 arch = 'arm64' 199 if arch == 'arm64': 200 name = 'aarch64-linux-android-' + toolname 201 elif arch == 'arm': 202 name = 'arm-linux-androideabi-' + toolname 203 elif arch == 'x86_64': 204 name = 'x86_64-linux-android-' + toolname 205 elif arch == 'x86': 206 name = 'i686-linux-android-' + toolname 207 else: 208 log_fatal('unexpected arch %s' % arch) 209 path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name) 210 return (name, path) 211 212 @classmethod 213 def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None, 214 arch: Optional[str] = None) -> Optional[str]: 215 tool_info = cls.EXPECTED_TOOLS.get(toolname) 216 if not tool_info: 217 return None 218 219 is_binutils = tool_info['is_binutils'] 220 test_option = tool_info.get('test_option', '--help') 221 platform = get_platform() 222 223 # Find tool in clang prebuilts in Android platform. 224 if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith( 225 'system/extras/simpleperf/scripts'): 226 path = str( 227 Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' / 228 'llvm-binutils-stable' / toolname) 229 if is_executable_available(path, test_option): 230 return path 231 232 # Find tool in NDK or SDK. 
233 path_in_ndk = None 234 path_in_sdk = None 235 if is_binutils: 236 toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk( 237 toolname, arch, platform) 238 else: 239 toolname_with_arch = toolname 240 if 'path_in_ndk' in tool_info: 241 path_in_ndk = tool_info['path_in_ndk'](platform) 242 elif 'path_in_sdk' in tool_info: 243 path_in_sdk = tool_info['path_in_sdk'] 244 if path_in_ndk: 245 path_in_ndk = path_in_ndk.replace('/', os.sep) 246 elif path_in_sdk: 247 path_in_sdk = path_in_sdk.replace('/', os.sep) 248 249 for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path): 250 if path_in_ndk and ndk_dir: 251 path = os.path.join(ndk_dir, path_in_ndk) 252 if is_executable_available(path, test_option): 253 return path 254 elif path_in_sdk and sdk_dir: 255 path = os.path.join(sdk_dir, path_in_sdk) 256 if is_executable_available(path, test_option): 257 return path 258 259 # Find tool in $PATH. 260 if is_executable_available(toolname_with_arch, test_option): 261 return toolname_with_arch 262 263 # Find tool without arch in $PATH. 
264 if is_binutils and tool_info.get('accept_tool_without_arch'): 265 if is_executable_available(toolname, test_option): 266 return toolname 267 return None 268 269 270class AdbHelper(object): 271 def __init__(self, enable_switch_to_root: bool = True): 272 adb_path = ToolFinder.find_tool_path('adb') 273 if not adb_path: 274 log_exit("Can't find adb in PATH environment.") 275 self.adb_path: str = adb_path 276 self.enable_switch_to_root = enable_switch_to_root 277 self.serial_number: Optional[str] = None 278 279 def is_device_available(self) -> bool: 280 return self.run_and_return_output(['shell', 'whoami'])[0] 281 282 def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool: 283 return self.run_and_return_output(adb_args, log_output, log_stderr)[0] 284 285 def run_and_return_output(self, adb_args: List[str], log_output: bool = False, 286 log_stderr: bool = False) -> Tuple[bool, str]: 287 adb_args = [self.adb_path] + adb_args 288 logging.debug('run adb cmd: %s' % adb_args) 289 env = None 290 if self.serial_number: 291 env = os.environ.copy() 292 env['ANDROID_SERIAL'] = self.serial_number 293 subproc = subprocess.Popen( 294 adb_args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 295 stdout_data, stderr_data = subproc.communicate() 296 stdout_data = bytes_to_str(stdout_data) 297 stderr_data = bytes_to_str(stderr_data) 298 returncode = subproc.returncode 299 result = (returncode == 0) 300 if log_output and stdout_data: 301 logging.debug(stdout_data) 302 if log_stderr and stderr_data: 303 logging.warning(stderr_data) 304 logging.debug('run adb cmd: %s [result %s]' % (adb_args, result)) 305 return (result, stdout_data) 306 307 def check_run(self, adb_args: List[str], log_output: bool = False): 308 self.check_run_and_return_output(adb_args, log_output) 309 310 def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False, 311 log_stderr: bool = False) -> str: 312 result, stdoutdata = 
self.run_and_return_output(adb_args, log_output, True) 313 if not result: 314 log_exit('run "adb %s" failed: %s' % (adb_args, stdoutdata)) 315 return stdoutdata 316 317 def _unroot(self): 318 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 319 if not result: 320 return 321 if 'root' not in stdoutdata: 322 return 323 logging.info('unroot adb') 324 self.run(['unroot']) 325 time.sleep(1) 326 self.run(['wait-for-device']) 327 328 def switch_to_root(self) -> bool: 329 if not self.enable_switch_to_root: 330 self._unroot() 331 return False 332 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 333 if not result: 334 return False 335 if 'root' in stdoutdata: 336 return True 337 build_type = self.get_property('ro.build.type') 338 if build_type == 'user': 339 return False 340 self.run(['root']) 341 time.sleep(1) 342 self.run(['wait-for-device']) 343 result, stdoutdata = self.run_and_return_output(['shell', 'whoami']) 344 return result and 'root' in stdoutdata 345 346 def get_property(self, name: str) -> Optional[str]: 347 result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name]) 348 return stdoutdata.strip() if result else None 349 350 def set_property(self, name: str, value: str) -> bool: 351 return self.run(['shell', 'setprop', name, value]) 352 353 def get_device_arch(self) -> str: 354 output = self.check_run_and_return_output(['shell', 'uname', '-m']) 355 if 'aarch64' in output: 356 return 'arm64' 357 if 'arm' in output: 358 return 'arm' 359 if 'x86_64' in output: 360 return 'x86_64' 361 if '86' in output: 362 return 'x86' 363 if 'riscv64' in output: 364 return 'riscv64' 365 log_fatal('unsupported architecture: %s' % output.strip()) 366 return '' 367 368 def get_android_version(self) -> int: 369 """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" 370 build_version = self.get_property('ro.build.version.codename') 371 if not build_version or build_version == 'REL': 372 build_version = 
self.get_property('ro.build.version.release') 373 android_version = 0 374 if build_version: 375 if build_version[0].isdigit(): 376 i = 1 377 while i < len(build_version) and build_version[i].isdigit(): 378 i += 1 379 android_version = int(build_version[:i]) 380 else: 381 c = build_version[0].upper() 382 if c.isupper() and c >= 'L': 383 android_version = ord(c) - ord('L') + 5 384 return android_version 385 386 387def flatten_arg_list(arg_list: List[List[str]]) -> List[str]: 388 res = [] 389 if arg_list: 390 for items in arg_list: 391 res += items 392 return res 393 394 395def remove(dir_or_file: Union[Path, str]): 396 if os.path.isfile(dir_or_file): 397 os.remove(dir_or_file) 398 elif os.path.isdir(dir_or_file): 399 shutil.rmtree(dir_or_file, ignore_errors=True) 400 401 402def open_report_in_browser(report_path: str): 403 if is_darwin(): 404 # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first. 405 try: 406 subprocess.check_call(['open', report_path]) 407 return 408 except subprocess.CalledProcessError: 409 pass 410 import webbrowser 411 try: 412 # Try to open the report with Chrome 413 browser = webbrowser.get('google-chrome') 414 browser.open(report_path, new=0, autoraise=True) 415 except webbrowser.Error: 416 # webbrowser.get() doesn't work well on darwin/windows. 
417 webbrowser.open_new_tab(report_path) 418 419 420class BinaryFinder: 421 def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf): 422 if isinstance(binary_cache_dir, str): 423 binary_cache_dir = Path(binary_cache_dir) 424 self.binary_cache_dir = binary_cache_dir 425 self.readelf = readelf 426 self.build_id_map = self._load_build_id_map() 427 428 def _load_build_id_map(self) -> Dict[str, Path]: 429 build_id_map: Dict[str, Path] = {} 430 if self.binary_cache_dir: 431 build_id_list_file = self.binary_cache_dir / 'build_id_list' 432 if build_id_list_file.is_file(): 433 with open(self.binary_cache_dir / 'build_id_list', 'rb') as fh: 434 for line in fh.readlines(): 435 # lines are in format "<build_id>=<path_in_binary_cache>". 436 items = bytes_to_str(line).strip().split('=') 437 if len(items) == 2: 438 build_id_map[items[0]] = self.binary_cache_dir / items[1] 439 return build_id_map 440 441 def find_binary(self, dso_path_in_record_file: str, 442 expected_build_id: Optional[str]) -> Optional[Path]: 443 """ If expected_build_id is None, don't check build id. 444 Otherwise, the build id of the found binary should match the expected one.""" 445 # Find binary from build id map. 446 if expected_build_id: 447 path = self.build_id_map.get(expected_build_id) 448 if path and self._check_path(path, expected_build_id): 449 return path 450 # Find binary by path in binary cache. 451 if self.binary_cache_dir: 452 path = self.binary_cache_dir / dso_path_in_record_file[1:].replace('/', os.sep) 453 if self._check_path(path, expected_build_id): 454 return path 455 # Find binary by its absolute path. 
456 path = Path(dso_path_in_record_file) 457 if self._check_path(path, expected_build_id): 458 return path 459 return None 460 461 def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool: 462 if not self.readelf.is_elf_file(path): 463 return False 464 if expected_build_id is not None: 465 return self.readelf.get_build_id(path) == expected_build_id 466 return True 467 468 469class Addr2Nearestline(object): 470 """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line). 471 For instructions generated by C++ compilers without a matching statement in source code 472 (like stack corruption check, switch optimization, etc.), addr2line can't generate 473 line information. However, we want to assign the instruction to the nearest line before 474 the instruction (just like objdump -dl). So we use below strategy: 475 Instead of finding the exact line of the instruction in an address, we find the nearest 476 line to the instruction in an address. If an address doesn't have a line info, we find 477 the line info of address - 1. If still no line info, then use address - 2, address - 3, 478 etc. 479 480 The implementation steps are as below: 481 1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the 482 times to call addr2line. 483 2. Convert addrs to (source_file, line) pairs for each dso_path as below: 484 2.1 Check if the dso_path has .debug_line. If not, omit its conversion. 485 2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we 486 change addr each time. For example, since instructions of arm64 are all 4 bytes long, 487 addr_step for arm64 can be 4. 488 2.3 Use addr2line to find line info for each addr in the dso_path. 489 2.4 For each addr without line info, use addr2line to find line info for 490 range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step). 
491 2.5 For each addr without line info, use addr2line to find line info for 492 range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step). 493 (128 is a guess number. A nested switch statement in 494 system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.) 495 """ 496 class Dso(object): 497 """ Info of a dynamic shared library. 498 addrs: a map from address to Addr object in this dso. 499 """ 500 501 def __init__(self, build_id: Optional[str]): 502 self.build_id = build_id 503 self.addrs: Dict[int, Addr2Nearestline.Addr] = {} 504 # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, 505 # and provide data structures connecting file id and file name here. 506 self.file_name_to_id: Dict[str, int] = {} 507 self.file_id_to_name: List[str] = [] 508 self.func_name_to_id: Dict[str, int] = {} 509 self.func_id_to_name: List[str] = [] 510 511 def get_file_id(self, file_path: str) -> int: 512 file_id = self.file_name_to_id.get(file_path) 513 if file_id is None: 514 file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) 515 self.file_id_to_name.append(file_path) 516 return file_id 517 518 def get_func_id(self, func_name: str) -> int: 519 func_id = self.func_name_to_id.get(func_name) 520 if func_id is None: 521 func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) 522 self.func_id_to_name.append(func_name) 523 return func_id 524 525 class Addr(object): 526 """ Info of an addr request. 527 func_addr: start_addr of the function containing addr. 528 source_lines: a list of [file_id, line_number] for addr. 529 source_lines[:-1] are all for inlined functions. 
530 """ 531 532 def __init__(self, func_addr: int): 533 self.func_addr = func_addr 534 self.source_lines: Optional[List[int, int]] = None 535 536 def __init__( 537 self, ndk_path: Optional[str], 538 binary_finder: BinaryFinder, with_function_name: bool): 539 self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path) 540 if not self.symbolizer_path: 541 log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE) 542 self.readelf = ReadElf(ndk_path) 543 self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso. 544 self.binary_finder = binary_finder 545 self.with_function_name = with_function_name 546 547 def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int): 548 dso = self.dso_map.get(dso_path) 549 if dso is None: 550 dso = self.dso_map[dso_path] = self.Dso(build_id) 551 if addr not in dso.addrs: 552 dso.addrs[addr] = self.Addr(func_addr) 553 554 def convert_addrs_to_lines(self, jobs: int): 555 with ThreadPoolExecutor(jobs) as executor: 556 futures: List[Future] = [] 557 for dso_path, dso in self.dso_map.items(): 558 futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso)) 559 for future in futures: 560 # Call future.result() to report exceptions raised in the executor. 561 future.result() 562 563 def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): 564 real_path = self.binary_finder.find_binary(dso_path, dso.build_id) 565 if not real_path: 566 if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: 567 logging.debug("Can't find dso %s" % dso_path) 568 return 569 570 if not self._check_debug_line_section(real_path): 571 logging.debug("file %s doesn't contain .debug_line section." 
% real_path) 572 return 573 574 addr_step = self._get_addr_step(real_path) 575 self._collect_line_info(dso, real_path, [0]) 576 self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step)) 577 self._collect_line_info(dso, real_path, 578 range(-addr_step * 5, -addr_step * 128 - 1, -addr_step)) 579 580 def _check_debug_line_section(self, real_path: Path) -> bool: 581 return '.debug_line' in self.readelf.get_sections(real_path) 582 583 def _get_addr_step(self, real_path: Path) -> int: 584 arch = self.readelf.get_arch(real_path) 585 if arch == 'arm64': 586 return 4 587 if arch == 'arm': 588 return 2 589 return 1 590 591 def _collect_line_info( 592 self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]): 593 """ Use addr2line to get line info in a dso, with given addr shifts. """ 594 # 1. Collect addrs to send to addr2line. 595 addr_set: Set[int] = set() 596 for addr in dso.addrs: 597 addr_obj = dso.addrs[addr] 598 if addr_obj.source_lines: # already has source line, no need to search. 599 continue 600 for shift in addr_shifts: 601 # The addr after shift shouldn't change to another function. 602 shifted_addr = max(addr + shift, addr_obj.func_addr) 603 addr_set.add(shifted_addr) 604 if shifted_addr == addr_obj.func_addr: 605 break 606 if not addr_set: 607 return 608 addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)]) 609 610 # 2. Use addr2line to collect line info. 611 try: 612 subproc = subprocess.Popen(self._build_symbolizer_args(real_path), 613 stdin=subprocess.PIPE, stdout=subprocess.PIPE) 614 (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request)) 615 stdoutdata = bytes_to_str(stdoutdata) 616 except OSError: 617 return 618 addr_map = self.parse_line_output(stdoutdata, dso) 619 620 # 3. Fill line info in dso.addrs. 
621 for addr in dso.addrs: 622 addr_obj = dso.addrs[addr] 623 if addr_obj.source_lines: 624 continue 625 for shift in addr_shifts: 626 shifted_addr = max(addr + shift, addr_obj.func_addr) 627 lines = addr_map.get(shifted_addr) 628 if lines: 629 addr_obj.source_lines = lines 630 break 631 if shifted_addr == addr_obj.func_addr: 632 break 633 634 def _build_symbolizer_args(self, binary_path: Path) -> List[str]: 635 args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path] 636 if self.with_function_name: 637 args += ['--functions=linkage', '--demangle'] 638 else: 639 args.append('--functions=none') 640 return args 641 642 def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int, 643 List[Tuple[int]]]: 644 """ 645 The output is a list of lines. 646 address1 647 function_name1 (the function name can be empty) 648 source_location1 649 function_name2 650 source_location2 651 ... 652 (end with empty line) 653 """ 654 655 addr_map: Dict[int, List[Tuple[int]]] = {} 656 lines = output.strip().splitlines() 657 i = 0 658 while i < len(lines): 659 address = self._parse_line_output_address(lines[i]) 660 i += 1 661 if address is None: 662 continue 663 info = [] 664 while i < len(lines): 665 if self.with_function_name: 666 if i + 1 == len(lines): 667 break 668 function_name = lines[i].strip() 669 if not function_name and (':' not in lines[i+1]): 670 # no more frames 671 break 672 i += 1 673 elif not lines[i]: 674 i += 1 675 break 676 677 file_path, line_number = self._parse_line_output_source_location(lines[i]) 678 i += 1 679 if not file_path or not line_number: 680 # An addr can have a list of (file, line), when the addr belongs to an inlined 681 # function. Sometimes only part of the list has ? mark. In this case, we think 682 # the line info is valid if the first line doesn't have ? mark. 
683 if not info: 684 break 685 continue 686 file_id = dso.get_file_id(file_path) 687 if self.with_function_name: 688 func_id = dso.get_func_id(function_name) 689 info.append((file_id, line_number, func_id)) 690 else: 691 info.append((file_id, line_number)) 692 if info: 693 addr_map[address] = info 694 return addr_map 695 696 def _parse_line_output_address(self, output: str) -> Optional[int]: 697 if output.startswith('0x'): 698 return int(output, 16) 699 return None 700 701 def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: 702 file_path, line_number = None, None 703 # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25". 704 # Filename may contain ':' like "C:\Users\...\file". 705 items = line.rsplit(':', 2) 706 if len(items) == 3: 707 file_path, line_number = items[:2] 708 if not file_path or ('?' in file_path) or not line_number or ('?' in line_number): 709 return None, None 710 try: 711 line_number = int(line_number) 712 except ValueError: 713 return None, None 714 return file_path, line_number 715 716 def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso: 717 return self.dso_map.get(dso_path) 718 719 def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int) -> Optional[List[Tuple[int]]]: 720 source = dso.addrs[addr].source_lines 721 if source is None: 722 return None 723 if self.with_function_name: 724 return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id]) 725 for (file_id, line, func_id) in source] 726 return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source] 727 728 729class SourceFileSearcher(object): 730 """ Find source file paths in the file system. 731 The file paths reported by addr2line are the paths stored in debug sections 732 of shared libraries. And we need to convert them to file paths in the file 733 system. It is done in below steps: 734 1. Collect all file paths under the provided source_dirs. 
The suffix of a 735 source file should contain one of below: 736 h: for C/C++ header files. 737 c: for C/C++ source files. 738 java: for Java source files. 739 kt: for Kotlin source files. 740 2. Given an abstract_path reported by addr2line, select the best real path 741 as below: 742 2.1 Find all real paths with the same file name as the abstract path. 743 2.2 Select the real path having the longest common suffix with the abstract path. 744 """ 745 746 SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++', 747 '.c', '.cc', '.C', '.cxx', '.cpp', '.c++', 748 '.java', '.kt'} 749 750 @classmethod 751 def is_source_filename(cls, filename: str) -> bool: 752 ext = os.path.splitext(filename)[1] 753 return ext in cls.SOURCE_FILE_EXTS 754 755 def __init__(self, source_dirs: List[str]): 756 # Map from filename to a list of reversed directory path containing filename. 757 self.filename_to_rparents: Dict[str, List[str]] = {} 758 self._collect_paths(source_dirs) 759 760 def _collect_paths(self, source_dirs: List[str]): 761 for source_dir in source_dirs: 762 for parent, _, file_names in os.walk(source_dir): 763 rparent = None 764 for file_name in file_names: 765 if self.is_source_filename(file_name): 766 rparents = self.filename_to_rparents.get(file_name) 767 if rparents is None: 768 rparents = self.filename_to_rparents[file_name] = [] 769 if rparent is None: 770 rparent = parent[::-1] 771 rparents.append(rparent) 772 773 def get_real_path(self, abstract_path: str) -> Optional[str]: 774 abstract_path = abstract_path.replace('/', os.sep) 775 abstract_parent, file_name = os.path.split(abstract_path) 776 abstract_rparent = abstract_parent[::-1] 777 real_rparents = self.filename_to_rparents.get(file_name) 778 if real_rparents is None: 779 return None 780 best_matched_rparent = None 781 best_common_length = -1 782 for real_rparent in real_rparents: 783 length = len(os.path.commonprefix((real_rparent, abstract_rparent))) 784 if length > best_common_length: 785 
best_common_length = length 786 best_matched_rparent = real_rparent 787 if best_matched_rparent is None: 788 return None 789 return os.path.join(best_matched_rparent[::-1], file_name) 790 791 792class AddrRange: 793 def __init__(self, start: int, len: int): 794 self.start = start 795 self.len = len 796 797 @property 798 def end(self) -> int: 799 return self.start + self.len 800 801 def is_in_range(self, addr: int) -> bool: 802 return addr >= self.start and addr < self.end 803 804 805class Disassembly: 806 def __init__(self): 807 self.lines: List[Tuple[str, int]] = [] 808 809 810class Objdump(object): 811 """ A wrapper of objdump to disassemble code. """ 812 813 def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder): 814 self.ndk_path = ndk_path 815 self.binary_finder = binary_finder 816 self.readelf = ReadElf(ndk_path) 817 self.objdump_paths: Dict[str, str] = {} 818 819 def get_dso_info(self, dso_path: str, expected_build_id: Optional[str] 820 ) -> Optional[Tuple[str, str]]: 821 real_path = self.binary_finder.find_binary(dso_path, expected_build_id) 822 if not real_path: 823 return None 824 arch = self.readelf.get_arch(real_path) 825 if arch == 'unknown': 826 return None 827 return (str(real_path), arch) 828 829 def disassemble_function(self, dso_info, addr_range: AddrRange) -> Optional[Disassembly]: 830 """ Disassemble code for an addr range in a binary. 831 """ 832 real_path, arch = dso_info 833 objdump_path = self.objdump_paths.get(arch) 834 if not objdump_path: 835 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 836 if not objdump_path: 837 log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) 838 self.objdump_paths[arch] = objdump_path 839 840 # Run objdump. 
841 args = [objdump_path, '-dlC', '--no-show-raw-insn', 842 '--start-address=0x%x' % addr_range.start, 843 '--stop-address=0x%x' % (addr_range.end), 844 real_path] 845 if arch == 'arm' and 'llvm-objdump' in objdump_path: 846 args += ['--print-imm-hex'] 847 logging.debug('disassembling: %s', ' '.join(args)) 848 try: 849 subproc = subprocess.Popen(args, stdout=subprocess.PIPE) 850 (stdoutdata, _) = subproc.communicate() 851 stdoutdata = bytes_to_str(stdoutdata) 852 except OSError: 853 return None 854 855 if not stdoutdata: 856 return None 857 result = Disassembly() 858 for line in stdoutdata.split('\n'): 859 line = line.rstrip() # Remove '\r' on Windows. 860 items = line.split(':', 1) 861 try: 862 addr = int(items[0], 16) 863 except ValueError: 864 addr = 0 865 result.lines.append((line, addr)) 866 return result 867 868 def disassemble_functions(self, dso_info, sorted_addr_ranges: List[AddrRange] 869 ) -> Optional[List[Disassembly]]: 870 """ Disassemble code for multiple addr ranges in a binary. sorted_addr_ranges should be 871 sorted by addr_range.start. 872 """ 873 if not sorted_addr_ranges: 874 return [] 875 real_path, arch = dso_info 876 objdump_path = self.objdump_paths.get(arch) 877 if not objdump_path: 878 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) 879 if not objdump_path: 880 log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) 881 self.objdump_paths[arch] = objdump_path 882 883 # Run objdump. 
884 start_addr = sorted_addr_ranges[0].start 885 stop_addr = max(addr_range.end for addr_range in sorted_addr_ranges) 886 args = [objdump_path, '-dlC', '--no-show-raw-insn', 887 '--start-address=0x%x' % start_addr, 888 '--stop-address=0x%x' % stop_addr, 889 real_path] 890 if arch == 'arm' and 'llvm-objdump' in objdump_path: 891 args += ['--print-imm-hex'] 892 try: 893 proc = subprocess.Popen(args, stdout=subprocess.PIPE, text=True) 894 result = self._parse_disassembly_for_functions(proc.stdout, sorted_addr_ranges) 895 proc.wait() 896 except OSError: 897 return None 898 return result 899 900 def _parse_disassembly_for_functions(self, fh: TextIO, sorted_addr_ranges: List[AddrRange]) -> Optional[List[Disassembly]]: 901 current_id = 0 902 in_range = False 903 result = [Disassembly() for _ in sorted_addr_ranges] 904 while True: 905 line = fh.readline() 906 if not line: 907 break 908 line = line.rstrip() # Remove '\r\n'. 909 addr = self._get_addr_from_disassembly_line(line) 910 if current_id >= len(sorted_addr_ranges): 911 continue 912 if addr: 913 if in_range and not sorted_addr_ranges[current_id].is_in_range(addr): 914 in_range = False 915 if not in_range: 916 # Skip addr ranges before the current address. 917 while current_id < len(sorted_addr_ranges) and sorted_addr_ranges[current_id].end <= addr: 918 current_id += 1 919 if current_id < len(sorted_addr_ranges) and sorted_addr_ranges[current_id].is_in_range(addr): 920 in_range = True 921 if in_range: 922 result[current_id].lines.append((line, addr)) 923 return result 924 925 def _get_addr_from_disassembly_line(self, line: str) -> int: 926 # line may be an instruction, like: " 24a469c: stp x29, x30, [sp, #-0x60]!" or 927 # "ffffffc0085d9664: paciasp". 928 # line may be a function start point, like "00000000024a4698 <DoWork()>:". 
class ReadElf(object):
    """ A wrapper of readelf. """

    # Markers searched in `readelf -h` output, checked in this order; the first hit wins.
    _ARCH_MARKERS = (
        ('AArch64', 'arm64'),
        ('ARM', 'arm'),
        ('X86-64', 'x86_64'),
        ('80386', 'x86'),
        ('RISC-V', 'riscv64'),
    )
    _BUILD_ID_RE = re.compile(r'Build ID:\s*(\S+)')
    # Matches a line like: "  [ 1] .note.android.ident NOTE 0000000000400190 ...".
    _SECTION_RE = re.compile(r'^\s+\[\s*\d+\]\s(.+?)\s')

    def __init__(self, ndk_path: Optional[str]):
        """ Find llvm-readelf via ToolFinder; exit through log_exit() if it isn't found. """
        self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)

    @staticmethod
    def is_elf_file(path: Union[Path, str]) -> bool:
        """ Return True if path is a regular file starting with the ELF magic bytes. """
        if os.path.isfile(path):
            with open(path, 'rb') as fh:
                return fh.read(4) == b'\x7fELF'
        return False

    def _run_readelf(self, option: str, elf_file_path: Union[Path, str]) -> Optional[str]:
        """ Run readelf with one option on an elf file.

        Returns the decoded output, or None if the path isn't an elf file or readelf
        exits with a non-zero status.
        """
        if not self.is_elf_file(elf_file_path):
            return None
        try:
            output = subprocess.check_output([self.readelf_path, option, str(elf_file_path)])
        except subprocess.CalledProcessError:
            return None
        return bytes_to_str(output)

    def get_arch(self, elf_file_path: Union[Path, str]) -> str:
        """ Get arch of an elf file.

        Returns one of 'arm64', 'arm', 'x86_64', 'x86', 'riscv64', or 'unknown' when the
        arch can't be determined.
        """
        output = self._run_readelf('-h', elf_file_path)
        if output is not None:
            for marker, arch in self._ARCH_MARKERS:
                if marker in output:
                    return arch
        return 'unknown'

    def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str:
        """ Get build id of an elf file.

        Returns the build id found in the `readelf -n` output (padded by pad_build_id() if
        with_padding is set), or "" if no build id is available.
        """
        output = self._run_readelf('-n', elf_file_path)
        if output is not None:
            match = self._BUILD_ID_RE.search(output)
            if match:
                build_id = match.group(1)
                return self.pad_build_id(build_id) if with_padding else build_id
        return ""

    @staticmethod
    def pad_build_id(build_id: str) -> str:
        """ Pad build id to 40 hex numbers (20 bytes).

        Shorter ids are right-padded with '0', longer ids are truncated, and the result is
        prefixed with '0x'.
        """
        return '0x' + (build_id + '0' * 40)[:40]

    @staticmethod
    def unpad_build_id(build_id: str) -> str:
        """ Drop a leading '0x' and trailing groups of eight '0' from a build id. """
        if build_id.startswith('0x'):
            build_id = build_id[2:]
        # Unpad build id as TrimZeroesFromBuildIDString() in quipper.
        padding = '0' * 8
        while build_id.endswith(padding):
            build_id = build_id[:-len(padding)]
        return build_id

    def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
        """ Get sections of an elf file.

        Returns the section names parsed from the `readelf -SW` output, or [] if they
        can't be read.
        """
        section_names: List[str] = []
        output = self._run_readelf('-SW', elf_file_path)
        if output is None:
            return section_names
        for line in output.split('\n'):
            match = self._SECTION_RE.search(line)
            if match:
                section_name = match.group(1).strip()
                if section_name:
                    section_names.append(section_name)
        return section_names


def extant_dir(arg: str) -> str:
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    path = os.path.realpath(arg)
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError('{} is not a directory.'.format(path))
    return path


def extant_file(arg: str) -> str:
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    path = os.path.realpath(arg)
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError('{} is not a file.'.format(path))
    return path
def log_fatal(msg: str):
    """ Report an unrecoverable error by raising an Exception carrying msg. """
    raise Exception(msg)


def log_exit(msg: str):
    """ Terminate the program by calling sys.exit(msg). """
    sys.exit(msg)


class LogFormatter(logging.Formatter):
    """ Use custom logging format. """

    def __init__(self):
        super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s')

    def formatTime(self, record, datefmt=None):
        # datefmt is accepted for compatibility with logging.Formatter but ignored: the
        # time is always rendered as HH:MM:SS followed by ',' and the milliseconds.
        return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs


class Log:
    """ One-time setup of the root logger. """
    initialized = False

    @classmethod
    def init(cls, log_level: str = 'info'):
        """ Set the root logger level and attach a stream handler using LogFormatter.

        Must be called at most once; callers can check Log.initialized first.
        """
        assert not cls.initialized
        cls.initialized = True
        cls.logger = logging.root
        cls.logger.setLevel(log_level.upper())
        handler = logging.StreamHandler()
        handler.setFormatter(LogFormatter())
        cls.logger.addHandler(handler)


class ArgParseFormatter(
        argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """ Help formatter combining default-value display with raw description text. """
    pass


@dataclass
class ReportLibOptions:
    """ Report lib settings collected by BaseArgumentParser.parse_known_args(). """
    # Value of --show-art-frames.
    show_art_frames: bool
    # Values of --remove-method, None if the option isn't given.
    remove_method: Optional[List[str]]
    # Value of --trace-offcpu, None if the option isn't given.
    trace_offcpu: Optional[str]
    # Values of --proguard-mapping-file, None if the option isn't given.
    proguard_mapping_files: Optional[List[str]]
    # Filter arguments built by BaseArgumentParser._build_sample_filter().
    sample_filters: List[str]
    # Values of --aggregate-threads, None if the option isn't given.
    aggregate_threads: Optional[List[str]]


class BaseArgumentParser(argparse.ArgumentParser):
    """ ArgumentParser with shared options.

    It always uses ArgParseFormatter, adds a --log option when parsing, initializes Log
    from that option, and can add report lib options and sample filter options.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter)
        self.has_sample_filter_options = False
        self.sample_filter_with_pid_shortcut = False
        self.has_report_lib_options = False
        # --log is registered lazily in parse_known_args(); remember whether it was done.
        self._log_option_added = False

    def add_report_lib_options(self, group: Optional[Any] = None,
                               default_show_art_frames: bool = False,
                               sample_filter_group: Optional[Any] = None,
                               sample_filter_with_pid_shortcut: bool = True):
        """ Add report lib options and sample filter options.

        Args:
            group: argument group receiving the report lib options; the parser itself is
                used if not set.
            default_show_art_frames: default value of --show-art-frames.
            sample_filter_group: argument group receiving the sample filter options; a new
                'Sample filter options' group is created if not set.
            sample_filter_with_pid_shortcut: whether to add the --pid and --tid shortcuts.
        """
        self.has_report_lib_options = True
        parser = group if group else self
        parser.add_argument(
            '--proguard-mapping-file', nargs='+',
            help='Add proguard mapping file to de-obfuscate symbols')
        parser.add_argument('--show-art-frames', '--show_art_frames',
                            action=argparse.BooleanOptionalAction, default=default_show_art_frames,
                            help='Show frames of internal methods in the ART Java interpreter.')
        parser.add_argument('--remove-method', nargs='+', metavar='method_name_regex',
                            help='remove methods with name containing the regular expression')
        parser.add_argument(
            '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'],
            help="""Set report mode for profiles recorded with --trace-offcpu option. All possible
                    modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples),
                    on-off-cpu (both on-cpu and off-cpu samples, can be split by event name),
                    mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name).
                    If not set, mixed-on-off-cpu mode is used.
                """)
        self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut)
        parser.add_argument(
            '--aggregate-threads', nargs='+', metavar='thread_name_regex',
            help="""Aggregate threads with names matching the same regex. As a result, samples from
                    different threads (like a thread pool) can be shown in one flamegraph.
                """)

    def _add_sample_filter_options(
            self, group: Optional[Any] = None, with_pid_shortcut: bool = True):
        """ Add options used to include or exclude samples.

        Args:
            group: argument group receiving the options; a new 'Sample filter options'
                group is created if not set.
            with_pid_shortcut: whether to add --pid and --tid next to --include-pid and
                --include-tid.
        """
        if not group:
            group = self.add_argument_group('Sample filter options')
        group.add_argument('--cpu', nargs='+', help="""only include samples for the selected cpus.
                            cpu can be a number like 1, or a range like 0-3""")
        group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int,
                           help='exclude samples for selected processes')
        group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int,
                           help='exclude samples for selected threads')
        group.add_argument(
            '--exclude-process-name', metavar='process_name_regex', nargs='+',
            help='exclude samples for processes with name containing the regular expression')
        group.add_argument(
            '--exclude-thread-name', metavar='thread_name_regex', nargs='+',
            help='exclude samples for threads with name containing the regular expression')

        if with_pid_shortcut:
            group.add_argument('--pid', metavar='pid', nargs='+', type=int,
                               help='only include samples for selected processes')
            group.add_argument('--tid', metavar='tid', nargs='+', type=int,
                               help='only include samples for selected threads')
        group.add_argument('--include-pid', metavar='pid', nargs='+', type=int,
                           help='only include samples for selected processes')
        group.add_argument('--include-tid', metavar='tid', nargs='+', type=int,
                           help='only include samples for selected threads')
        group.add_argument(
            '--include-process-name', metavar='process_name_regex', nargs='+',
            help='only include samples for processes with name containing the regular expression')
        group.add_argument(
            '--comm', '--include-thread-name', metavar='thread_name_regex',
            dest='include_thread_name', nargs='+',
            help='only include samples for threads with name containing the regular expression')
        group.add_argument(
            '--filter-file', metavar='file',
            help='use filter file to filter samples based on timestamps. ' +
            'The file format is in doc/sampler_filter.md.')
        self.has_sample_filter_options = True
        self.sample_filter_with_pid_shortcut = with_pid_shortcut

    def _build_sample_filter(self, args: argparse.Namespace) -> List[str]:
        """ Build sample filters, which can be passed to ReportLib.SetSampleFilter().

        Number lists are joined with ',' into a single value, while each name regex gets
        its own option/value pair. --pid and --tid are emitted as --include-pid and
        --include-tid.
        """
        def joined(values) -> str:
            return ','.join(str(value) for value in values)

        filters: List[str] = []
        if args.cpu:
            filters += ['--cpu', joined(args.cpu)]
        if args.exclude_pid:
            filters += ['--exclude-pid', joined(args.exclude_pid)]
        if args.exclude_tid:
            filters += ['--exclude-tid', joined(args.exclude_tid)]
        for name in args.exclude_process_name or []:
            filters += ['--exclude-process-name', name]
        for name in args.exclude_thread_name or []:
            filters += ['--exclude-thread-name', name]

        if args.include_pid:
            filters += ['--include-pid', joined(args.include_pid)]
        if args.include_tid:
            filters += ['--include-tid', joined(args.include_tid)]
        # --pid and --tid only exist when the shortcuts were requested.
        if self.sample_filter_with_pid_shortcut:
            if args.pid:
                filters += ['--include-pid', joined(args.pid)]
            if args.tid:
                filters += ['--include-tid', joined(args.tid)]
        for name in args.include_process_name or []:
            filters += ['--include-process-name', name]
        for name in args.include_thread_name or []:
            filters += ['--include-thread-name', name]
        if args.filter_file:
            filters += ['--filter-file', args.filter_file]
        return filters

    def parse_known_args(self, *args, **kwargs):
        """ Parse arguments, then attach report lib options and initialize logging.

        Returns:
            (namespace, left_args) as argparse.ArgumentParser.parse_known_args() does. If
            report lib options were added, namespace.report_lib_options holds a
            ReportLibOptions built from the parsed values.
        """
        # Register --log only once: adding the same option string again makes argparse
        # raise a conflict error, which broke parsing twice with the same parser.
        if not self._log_option_added:
            self.add_argument(
                '--log', choices=['debug', 'info', 'warning'],
                default='info', help='set log level')
            self._log_option_added = True
        namespace, left_args = super().parse_known_args(*args, **kwargs)

        if self.has_report_lib_options:
            sample_filters = self._build_sample_filter(namespace)
            report_lib_options = ReportLibOptions(
                namespace.show_art_frames, namespace.remove_method, namespace.trace_offcpu,
                namespace.proguard_mapping_file, sample_filters, namespace.aggregate_threads)
            setattr(namespace, 'report_lib_options', report_lib_options)

        if not Log.initialized:
            Log.init(namespace.log)
        return namespace, left_args