1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""utils.py: export utility functions.
19"""
20
21from __future__ import annotations
22import argparse
23from concurrent.futures import Future, ThreadPoolExecutor
24from dataclasses import dataclass
25import logging
26import os
27import os.path
28from pathlib import Path
29import re
30import shutil
31import subprocess
32import sys
33import time
34from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union, TextIO
35
36
# Hint appended to error messages when a tool expected from the Android NDK can't be found.
NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option."
38
39
def get_script_dir() -> str:
    """ Return the directory containing this file, with symlinks resolved. """
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)
42
43
def is_windows() -> bool:
    """ Return True when sys.platform is 'win32' or 'cygwin'. """
    return sys.platform in ('win32', 'cygwin')
46
47
def is_darwin() -> bool:
    """ Return True when sys.platform is 'darwin'. """
    host_platform = sys.platform
    return host_platform == 'darwin'
50
51
def get_platform() -> str:
    """ Return the host platform name: 'windows', 'darwin', or 'linux' for anything else. """
    if is_windows():
        platform_name = 'windows'
    elif is_darwin():
        platform_name = 'darwin'
    else:
        platform_name = 'linux'
    return platform_name
58
59
def str_to_bytes(str_value: str) -> bytes:
    """ Encode a str as utf-8 bytes.

        Python 3 strings are wide strings, while the C api expects 8 bit strings,
        so a conversion is needed. utf-8 is the encoding used for now.
    """
    encoded = str_value.encode('utf-8')
    return encoded
64
65
def bytes_to_str(bytes_value: Optional[bytes]) -> str:
    """ Decode utf-8 bytes to a str. None and empty input both give ''. """
    return bytes_value.decode('utf-8') if bytes_value else ''
70
71
def get_target_binary_path(arch: str, binary_name: str) -> str:
    """ Return the path of an Android binary shipped under <script_dir>/bin/android/<arch>.

        arch: target architecture; 'aarch64' is treated as 'arm64'.
        binary_name: file name of the binary inside the arch directory.
        log_fatal() is called when the arch directory or the binary is missing.
    """
    # Binaries for 'aarch64' live in the 'arm64' directory.
    arch_name = 'arm64' if arch == 'aarch64' else arch
    android_dir = os.path.join(get_script_dir(), "bin", "android", arch_name)
    if not os.path.isdir(android_dir):
        log_fatal("can't find arch directory: %s" % android_dir)
    target_path = os.path.join(android_dir, binary_name)
    if not os.path.isfile(target_path):
        log_fatal("can't find binary: %s" % target_path)
    return target_path
82
83
def get_host_binary_path(binary_name: str) -> str:
    """ Return the path of a host binary shipped under <script_dir>/bin/<platform>/<x86_64|x86>.

        binary_name is adjusted to the host convention first: on windows '.so' becomes '.dll'
        and a name without any '.' gets '.exe'; on darwin '.so' becomes '.dylib'.
        log_fatal() is called when the resulting file doesn't exist.
    """
    # Name without the '.so' suffix, or None when binary_name isn't a '.so' file.
    shared_lib_stem = binary_name[0:-3] if binary_name.endswith('.so') else None
    if is_windows():
        platform_dir = 'windows'
        if shared_lib_stem is not None:
            binary_name = shared_lib_stem + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
    elif sys.platform == 'darwin':  # OSX
        platform_dir = 'darwin'
        if shared_lib_stem is not None:
            binary_name = shared_lib_stem + '.dylib'
    else:
        platform_dir = 'linux'
    # Choose the directory by the pointer width of the running interpreter.
    bits_dir = 'x86_64' if sys.maxsize > 2 ** 32 else 'x86'
    binary_path = os.path.join(get_script_dir(), 'bin', platform_dir, bits_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path
103
104
def is_executable_available(executable: str, option='--help') -> bool:
    """ Run an executable to see if it exists.

        The executable is started with the single argument `option`, and its output is
        captured and discarded. Return True only if it could be started and exited with 0.
    """
    try:
        completed = subprocess.run([executable, option], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
    except OSError:
        # The executable is missing or can't be started.
        return False
    return completed.returncode == 0
114
115
class ToolFinder:
    """ Find tools in ndk or sdk. """
    # Default sdk installation path for each platform, relative to the home directory.
    DEFAULT_SDK_PATH = {
        'darwin': 'Library/Android/sdk',
        'linux': 'Android/Sdk',
        'windows': 'AppData/Local/Android/sdk',
    }

    # Tools known to find_tool_path(). Each entry may contain:
    #   is_binutils: True if the tool name in the ndk carries an arch specific prefix.
    #   test_option: option used to check that the tool can run ('--help' if omitted).
    #   path_in_sdk: path of the tool relative to the sdk directory.
    #   path_in_ndk: function mapping a platform name to the tool path relative to the
    #                ndk directory.
    EXPECTED_TOOLS = {
        'adb': {
            'is_binutils': False,
            'test_option': 'version',
            'path_in_sdk': 'platform-tools/adb',
        },
        'llvm-objdump': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform,
        },
        'llvm-readelf': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform,
        },
        'llvm-symbolizer': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
        },
        'llvm-strip': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform,
        },
    }

    @staticmethod
    def _ndk_version_key(name: str) -> Tuple[Tuple[int, ...], str]:
        """ Build a sort key comparing ndk directory names like '25.1.8937393' numerically.

            Components which aren't plain ascii numbers count as -1, so names which don't
            look like versions sort before real versions. The name itself breaks ties.
        """
        components = tuple(int(part) if part.isascii() and part.isdigit() else -1
                           for part in name.split('.'))
        return components, name

    @classmethod
    def _newest_ndk_version(cls, names: List[str]) -> Optional[str]:
        """ Return the name with the highest version in names, or None if names is empty. """
        return max(names, key=cls._ndk_version_key, default=None)

    @classmethod
    def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None
                               ) -> Iterator[Tuple[Optional[str], Optional[str]]]:
        """ Yield candidate (ndk_path, sdk_path) pairs, in the order they should be tried.

            ndk_path: ndk path given by the user. It is tried first when it is a directory.
            The sdk path of a pair is None when no sdk is found next to the ndk.
        """
        # Use the given ndk path.
        if ndk_path and os.path.isdir(ndk_path):
            ndk_path = os.path.abspath(ndk_path)
            yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the parent directory containing simpleperf scripts.
        ndk_path = os.path.dirname(os.path.abspath(get_script_dir()))
        yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the default sdk installation path.
        if is_windows():
            # Either variable can be unset. Skip the default sdk location in that case
            # instead of failing on None + str.
            home_drive = os.environ.get('HOMEDRIVE')
            home_path = os.environ.get('HOMEPATH')
            if home_drive is not None and home_path is not None:
                home = home_drive + home_path
            else:
                home = None
        else:
            home = os.environ.get('HOME')
        if home:
            platform = get_platform()
            sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep))
            if os.path.isdir(sdk_path):
                path = os.path.join(sdk_path, 'ndk')
                if os.path.isdir(path):
                    # Android Studio can install multiple ndk versions in 'ndk'.
                    # Find the newest one. Versions are compared numerically, because a
                    # plain string comparison would rank '9.0.1' above '25.1.8937393'.
                    ndk_version = cls._newest_ndk_version(os.listdir(path))
                    if ndk_version:
                        yield os.path.join(path, ndk_version), sdk_path
            ndk_path = os.path.join(sdk_path, 'ndk-bundle')
            if os.path.isdir(ndk_path):
                yield ndk_path, sdk_path

    @classmethod
    def find_sdk_path(cls, ndk_path: str) -> Optional[str]:
        """ Return the closest of the two nearest ancestor directories of ndk_path which
            contains a 'platform-tools' directory, or None if neither does.
        """
        path = ndk_path
        for _ in range(2):
            path = os.path.dirname(path)
            if os.path.isdir(os.path.join(path, 'platform-tools')):
                return path
        return None

    @classmethod
    def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str
                                  ) -> Tuple[str, str]:
        """ Return (tool name with arch prefix, tool path relative to the ndk directory).

            arch: one of 'arm64', 'arm', 'x86_64', 'x86'. 'arm64' is used if not given.
            log_fatal() is called for any other arch.
        """
        if not arch:
            arch = 'arm64'
        if arch == 'arm64':
            name = 'aarch64-linux-android-' + toolname
        elif arch == 'arm':
            name = 'arm-linux-androideabi-' + toolname
        elif arch == 'x86_64':
            name = 'x86_64-linux-android-' + toolname
        elif arch == 'x86':
            name = 'i686-linux-android-' + toolname
        else:
            log_fatal('unexpected arch %s' % arch)
        path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
        return (name, path)

    @classmethod
    def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None,
                       arch: Optional[str] = None) -> Optional[str]:
        """ Return a path (or a bare name found in $PATH) which can be used to run toolname.

            toolname: a key of EXPECTED_TOOLS. None is returned for unknown tools.
            ndk_path: optional ndk directory to try first.
            arch: target arch, only used for tools marked as binutils.
            A candidate is accepted when running it with the tool's test option succeeds.
            Return None if no candidate works.
        """
        tool_info = cls.EXPECTED_TOOLS.get(toolname)
        if not tool_info:
            return None

        is_binutils = tool_info['is_binutils']
        test_option = tool_info.get('test_option', '--help')
        platform = get_platform()

        # Find tool in clang prebuilts in Android platform.
        if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith(
                'system/extras/simpleperf/scripts'):
            path = str(
                Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' /
                'llvm-binutils-stable' / toolname)
            if is_executable_available(path, test_option):
                return path

        # Find tool in NDK or SDK.
        path_in_ndk = None
        path_in_sdk = None
        if is_binutils:
            toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk(
                toolname, arch, platform)
        else:
            toolname_with_arch = toolname
            if 'path_in_ndk' in tool_info:
                path_in_ndk = tool_info['path_in_ndk'](platform)
            elif 'path_in_sdk' in tool_info:
                path_in_sdk = tool_info['path_in_sdk']
        if path_in_ndk:
            path_in_ndk = path_in_ndk.replace('/', os.sep)
        elif path_in_sdk:
            path_in_sdk = path_in_sdk.replace('/', os.sep)

        for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path):
            if path_in_ndk and ndk_dir:
                path = os.path.join(ndk_dir, path_in_ndk)
                if is_executable_available(path, test_option):
                    return path
            elif path_in_sdk and sdk_dir:
                path = os.path.join(sdk_dir, path_in_sdk)
                if is_executable_available(path, test_option):
                    return path

        # Find tool in $PATH.
        if is_executable_available(toolname_with_arch, test_option):
            return toolname_with_arch

        # Find tool without arch in $PATH.
        if is_binutils and tool_info.get('accept_tool_without_arch'):
            if is_executable_available(toolname, test_option):
                return toolname
        return None
268
269
class AdbHelper(object):
    """ Run adb commands, optionally against the device selected by serial_number. """

    def __init__(self, enable_switch_to_root: bool = True):
        """ Locate adb through ToolFinder and call log_exit() if it can't be found.

            enable_switch_to_root: whether switch_to_root() may restart adbd as root.
        """
        found_adb = ToolFinder.find_tool_path('adb')
        if not found_adb:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path: str = found_adb
        self.enable_switch_to_root = enable_switch_to_root
        # When set, it is passed to adb through the ANDROID_SERIAL environment variable.
        self.serial_number: Optional[str] = None

    def is_device_available(self) -> bool:
        """ Return True if `adb shell whoami` succeeds. """
        succeeded, _ = self.run_and_return_output(['shell', 'whoami'])
        return succeeded

    def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool:
        """ Run an adb command and return whether it exited with 0. """
        succeeded, _ = self.run_and_return_output(adb_args, log_output, log_stderr)
        return succeeded

    def run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                              log_stderr: bool = False) -> Tuple[bool, str]:
        """ Run an adb command and return (exited with 0, decoded stdout).

            log_output: log non-empty stdout at debug level.
            log_stderr: log non-empty stderr at warning level.
        """
        full_cmd = [self.adb_path] + adb_args
        logging.debug('run adb cmd: %s' % full_cmd)
        # Only build a custom environment when a device has been selected.
        child_env = None
        if self.serial_number:
            child_env = dict(os.environ, ANDROID_SERIAL=self.serial_number)
        subproc = subprocess.Popen(
            full_cmd, env=child_env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        raw_stdout, raw_stderr = subproc.communicate()
        stdout_text = bytes_to_str(raw_stdout)
        stderr_text = bytes_to_str(raw_stderr)
        succeeded = subproc.returncode == 0
        if log_output and stdout_text:
            logging.debug(stdout_text)
        if log_stderr and stderr_text:
            logging.warning(stderr_text)
        logging.debug('run adb cmd: %s  [result %s]' % (full_cmd, succeeded))
        return succeeded, stdout_text

    def check_run(self, adb_args: List[str], log_output: bool = False):
        """ Run an adb command and call log_exit() if it fails. """
        self.check_run_and_return_output(adb_args, log_output)

    def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                                    log_stderr: bool = False) -> str:
        """ Run an adb command and return its stdout, calling log_exit() if it fails.

            stderr is always logged here, whatever the value of log_stderr.
        """
        succeeded, output = self.run_and_return_output(adb_args, log_output, True)
        if not succeeded:
            log_exit('run "adb %s" failed: %s' % (adb_args, output))
        return output

    def _unroot(self):
        """ Run `adb unroot` and wait for the device, if the adb shell currently runs as root. """
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        if not (succeeded and 'root' in whoami):
            return
        logging.info('unroot adb')
        self.run(['unroot'])
        time.sleep(1)
        self.run(['wait-for-device'])

    def switch_to_root(self) -> bool:
        """ Try to make the adb shell run as root. Return True if it runs as root afterwards.

            When switching to root is disabled, the shell is unrooted instead and False
            is returned. Rooting isn't attempted when ro.build.type is 'user'.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        if not succeeded:
            return False
        if 'root' in whoami:
            return True
        if self.get_property('ro.build.type') == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        succeeded, whoami = self.run_and_return_output(['shell', 'whoami'])
        return succeeded and 'root' in whoami

    def get_property(self, name: str) -> Optional[str]:
        """ Return the stripped output of `getprop name`, or None if the command fails. """
        succeeded, value = self.run_and_return_output(['shell', 'getprop', name])
        if not succeeded:
            return None
        return value.strip()

    def set_property(self, name: str, value: str) -> bool:
        """ Run `setprop name value` on the device and return whether it succeeded. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self) -> str:
        """ Map the output of `uname -m` to 'arm64', 'arm', 'x86_64', 'x86' or 'riscv64'.

            log_fatal() is called for any other output.
        """
        uname_output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # The order matters: the first marker found in the output decides the arch.
        arch_markers = (
            ('aarch64', 'arm64'),
            ('arm', 'arm'),
            ('x86_64', 'x86_64'),
            ('86', 'x86'),
            ('riscv64', 'riscv64'),
        )
        for marker, arch in arch_markers:
            if marker in uname_output:
                return arch
        log_fatal('unsupported architecture: %s' % uname_output.strip())
        return ''

    def get_android_version(self) -> int:
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.

            The version comes from ro.build.version.codename, or from
            ro.build.version.release if the codename is missing or 'REL'.
            Return 0 if the version can't be determined.
        """
        version_text = self.get_property('ro.build.version.codename')
        if not version_text or version_text == 'REL':
            version_text = self.get_property('ro.build.version.release')
        if not version_text:
            return 0
        # A numeric version: use its leading digits.
        digits_end = 0
        while digits_end < len(version_text) and version_text[digits_end].isdigit():
            digits_end += 1
        if digits_end > 0:
            return int(version_text[:digits_end])
        # A codename: its first letter gives the version, with 'L' being 5.
        first_letter = version_text[0].upper()
        if first_letter.isupper() and first_letter >= 'L':
            return ord(first_letter) - ord('L') + 5
        return 0
385
386
def flatten_arg_list(arg_list: List[List[str]]) -> List[str]:
    """ Concatenate a list of lists into one flat list. Empty or None input gives []. """
    if not arg_list:
        return []
    return [arg for items in arg_list for arg in items]
393
394
def remove(dir_or_file: Union[Path, str]):
    """ Delete a file, or a directory tree (ignoring errors). Do nothing if the path is neither. """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
        return
    if os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)
400
401
def open_report_in_browser(report_path: str):
    """ Open a report in a browser.

        On darwin the `open` command is tried first. Otherwise, or if `open` fails,
        Chrome is used if webbrowser can find it, else the default browser.
    """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
        except subprocess.CalledProcessError:
            pass
        else:
            return
    import webbrowser
    try:
        # Try to open the report with Chrome
        chrome = webbrowser.get('google-chrome')
        chrome.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)
418
419
class BinaryFinder:
    """ Find binaries on the host matching the dso paths and build ids of a recording.

        Binaries are searched through the build id list of the binary cache, then by path
        inside the binary cache, then by the recorded path itself.
    """

    def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: 'ReadElf'):
        """ binary_cache_dir: binary cache directory, or None to search without a cache.
            readelf: object used to check elf files and read their build ids.
        """
        if isinstance(binary_cache_dir, str):
            binary_cache_dir = Path(binary_cache_dir)
        self.binary_cache_dir = binary_cache_dir
        self.readelf = readelf
        self.build_id_map = self._load_build_id_map()

    def _load_build_id_map(self) -> Dict[str, Path]:
        """ Load <binary_cache_dir>/build_id_list into a map from build id to binary path.

            Return an empty map if there is no binary cache or no build_id_list file.
        """
        build_id_map: Dict[str, Path] = {}
        if not self.binary_cache_dir:
            return build_id_map
        build_id_list_file = self.binary_cache_dir / 'build_id_list'
        if not build_id_list_file.is_file():
            return build_id_map
        with open(build_id_list_file, encoding='utf-8') as fh:
            for line in fh:
                # lines are in format "<build_id>=<path_in_binary_cache>".
                # Split on the first '=' only, since the path itself may contain '='.
                build_id, separator, path_in_cache = line.strip().partition('=')
                if separator:
                    build_id_map[build_id] = self.binary_cache_dir / path_in_cache
        return build_id_map

    def find_binary(self, dso_path_in_record_file: str,
                    expected_build_id: Optional[str]) -> Optional[Path]:
        """ If expected_build_id is None, don't check build id.
            Otherwise, the build id of the found binary should match the expected one.
            Return None if no matching binary is found."""
        # Find binary from build id map.
        if expected_build_id:
            path = self.build_id_map.get(expected_build_id)
            if path and self._check_path(path, expected_build_id):
                return path
        # Find binary by path in binary cache.
        if self.binary_cache_dir:
            # Drop the first character (the leading '/' of an absolute path), so the
            # path is resolved inside the binary cache.
            path = self.binary_cache_dir / dso_path_in_record_file[1:].replace('/', os.sep)
            if self._check_path(path, expected_build_id):
                return path
        # Find binary by its absolute path.
        path = Path(dso_path_in_record_file)
        if self._check_path(path, expected_build_id):
            return path
        return None

    def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool:
        """ Return True if path is an elf file whose build id matches expected_build_id.

            The build id isn't checked when expected_build_id is None.
        """
        if not self.readelf.is_elf_file(path):
            return False
        if expected_build_id is not None:
            return self.readelf.get_build_id(path) == expected_build_id
        return True
467
468
class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), addr2line can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the
        times to call addr2line.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
          change addr each time. For example, since instructions of arm64 are all 4 bytes long,
          addr_step for arm64 can be 4.
          2.3 Use addr2line to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use addr2line to find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use addr2line to find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)

        Note: the tool actually run by this class is llvm-symbolizer (see
        _build_symbolizer_args); "addr2line" above refers to that invocation.
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            build_id: build id passed to BinaryFinder.find_binary() when looking up the file.
            addrs: a map from address to Addr object in this dso.
        """

        def __init__(self, build_id: Optional[str]):
            self.build_id = build_id
            self.addrs: Dict[int, Addr2Nearestline.Addr] = {}
            # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
            # and provide data structures connecting file id and file name here.
            self.file_name_to_id: Dict[str, int] = {}
            self.file_id_to_name: List[str] = []
            # Function names are interned the same way as file names.
            self.func_name_to_id: Dict[str, int] = {}
            self.func_id_to_name: List[str] = []

        def get_file_id(self, file_path: str) -> int:
            """ Return the id of file_path, assigning the next free id on first use. """
            file_id = self.file_name_to_id.get(file_path)
            if file_id is None:
                file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
                self.file_id_to_name.append(file_path)
            return file_id

        def get_func_id(self, func_name: str) -> int:
            """ Return the id of func_name, assigning the next free id on first use. """
            func_id = self.func_name_to_id.get(func_name)
            if func_id is None:
                func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
                self.func_id_to_name.append(func_name)
            return func_id

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: a list of (file_id, line_number) for addr, or
                          (file_id, line_number, func_id) when function names are requested.
                          source_lines[:-1] are all for inlined functions.
                          None until line info has been found.
        """

        def __init__(self, func_addr: int):
            self.func_addr = func_addr
            self.source_lines: Optional[List[Tuple[int, ...]]] = None

    def __init__(
            self, ndk_path: Optional[str],
            binary_finder: BinaryFinder, with_function_name: bool):
        """ ndk_path: optional ndk path used to find llvm-symbolizer and readelf.
            binary_finder: used to find the file on the host for each dso path.
            with_function_name: whether to also collect a function name for each source line.
            log_exit() is called if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE)
        self.readelf = ReadElf(ndk_path)
        self.dso_map: Dict[str, Addr2Nearestline.Dso] = {}  # map from dso_path to Dso.
        self.binary_finder = binary_finder
        self.with_function_name = with_function_name

    def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int):
        """ Register an address to convert later by convert_addrs_to_lines().

            The Dso for dso_path is created on first use, with the given build_id.
            An addr already registered for the dso keeps its original func_addr.
        """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso(build_id)
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self, jobs: int):
        """ Convert all registered addrs, handling each dso as one task in a pool of
            `jobs` threads.
        """
        with ThreadPoolExecutor(jobs) as executor:
            futures: List[Future] = []
            for dso_path, dso in self.dso_map.items():
                futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso))
            for future in futures:
                # Call future.result() to report exceptions raised in the executor.
                future.result()

    def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso):
        """ Fill source_lines for the addrs of one dso.

            Nothing is done if the binary can't be found or has no .debug_line section.
        """
        real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
        if not real_path:
            # These names are skipped silently; other missing dsos are logged.
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                logging.debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            logging.debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        # Three passes: the exact addrs, then up to 4 steps back, then 5 to 128 steps back.
        # Each pass only queries addrs still without line info.
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path: Path) -> bool:
        """ Return True if readelf reports a .debug_line section in the file. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path: Path) -> int:
        """ Return the step used when moving addrs backwards: 4 for arm64, 2 for arm,
            1 for any other arch.
        """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(
            self, dso: Addr2Nearestline.Dso, real_path: Path,
            addr_shifts: Union[List[int], range]):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts.

            addr_shifts: offsets added to each addr still without line info, tried in order.
            Nothing is updated if the symbolizer can't be started.
        """
        # 1. Collect addrs to send to llvm-symbolizer.
        addr_set: Set[int] = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use llvm-symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        addr_map = self.parse_line_output(stdoutdata, dso)

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            # Walk the shifts in the same order as in step 1, and take the first hit.
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path: Path) -> List[str]:
        """ Build the llvm-symbolizer command line for binary_path. """
        args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path]
        if self.with_function_name:
            args += ['--functions=linkage', '--demangle']
        else:
            args.append('--functions=none')
        return args

    def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso
                          ) -> Dict[int, List[Tuple[int, ...]]]:
        """
        The output is a list of lines.
            address1
            function_name1 (the function name can be empty)
            source_location1
            function_name2
            source_location2
            ...
            (end with empty line)

        Function name lines are only expected when self.with_function_name is set.
        Return a map from address to a list of (file_id, line_number) tuples, or
        (file_id, line_number, func_id) tuples when function names are collected.
        Addresses without usable line info are left out of the map.
        """

        addr_map: Dict[int, List[Tuple[int, ...]]] = {}
        lines = output.strip().splitlines()
        i = 0
        while i < len(lines):
            address = self._parse_line_output_address(lines[i])
            i += 1
            if address is None:
                continue
            info = []
            # Consume the frames belonging to this address.
            while i < len(lines):
                if self.with_function_name:
                    # A frame needs two lines: function name and source location.
                    if i + 1 == len(lines):
                        break
                    function_name = lines[i].strip()
                    if not function_name and (':' not in lines[i+1]):
                        # no more frames
                        break
                    i += 1
                elif not lines[i]:
                    # An empty line ends the frames of this address.
                    i += 1
                    break

                file_path, line_number = self._parse_line_output_source_location(lines[i])
                i += 1
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not info:
                        break
                    continue
                file_id = dso.get_file_id(file_path)
                if self.with_function_name:
                    func_id = dso.get_func_id(function_name)
                    info.append((file_id, line_number, func_id))
                else:
                    info.append((file_id, line_number))
            if info:
                addr_map[address] = info
        return addr_map

    def _parse_line_output_address(self, output: str) -> Optional[int]:
        """ Parse a line starting with '0x' as a hex address. Return None for other lines. """
        if output.startswith('0x'):
            return int(output, 16)
        return None

    def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]:
        """ Parse a "filename:line:column" line into (file_path, line_number).

            Return (None, None) if the line isn't in that format, if the file path or line
            number is empty or contains '?', or if the line number isn't an integer.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def get_dso(self, dso_path: str) -> Optional[Addr2Nearestline.Dso]:
        """ Return the Dso registered for dso_path, or None if there is none. """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso: Addr2Nearestline.Dso, addr: int
                        ) -> Optional[List[Tuple[Union[str, int], ...]]]:
        """ Return the source lines found for addr, with ids resolved back to names.

            Each item is (file_name, line), or (file_name, line, function_name) when
            function names are collected. Return None if addr has no line info.
            addr must have been registered in dso, otherwise KeyError is raised.
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source]
727
728
class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        Paths reported by addr2line are the ones stored in the debug sections of
        shared libraries, so they have to be mapped to paths in the local file
        system. This works as below:
        1. Walk the provided source_dirs and remember every file whose extension
           is in SOURCE_FILE_EXTS (C/C++ headers and sources, Java, Kotlin).
        2. Given an abstract_path reported by addr2line:
           2.1 Take all collected paths having the same file name.
           2.2 Return the one whose directory shares the longest common suffix
               with the directory of the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename: str) -> bool:
        """ Return True if the extension of filename is in SOURCE_FILE_EXTS. """
        _, extension = os.path.splitext(filename)
        return extension in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs: List[str]):
        # Map from filename to a list of reversed directory path containing filename.
        self.filename_to_rparents: Dict[str, List[str]] = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs: List[str]):
        """ Record the reversed parent directory of every source file under source_dirs. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                source_names = [name for name in file_names if self.is_source_filename(name)]
                if not source_names:
                    continue
                # Reverse once per directory; all files in it share the same string.
                reversed_parent = parent[::-1]
                for name in source_names:
                    self.filename_to_rparents.setdefault(name, []).append(reversed_parent)

    def get_real_path(self, abstract_path: str) -> Optional[str]:
        """ Return the collected path best matching abstract_path, or None if no
            collected file has the same file name.
        """
        native_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(native_path)
        candidates = self.filename_to_rparents.get(file_name)
        if candidates is None:
            return None
        abstract_rparent = abstract_parent[::-1]

        def common_suffix_length(real_rparent: str) -> int:
            # Both strings are reversed, so their common prefix is the common suffix
            # of the original directory paths.
            return len(os.path.commonprefix((real_rparent, abstract_rparent)))

        # max() returns the first maximal item, so earlier collected paths win ties.
        best_rparent = max(candidates, key=common_suffix_length, default=None)
        if best_rparent is None:
            return None
        return os.path.join(best_rparent[::-1], file_name)
790
791
class AddrRange:
    """ An address range starting at `start` and spanning `len` addresses. """

    def __init__(self, start: int, len: int):
        self.start, self.len = start, len

    @property
    def end(self) -> int:
        """ First address after the range (start + len). """
        return self.start + self.len

    def is_in_range(self, addr: int) -> bool:
        """ Return True if start <= addr < end. """
        return self.start <= addr < self.end
803
804
class Disassembly:
    """ Disassembly text collected for one address range. """

    def __init__(self):
        # (line, addr) pairs in output order. Objdump fills this list; addr is 0 for
        # lines from which no address could be parsed.
        self.lines: List[Tuple[str, int]] = []
808
809
class Objdump(object):
    """ A wrapper of objdump to disassemble code. """

    def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder):
        self.ndk_path = ndk_path
        self.binary_finder = binary_finder
        self.readelf = ReadElf(ndk_path)
        # Map from arch to the llvm-objdump path found for it.
        self.objdump_paths: Dict[str, str] = {}

    def get_dso_info(self, dso_path: str, expected_build_id: Optional[str]
                     ) -> Optional[Tuple[str, str]]:
        """ Locate the binary for dso_path and detect its arch.

        Returns (real_path, arch), or None when the binary can't be found or its
        arch is reported as 'unknown'.
        """
        real_path = self.binary_finder.find_binary(dso_path, expected_build_id)
        if not real_path:
            return None
        arch = self.readelf.get_arch(real_path)
        if arch == 'unknown':
            return None
        return (str(real_path), arch)

    def _get_objdump_path(self, arch: str) -> str:
        """ Return the llvm-objdump path for arch, looking it up once and caching it.

        Exits the program through log_exit() when the tool can't be found.
        """
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find llvm-objdump. " + NDK_ERROR_MESSAGE)
            self.objdump_paths[arch] = objdump_path
        return objdump_path

    def _build_objdump_args(self, objdump_path: str, arch: str, real_path: str,
                            start_addr: int, stop_addr: int) -> List[str]:
        """ Build the objdump command line disassembling [start_addr, stop_addr). """
        args = [objdump_path, '-dlC', '--no-show-raw-insn',
                '--start-address=0x%x' % start_addr,
                '--stop-address=0x%x' % stop_addr,
                real_path]
        if arch == 'arm' and 'llvm-objdump' in objdump_path:
            args += ['--print-imm-hex']
        return args

    def disassemble_function(self, dso_info: Tuple[str, str], addr_range: AddrRange
                             ) -> Optional[Disassembly]:
        """ Disassemble code for an addr range in a binary.

        Args:
            dso_info: (real_path, arch) as returned by get_dso_info().
            addr_range: the range to disassemble.
        Returns:
            A Disassembly holding every output line paired with an address, or None
            when objdump can't be started or prints nothing. The address is the text
            before the first ':' parsed as hex, or 0 when that text isn't a hex number.
        """
        real_path, arch = dso_info
        objdump_path = self._get_objdump_path(arch)
        args = self._build_objdump_args(
            objdump_path, arch, real_path, addr_range.start, addr_range.end)
        logging.debug('disassembling: %s', ' '.join(args))
        try:
            subproc = subprocess.Popen(args, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None

        if not stdoutdata:
            return None
        result = Disassembly()
        for line in stdoutdata.split('\n'):
            line = line.rstrip()  # Remove '\r' on Windows.
            items = line.split(':', 1)
            try:
                addr = int(items[0], 16)
            except ValueError:
                addr = 0
            result.lines.append((line, addr))
        return result

    def disassemble_functions(self, dso_info: Tuple[str, str],
                              sorted_addr_ranges: List[AddrRange]
                              ) -> Optional[List[Disassembly]]:
        """ Disassemble code for multiple addr ranges in a binary. sorted_addr_ranges should be
            sorted by addr_range.start.

        objdump runs once over the span from the first range start to the largest
        range end, and its output is split per range.

        Returns:
            One Disassembly per entry of sorted_addr_ranges (an empty list for empty
            input), or None when objdump can't be started.
        """
        if not sorted_addr_ranges:
            return []
        real_path, arch = dso_info
        objdump_path = self._get_objdump_path(arch)
        start_addr = sorted_addr_ranges[0].start
        stop_addr = max(addr_range.end for addr_range in sorted_addr_ranges)
        args = self._build_objdump_args(objdump_path, arch, real_path, start_addr, stop_addr)
        try:
            # The context manager closes the pipe and waits for the child even when
            # parsing raises, so no process or file descriptor is leaked.
            with subprocess.Popen(args, stdout=subprocess.PIPE, text=True) as proc:
                return self._parse_disassembly_for_functions(proc.stdout, sorted_addr_ranges)
        except OSError:
            return None

    def _parse_disassembly_for_functions(self, fh: TextIO, sorted_addr_ranges: List[AddrRange]
                                         ) -> Optional[List[Disassembly]]:
        """ Split objdump output read from fh into one Disassembly per address range.

        Lines without a parsable address (address 0) are attached to the range of
        the preceding addressed line, if that line was inside a range.
        """
        range_count = len(sorted_addr_ranges)
        result = [Disassembly() for _ in sorted_addr_ranges]
        current_id = 0
        in_range = False
        for line in fh:
            if current_id >= range_count:
                # Every range is done. The remaining output is still read to the end
                # (presumably so the objdump child never blocks on a full pipe).
                continue
            line = line.rstrip()  # Remove '\r\n'.
            addr = self._get_addr_from_disassembly_line(line)
            if addr:
                if in_range and not sorted_addr_ranges[current_id].is_in_range(addr):
                    in_range = False
                if not in_range:
                    # Skip addr ranges before the current address.
                    while (current_id < range_count and
                           sorted_addr_ranges[current_id].end <= addr):
                        current_id += 1
                    if (current_id < range_count and
                            sorted_addr_ranges[current_id].is_in_range(addr)):
                        in_range = True
            if in_range:
                result[current_id].lines.append((line, addr))
        return result

    def _get_addr_from_disassembly_line(self, line: str) -> int:
        """ Return the address at the start of a disassembly line, or 0 if there is none. """
        # line may be an instruction, like: " 24a469c: stp x29, x30, [sp, #-0x60]!" or
        #  "ffffffc0085d9664:      	paciasp".
        # line may be a function start point, like "00000000024a4698 <DoWork()>:".
        items = line.strip().split()
        if not items:
            return 0
        s = items[0]
        if s.endswith(':'):
            s = s[:-1]
        try:
            return int(s, 16)
        except ValueError:
            return 0
939
940
class ReadElf(object):
    """ A wrapper of readelf. """

    # Substrings looked for in the `readelf -h` output, paired with the arch they
    # stand for. They are checked in this order.
    _ARCH_MARKERS = (
        ('AArch64', 'arm64'),
        ('ARM', 'arm'),
        ('X86-64', 'x86_64'),
        ('80386', 'x86'),
        ('RISC-V', 'riscv64'),
    )

    def __init__(self, ndk_path: Optional[str]):
        readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
        self.readelf_path = readelf_path
        if not readelf_path:
            log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)

    @staticmethod
    def is_elf_file(path: Union[Path, str]) -> bool:
        """ Return True if path is a regular file starting with the ELF magic bytes. """
        if not os.path.isfile(path):
            return False
        with open(path, 'rb') as fh:
            magic = fh.read(4)
        return magic == b'\x7fELF'

    def _run_readelf(self, option: str, elf_file_path: Union[Path, str]) -> Optional[str]:
        """ Run readelf with one option on an elf file.

        Returns the decoded output, or None when the path isn't an elf file or
        readelf exits with a non-zero status.
        """
        if not self.is_elf_file(elf_file_path):
            return None
        try:
            raw_output = subprocess.check_output(
                [self.readelf_path, option, str(elf_file_path)])
        except subprocess.CalledProcessError:
            return None
        return bytes_to_str(raw_output)

    def get_arch(self, elf_file_path: Union[Path, str]) -> str:
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86', 'riscv64' or 'unknown'. """
        header = self._run_readelf('-h', elf_file_path)
        if header is not None:
            for marker, arch in self._ARCH_MARKERS:
                if marker in header:
                    return arch
        return 'unknown'

    def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str:
        """ Get build id of an elf file.

        Returns "" when no build id is found. With with_padding set, the id is
        passed through pad_build_id().
        """
        notes = self._run_readelf('-n', elf_file_path)
        if notes is None:
            return ""
        found = re.search(r'Build ID:\s*(\S+)', notes)
        if not found:
            return ""
        build_id = found.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id: str) -> str:
        """ Pad build id to 40 hex numbers (20 bytes). """
        # Right-fill short ids with '0', cut long ones, then add the '0x' prefix.
        return '0x' + build_id.ljust(40, '0')[:40]

    @staticmethod
    def unpad_build_id(build_id: str) -> str:
        """ Undo pad_build_id(): drop the '0x' prefix and trailing groups of 8 zeros.

        A build id without the '0x' prefix is returned unchanged.
        """
        if not build_id.startswith('0x'):
            return build_id
        stripped = build_id[2:]
        # Unpad build id as TrimZeroesFromBuildIDString() in quipper.
        zero_group = '0' * 8
        while stripped.endswith(zero_group):
            stripped = stripped[:-len(zero_group)]
        return stripped

    def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
        """ Get sections of an elf file. """
        listing = self._run_readelf('-SW', elf_file_path)
        section_names: List[str] = []
        if listing is None:
            return section_names
        for line in listing.split('\n'):
            # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
            found = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
            if not found:
                continue
            name = found.group(1).strip()
            if name:
                section_names.append(name)
        return section_names
1028
1029
def extant_dir(arg: str) -> str:
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The realpath of the argument.
    Raises:
        argparse.ArgumentTypeError: The realpath of the argument isn't a directory.
    """
    real_path = os.path.realpath(arg)
    if os.path.isdir(real_path):
        return real_path
    raise argparse.ArgumentTypeError('{} is not a directory.'.format(real_path))
1043
1044
def extant_file(arg: str) -> str:
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The realpath of the argument.
    Raises:
        argparse.ArgumentTypeError: The realpath of the argument isn't a file.
    """
    real_path = os.path.realpath(arg)
    if os.path.isfile(real_path):
        return real_path
    raise argparse.ArgumentTypeError('{} is not a file.'.format(real_path))
1058
1059
def log_fatal(msg: str):
    """ Report a fatal error by raising Exception(msg). Never returns normally. """
    raise Exception(msg)
1062
1063
def log_exit(msg: str):
    """ Terminate through sys.exit(msg), which raises SystemExit carrying msg.
        Never returns normally.
    """
    sys.exit(msg)
1066
1067
class LogFormatter(logging.Formatter):
    """ Use custom logging format.

    Records are rendered as "HH:MM:SS,mmm [LEVEL] (file:line) message".
    """

    def __init__(self):
        super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s')

    def formatTime(self, record, datefmt=None):
        """ Format the record time as "HH:MM:SS,mmm".

        datefmt is accepted for compatibility with logging.Formatter.formatTime()
        (where it is optional) but is ignored: the time format is fixed.
        """
        return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs
1076
1077
class Log:
    """ One-time configuration of the root logger. """

    # Set by init(); checked by callers to avoid configuring logging twice.
    initialized = False

    @classmethod
    def init(cls, log_level: str = 'info'):
        """ Set the root logger level and attach a stream handler using LogFormatter.

        log_level is a level name in any case, like 'debug'. Must be called at most
        once (guarded by an assert).
        """
        assert not cls.initialized
        cls.initialized = True
        root_logger = logging.root
        cls.logger = root_logger
        root_logger.setLevel(log_level.upper())
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(LogFormatter())
        root_logger.addHandler(stream_handler)
1090
1091
class ArgParseFormatter(
        argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """ Help formatter combining argparse's ArgumentDefaultsHelpFormatter and
        RawDescriptionHelpFormatter.
    """
    pass
1095
1096
@dataclass
class ReportLibOptions:
    """ Report options gathered by BaseArgumentParser.parse_known_args().

    Except for sample_filters, values are copied unchanged from the parsed
    namespace, so an option that wasn't given holds its argparse default.
    """
    # Value of --show-art-frames / --no-show-art-frames.
    show_art_frames: bool
    # Values of --remove-method (method name regexes).
    remove_method: List[str]
    # Value of --trace-offcpu (report mode name).
    trace_offcpu: str
    # Values of --proguard-mapping-file.
    proguard_mapping_files: List[str]
    # Filter arguments built by BaseArgumentParser._build_sample_filter().
    sample_filters: List[str]
    # Values of --aggregate-threads (thread name regexes).
    aggregate_threads: List[str]
1106
class BaseArgumentParser(argparse.ArgumentParser):
    """ ArgumentParser with options shared between scripts.

    On top of argparse.ArgumentParser it provides:
      - ArgParseFormatter as the default help formatter.
      - add_report_lib_options(), registering report and sample filter options.
      - a --log option, registered and applied by parse_known_args().
    """

    def __init__(self, *args, **kwargs):
        # Only supply the formatter when the caller didn't choose one, instead of
        # failing with a duplicated keyword argument.
        kwargs.setdefault('formatter_class', ArgParseFormatter)
        super().__init__(*args, **kwargs)
        self.has_sample_filter_options = False
        self.sample_filter_with_pid_shortcut = False
        self.has_report_lib_options = False
        # True once parse_known_args() has registered the --log option.
        self._log_option_added = False

    def add_report_lib_options(self, group: Optional[Any] = None,
                               default_show_art_frames: bool = False,
                               sample_filter_group: Optional[Any] = None,
                               sample_filter_with_pid_shortcut: bool = True):
        """ Register the report options and the sample filter options.

        Args:
            group: argument group receiving the report options; the parser itself
                when not given.
            default_show_art_frames: default value of --show-art-frames.
            sample_filter_group: argument group receiving the sample filter options;
                a new 'Sample filter options' group when not given.
            sample_filter_with_pid_shortcut: also register the --pid and --tid
                shortcuts.
        """
        self.has_report_lib_options = True
        parser = group if group else self
        parser.add_argument(
            '--proguard-mapping-file', nargs='+',
            help='Add proguard mapping file to de-obfuscate symbols')
        parser.add_argument('--show-art-frames', '--show_art_frames',
                            action=argparse.BooleanOptionalAction, default=default_show_art_frames,
                            help='Show frames of internal methods in the ART Java interpreter.')
        parser.add_argument('--remove-method', nargs='+', metavar='method_name_regex',
                            help='remove methods with name containing the regular expression')
        parser.add_argument(
            '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'],
            help="""Set report mode for profiles recorded with --trace-offcpu option. All possible
                    modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples),
                    on-off-cpu (both on-cpu and off-cpu samples, can be split by event name),
                    mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name).
                    If not set, mixed-on-off-cpu mode is used.
                """)
        self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut)
        parser.add_argument(
            '--aggregate-threads', nargs='+', metavar='thread_name_regex',
            help="""Aggregate threads with names matching the same regex. As a result, samples from
                    different threads (like a thread pool) can be shown in one flamegraph.
                """)

    def _add_sample_filter_options(
            self, group: Optional[Any] = None, with_pid_shortcut: bool = True):
        """ Register the sample filter options on group (a new group when not given).

        with_pid_shortcut additionally registers --pid and --tid, which are handled
        like --include-pid and --include-tid when filters are built.
        """
        if not group:
            group = self.add_argument_group('Sample filter options')
        group.add_argument('--cpu', nargs='+', help="""only include samples for the selected cpus.
                            cpu can be a number like 1, or a range like 0-3""")
        group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int,
                           help='exclude samples for selected processes')
        group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int,
                           help='exclude samples for selected threads')
        group.add_argument(
            '--exclude-process-name', metavar='process_name_regex', nargs='+',
            help='exclude samples for processes with name containing the regular expression')
        group.add_argument(
            '--exclude-thread-name', metavar='thread_name_regex', nargs='+',
            help='exclude samples for threads with name containing the regular expression')

        if with_pid_shortcut:
            group.add_argument('--pid', metavar='pid', nargs='+', type=int,
                               help='only include samples for selected processes')
            group.add_argument('--tid', metavar='tid', nargs='+', type=int,
                               help='only include samples for selected threads')
        group.add_argument('--include-pid', metavar='pid', nargs='+', type=int,
                           help='only include samples for selected processes')
        group.add_argument('--include-tid', metavar='tid', nargs='+', type=int,
                           help='only include samples for selected threads')
        group.add_argument(
            '--include-process-name', metavar='process_name_regex', nargs='+',
            help='only include samples for processes with name containing the regular expression')
        group.add_argument(
            '--comm', '--include-thread-name', metavar='thread_name_regex',
            dest='include_thread_name', nargs='+',
            help='only include samples for threads with name containing the regular expression')
        group.add_argument(
            '--filter-file', metavar='file',
            help='use filter file to filter samples based on timestamps. ' +
            'The file format is in doc/sampler_filter.md.')
        self.has_sample_filter_options = True
        self.sample_filter_with_pid_shortcut = with_pid_shortcut

    def _build_sample_filter(self, args: argparse.Namespace) -> List[str]:
        """ Build sample filters, which can be passed to ReportLib.SetSampleFilter().

        The result is a flat list of option strings and values. Id lists are joined
        with ',' into one value; every name regex gets its own option/value pair.
        """
        filters: List[str] = []
        if args.cpu:
            filters.extend(['--cpu', ','.join(args.cpu)])
        if args.exclude_pid:
            filters.extend(['--exclude-pid', ','.join(str(pid) for pid in args.exclude_pid)])
        if args.exclude_tid:
            filters.extend(['--exclude-tid', ','.join(str(tid) for tid in args.exclude_tid)])
        if args.exclude_process_name:
            for name in args.exclude_process_name:
                filters.extend(['--exclude-process-name', name])
        if args.exclude_thread_name:
            for name in args.exclude_thread_name:
                filters.extend(['--exclude-thread-name', name])

        if args.include_pid:
            filters.extend(['--include-pid', ','.join(str(pid) for pid in args.include_pid)])
        if args.include_tid:
            filters.extend(['--include-tid', ','.join(str(tid) for tid in args.include_tid)])
        if self.sample_filter_with_pid_shortcut:
            # --pid/--tid only exist in the namespace when the shortcuts were registered.
            if args.pid:
                filters.extend(['--include-pid', ','.join(str(pid) for pid in args.pid)])
            if args.tid:
                filters.extend(['--include-tid', ','.join(str(tid) for tid in args.tid)])
        if args.include_process_name:
            for name in args.include_process_name:
                filters.extend(['--include-process-name', name])
        if args.include_thread_name:
            for name in args.include_thread_name:
                filters.extend(['--include-thread-name', name])
        if args.filter_file:
            filters.extend(['--filter-file', args.filter_file])
        return filters

    def parse_known_args(self, *args, **kwargs):
        """ Parse arguments, then apply the shared options.

        Besides the argparse behavior this:
          - registers the --log option (once per parser),
          - stores a ReportLibOptions as namespace.report_lib_options when
            add_report_lib_options() was called,
          - initializes logging with the --log level unless Log is already initialized.

        Returns the (namespace, remaining_args) pair of ArgumentParser.parse_known_args().
        """
        # Registering the same option string twice makes argparse raise a conflict
        # error, so only add --log on the first call.
        if not self._log_option_added:
            self.add_argument(
                '--log', choices=['debug', 'info', 'warning'],
                default='info', help='set log level')
            self._log_option_added = True
        namespace, left_args = super().parse_known_args(*args, **kwargs)

        if self.has_report_lib_options:
            sample_filters = self._build_sample_filter(namespace)
            report_lib_options = ReportLibOptions(
                namespace.show_art_frames, namespace.remove_method, namespace.trace_offcpu,
                namespace.proguard_mapping_file, sample_filters, namespace.aggregate_threads)
            setattr(namespace, 'report_lib_options', report_lib_options)

        if not Log.initialized:
            Log.init(namespace.log)
        return namespace, left_args
1234