#!/usr/bin/env python3 # # Copyright (C) 2021 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # """gecko_profile_generator.py: converts perf.data to Gecko Profile Format, which can be read by https://profiler.firefox.com/. Example: ./app_profiler.py ./gecko_profile_generator.py | gzip > gecko-profile.json.gz Then open gecko-profile.json.gz in https://profiler.firefox.com/ """ from collections import Counter from dataclasses import dataclass, field import json import logging import sys from typing import List, Dict, Optional, NamedTuple, Tuple from simpleperf_report_lib import GetReportLib from simpleperf_utils import BaseArgumentParser, ReportLibOptions StringID = int StackID = int FrameID = int CategoryID = int Milliseconds = float GeckoProfile = Dict # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 class Frame(NamedTuple): string_id: StringID relevantForJS: bool innerWindowID: int implementation: None optimizations: None line: None column: None category: CategoryID subcategory: int # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 class Stack(NamedTuple): prefix_id: Optional[StackID] frame_id: FrameID category_id: CategoryID # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 class Sample(NamedTuple): stack_id: Optional[StackID] time_ms: Milliseconds responsiveness: int complete_stack: bool def to_json(self): return [self.stack_id, self.time_ms, self.responsiveness] # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425 # Colors must be defined in: # https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css CATEGORIES = [ { "name": 'User', # Follow Brendan Gregg's Flamegraph convention: yellow for userland # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419 "color": 'yellow', "subcategories": ['Other'] }, { "name": 'Kernel', # Follow Brendan Gregg's Flamegraph convention: orange for kernel # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417 "color": 'orange', "subcategories": ['Other'] }, { "name": 'Native', # Follow Brendan Gregg's Flamegraph convention: yellow for userland # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419 "color": 'yellow', "subcategories": ['Other'] }, { "name": 'DEX', # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411 "color": 'green', "subcategories": ['Other'] }, { "name": 'OAT', # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411 "color": 'green', "subcategories": ['Other'] }, { "name": 'Off-CPU', # Follow Brendan Gregg's Flamegraph convention: blue for off-CPU # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470 "color": 'blue', "subcategories": ['Other'] }, # Not used by this exporter yet, but some Firefox Profiler code assumes # there is an 'Other' category by searching for a category with # color=grey, so include this. { "name": 'Other', "color": 'grey', "subcategories": ['Other'] }, { "name": 'JIT', # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411 "color": 'green', "subcategories": ['Other'] }, ] def is_complete_stack(stack: List[str]) -> bool: """ Check if the callstack is complete. The stack starts from root. """ for entry in stack: if ('__libc_init' in entry) or ('__start_thread' in entry): return True return False @dataclass class Thread: """A builder for a profile of a single thread. Attributes: comm: Thread command-line (name). pid: process ID of containing process. tid: thread ID. samples: Timeline of profile samples. frameTable: interned stack frame ID -> stack frame. stringTable: interned string ID -> string. stringMap: interned string -> string ID. stackTable: interned stack ID -> stack. stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID. frameMap: Stack Frame string -> interned Frame ID. """ comm: str pid: int tid: int samples: List[Sample] = field(default_factory=list) frameTable: List[Frame] = field(default_factory=list) stringTable: List[str] = field(default_factory=list) # TODO: this is redundant with frameTable, could we remove this? stringMap: Dict[str, int] = field(default_factory=dict) stackTable: List[Stack] = field(default_factory=list) stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) frameMap: Dict[str, int] = field(default_factory=dict) def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int: """Gets a matching stack, or saves the new stack. Returns a Stack ID.""" key = (prefix_id, frame_id) stack_id = self.stackMap.get(key) if stack_id is not None: return stack_id stack_id = len(self.stackTable) self.stackTable.append(Stack(prefix_id=prefix_id, frame_id=frame_id, category_id=0)) self.stackMap[key] = stack_id return stack_id def _intern_string(self, string: str) -> int: """Gets a matching string, or saves the new string. Returns a String ID.""" string_id = self.stringMap.get(string) if string_id is not None: return string_id string_id = len(self.stringTable) self.stringTable.append(string) self.stringMap[string] = string_id return string_id def _intern_frame(self, frame_str: str) -> int: """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.""" frame_id = self.frameMap.get(frame_str) if frame_id is not None: return frame_id frame_id = len(self.frameTable) self.frameMap[frame_str] = frame_id string_id = self._intern_string(frame_str) category = 0 # Heuristic: kernel code contains "kallsyms" as the library name. if "kallsyms" in frame_str or ".ko" in frame_str: category = 1 # Heuristic: empirically, off-CPU profiles mostly measure off-CPU # time accounted to the linux kernel __schedule function, which # handles blocking. This only works if we have kernel symbol # (kallsyms) access though. __schedule defined here: # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f if frame_str.startswith("__schedule "): category = 5 elif ".so" in frame_str: category = 2 elif ".vdex" in frame_str: category = 3 elif ".oat" in frame_str: category = 4 # "[JIT app cache]" is returned for JIT code here: # https://cs.android.com/android/platform/superproject/+/master:system/extras/simpleperf/dso.cpp;l=551;drc=4d8137f55782cc1e8cc93e4694ba3a7159d9a2bc elif "[JIT app cache]" in frame_str: category = 7 self.frameTable.append(Frame( string_id=string_id, relevantForJS=False, innerWindowID=0, implementation=None, optimizations=None, line=None, column=None, category=category, subcategory=0, )) return frame_id def add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None: """Add a timestamped stack trace sample to the thread builder. Args: comm: command-line (name) of the thread at this sample stack: sampled stack frames. Root first, leaf last. time_ms: timestamp of sample in milliseconds """ # Unix threads often don't set their name immediately upon creation. # Use the last name if self.comm != comm: self.comm = comm prefix_stack_id = None for frame in stack: frame_id = self._intern_frame(frame) prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id) self.samples.append(Sample(stack_id=prefix_stack_id, time_ms=time_ms, responsiveness=0, complete_stack=is_complete_stack(stack))) def sort_samples(self) -> None: """ The samples aren't guaranteed to be in order. Sort them by time. """ self.samples.sort(key=lambda s: s.time_ms) def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None: """ Ideally all callstacks are complete. But some may be broken for different reasons. To create a smooth view in "Stack Chart", remove small gaps of broken callstacks. Args: max_remove_gap_length: the max length of continuous broken-stack samples to remove """ if max_remove_gap_length == 0: return i = 0 remove_flags = [False] * len(self.samples) while i < len(self.samples): if self.samples[i].complete_stack: i += 1 continue n = 1 while (i + n < len(self.samples)) and (not self.samples[i + n].complete_stack): n += 1 gap_distr[n] += 1 if n <= max_remove_gap_length: for j in range(i, i + n): remove_flags[j] = True i += n if True in remove_flags: old_samples = self.samples self.samples = [s for s, remove in zip(old_samples, remove_flags) if not remove] def to_json_dict(self) -> Dict: """Converts this Thread to GeckoThread JSON format.""" # Gecko profile format is row-oriented data as List[List], # And a schema for interpreting each index. # Schema: # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230 return { "tid": self.tid, "pid": self.pid, "name": self.comm, # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51 "markers": { "schema": { "name": 0, "startTime": 1, "endTime": 2, "phase": 3, "category": 4, "data": 5, }, "data": [], }, # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 "samples": { "schema": { "stack": 0, "time": 1, "responsiveness": 2, }, "data": [s.to_json() for s in self.samples], }, # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 "frameTable": { "schema": { "location": 0, "relevantForJS": 1, "innerWindowID": 2, "implementation": 3, "optimizations": 4, "line": 5, "column": 6, "category": 7, "subcategory": 8, }, "data": self.frameTable, }, # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 "stackTable": { "schema": { "prefix": 0, "frame": 1, "category": 2, }, "data": self.stackTable, }, "stringTable": self.stringTable, "registerTime": 0, "unregisterTime": None, "processType": "default", } def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None: """ Remove stack gaps for each thread, and print status. """ if max_remove_gap_length == 0: return total_sample_count = 0 remove_sample_count = 0 gap_distr = Counter() for tid in list(thread_map.keys()): thread = thread_map[tid] old_n = len(thread.samples) thread.remove_stack_gaps(max_remove_gap_length, gap_distr) new_n = len(thread.samples) total_sample_count += old_n remove_sample_count += old_n - new_n if new_n == 0: del thread_map[tid] if total_sample_count != 0: logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.', max_remove_gap_length, remove_sample_count, remove_sample_count / total_sample_count * 100 ) logging.debug('Stack gap length distribution among samples (gap_length: count): %s', gap_distr) def _gecko_profile( record_file: str, symfs_dir: Optional[str], kallsyms_file: Optional[str], report_lib_options: ReportLibOptions, max_remove_gap_length: int, percpu_samples: bool) -> GeckoProfile: """convert a simpleperf profile to gecko format""" lib = GetReportLib(record_file) lib.ShowIpForUnknownSymbol() if symfs_dir is not None: lib.SetSymfs(symfs_dir) if kallsyms_file is not None: lib.SetKallsymsFile(kallsyms_file) if percpu_samples: # Grouping samples by cpus doesn't support off cpu samples. if lib.GetSupportedTraceOffCpuModes(): report_lib_options.trace_offcpu = 'on-cpu' lib.SetReportOptions(report_lib_options) arch = lib.GetArch() meta_info = lib.MetaInfo() record_cmd = lib.GetRecordCmd() # Map from tid to Thread thread_map: Dict[int, Thread] = {} # Map from pid to process name process_names: Dict[int, str] = {} while True: sample = lib.GetNextSample() if sample is None: lib.Close() break symbol = lib.GetSymbolOfCurrentSample() callchain = lib.GetCallChainOfCurrentSample() sample_time_ms = sample.time / 1000000 stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)] for i in range(callchain.nr): entry = callchain.entries[i] stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name)) # We want root first, leaf last. stack.reverse() if percpu_samples: if sample.tid == sample.pid: process_names[sample.pid] = sample.thread_comm process_name = process_names.get(sample.pid) stack = [ '%s tid %d (in %s pid %d)' % (sample.thread_comm, sample.tid, process_name, sample.pid)] + stack thread = thread_map.get(sample.cpu) if thread is None: thread = Thread(comm=f'Cpu {sample.cpu}', pid=sample.cpu, tid=sample.cpu) thread_map[sample.cpu] = thread thread.add_sample( comm=f'Cpu {sample.cpu}', stack=stack, time_ms=sample_time_ms) else: # add thread sample thread = thread_map.get(sample.tid) if thread is None: thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid) thread_map[sample.tid] = thread thread.add_sample( comm=sample.thread_comm, stack=stack, # We are being a bit fast and loose here with time here. simpleperf # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix # epoch, but rather some arbitrary time. In practice, this doesn't # matter, the Firefox Profiler normalises all the timestamps to begin at # the minimum time. Consider fixing this in future, if needed, by # setting `simpleperf record --clockid realtime`. time_ms=sample_time_ms) for thread in thread_map.values(): thread.sort_samples() remove_stack_gaps(max_remove_gap_length, thread_map) threads = [thread.to_json_dict() for thread in thread_map.values()] profile_timestamp = meta_info.get('timestamp') end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0 # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305 gecko_profile_meta = { "interval": 1, "processType": 0, "product": record_cmd, "device": meta_info.get("product_props"), "platform": meta_info.get("android_build_fingerprint"), "stackwalk": 1, "debug": 0, "gcpoison": 0, "asyncstack": 1, # The profile timestamp is actually the end time, not the start time. # This is close enough for our purposes; I mostly just want to know which # day the profile was taken! Consider fixing this in future, if needed, # by setting `simpleperf record --clockid realtime` and taking the minimum # sample time. "startTime": end_time_ms, "shutdownTime": None, "version": 24, "presymbolicated": True, "categories": CATEGORIES, "markerSchema": [], "abi": arch, "oscpu": meta_info.get("android_build_fingerprint"), "appBuildID": meta_info.get("app_versioncode"), } # Schema: # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377 # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md return { "meta": gecko_profile_meta, "libs": [], "threads": threads, "processes": [], "pausedRanges": [], } def main() -> None: parser = BaseArgumentParser(description=__doc__) parser.add_argument('--symfs', help='Set the path to find binaries with symbols and debug info.') parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.') parser.add_argument('-i', '--record_file', nargs='?', default='perf.data', help='Default is perf.data.') parser.add_argument('--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length', type=int, default=3, help=""" Ideally all callstacks are complete. But some may be broken for different reasons. To create a smooth view in "Stack Chart", remove small gaps of broken callstacks. MAX_GAP_LENGTH is the max length of continuous broken-stack samples we want to remove. """ ) parser.add_argument( '--percpu-samples', action='store_true', help='show samples based on cpus instead of threads') parser.add_report_lib_options() args = parser.parse_args() profile = _gecko_profile( record_file=args.record_file, symfs_dir=args.symfs, kallsyms_file=args.kallsyms, report_lib_options=args.report_lib_options, max_remove_gap_length=args.max_remove_gap_length, percpu_samples=args.percpu_samples, ) json.dump(profile, sys.stdout, sort_keys=True) if __name__ == '__main__': main()