#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
    which can be read by https://profiler.firefox.com/.

  Example:
    ./app_profiler.py
    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz

  Then open gecko-profile.json.gz in https://profiler.firefox.com/
"""

from collections import Counter
from dataclasses import dataclass, field
import json
import logging
import sys
from typing import List, Dict, Optional, NamedTuple, Tuple

from simpleperf_report_lib import GetReportLib
from simpleperf_utils import BaseArgumentParser, ReportLibOptions


StringID = int
StackID = int
FrameID = int
CategoryID = int
Milliseconds = float
GeckoProfile = Dict


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class Frame(NamedTuple):
    """One row of a thread's frameTable.

    The field order matches the "frameTable" schema emitted by
    Thread.to_json_dict(), because rows are serialized positionally.
    """
    string_id: StringID
    relevantForJS: bool
    innerWindowID: int
    implementation: None
    optimizations: None
    line: None
    column: None
    category: CategoryID
    subcategory: int


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class Stack(NamedTuple):
    """One row of a thread's stackTable.

    A stack is a leaf frame plus the ID of the stack below it (None for a
    root). The field order matches the "stackTable" schema emitted by
    Thread.to_json_dict().
    """
    prefix_id: Optional[StackID]
    frame_id: FrameID
    category_id: CategoryID


# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class Sample(NamedTuple):
    """One timestamped sample of a thread.

    complete_stack is bookkeeping for Thread.remove_stack_gaps() only; it is
    not part of the serialized row.
    """
    stack_id: Optional[StackID]
    time_ms: Milliseconds
    responsiveness: int
    complete_stack: bool

    def to_json(self):
        """Returns the row in the order of the "samples" schema."""
        return [self.stack_id, self.time_ms, self.responsiveness]


# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
# Colors must be defined in:
# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
CATEGORIES = [
    {
        "name": 'User',
        # Follow Brendan Gregg's Flamegraph convention: yellow for userland
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
        "color": 'yellow',
        "subcategories": ['Other']
    },
    {
        "name": 'Kernel',
        # Follow Brendan Gregg's Flamegraph convention: orange for kernel
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
        "color": 'orange',
        "subcategories": ['Other']
    },
    {
        "name": 'Native',
        # Follow Brendan Gregg's Flamegraph convention: yellow for userland
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
        "color": 'yellow',
        "subcategories": ['Other']
    },
    {
        "name": 'DEX',
        # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
        "color": 'green',
        "subcategories": ['Other']
    },
    {
        "name": 'OAT',
        # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
        "color": 'green',
        "subcategories": ['Other']
    },
    {
        "name": 'Off-CPU',
        # Follow Brendan Gregg's Flamegraph convention: blue for off-CPU
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
        "color": 'blue',
        "subcategories": ['Other']
    },
    # Not used by this exporter yet, but some Firefox Profiler code assumes
    # there is an 'Other' category by searching for a category with
    # color=grey, so include this.
    {
        "name": 'Other',
        "color": 'grey',
        "subcategories": ['Other']
    },
    {
        "name": 'JIT',
        # Follow Brendan Gregg's Flamegraph convention: green for Java/JIT
        # https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
        "color": 'green',
        "subcategories": ['Other']
    },
]

# Category name -> index into CATEGORIES. Frames and stacks refer to a
# category by its index, so the indices are derived from CATEGORIES instead of
# being hard-coded; this keeps them correct if the list is ever reordered.
_CATEGORY_ID: Dict[str, CategoryID] = {
    category["name"]: index for index, category in enumerate(CATEGORIES)}


def is_complete_stack(stack: List[str]) -> bool:
    """ Check if the callstack is complete. The stack starts from root.

    A stack counts as complete when any of its entries mentions one of the
    thread/process entry points '__libc_init' or '__start_thread'.
    """
    return any(('__libc_init' in entry) or ('__start_thread' in entry)
               for entry in stack)


@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Attributes:
      comm: Thread command-line (name).
      pid: process ID of containing process.
      tid: thread ID.
      samples: Timeline of profile samples.
      frameTable: interned stack frame ID -> stack frame.
      stringTable: interned string ID -> string.
      stringMap: interned string -> string ID.
      stackTable: interned stack ID -> stack.
      stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
      frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[Sample] = field(default_factory=list)
    frameTable: List[Frame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[Stack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is not None:
            return stack_id
        stack_id = len(self.stackTable)
        self.stackTable.append(Stack(prefix_id=prefix_id,
                                     frame_id=frame_id,
                                     category_id=_CATEGORY_ID['User']))
        self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is not None:
            return string_id
        string_id = len(self.stringTable)
        self.stringTable.append(string)
        self.stringMap[string] = string_id
        return string_id

    def _intern_frame(self, frame_str: str) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.

        A new frame is assigned a category by substring heuristics on
        frame_str; frames matching none of them stay in the 'User' category.
        """
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        string_id = self._intern_string(frame_str)

        category = _CATEGORY_ID['User']
        # Heuristic: kernel code contains "kallsyms" as the library name.
        if "kallsyms" in frame_str or ".ko" in frame_str:
            category = _CATEGORY_ID['Kernel']
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU
            # time accounted to the linux kernel __schedule function, which
            # handles blocking. This only works if we have kernel symbol
            # (kallsyms) access though. __schedule defined here:
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if frame_str.startswith("__schedule "):
                category = _CATEGORY_ID['Off-CPU']
        elif ".so" in frame_str:
            category = _CATEGORY_ID['Native']
        elif ".vdex" in frame_str:
            category = _CATEGORY_ID['DEX']
        elif ".oat" in frame_str:
            category = _CATEGORY_ID['OAT']
        # "[JIT app cache]" is returned for JIT code here:
        # https://cs.android.com/android/platform/superproject/+/master:system/extras/simpleperf/dso.cpp;l=551;drc=4d8137f55782cc1e8cc93e4694ba3a7159d9a2bc
        elif "[JIT app cache]" in frame_str:
            category = _CATEGORY_ID['JIT']

        self.frameTable.append(Frame(
            string_id=string_id,
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=category,
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
          comm: command-line (name) of the thread at this sample
          stack: sampled stack frames. Root first, leaf last.
          time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Intern the stack root-first, so each stack entry points at the
        # stack below it. An empty stack leaves the sample's stack_id None.
        prefix_stack_id = None
        for frame in stack:
            frame_id = self._intern_frame(frame)
            prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id)

        self.samples.append(Sample(stack_id=prefix_stack_id,
                                   time_ms=time_ms,
                                   responsiveness=0,
                                   complete_stack=is_complete_stack(stack)))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove
            gap_distr: updated in place; maps gap length -> number of gaps of
              that length seen, whether or not the gap was removed. Any
              mapping works, a Counter is not required.
        """
        if max_remove_gap_length == 0:
            return
        i = 0
        remove_flags = [False] * len(self.samples)
        while i < len(self.samples):
            if self.samples[i].complete_stack:
                i += 1
                continue
            # Measure the run of consecutive broken-stack samples starting at i.
            n = 1
            while (i + n < len(self.samples)) and (not self.samples[i + n].complete_stack):
                n += 1
            # Use get() so that a plain dict works as well as a Counter.
            gap_distr[n] = gap_distr.get(n, 0) + 1
            if n <= max_remove_gap_length:
                for j in range(i, i + n):
                    remove_flags[j] = True
            i += n
        if any(remove_flags):
            old_samples = self.samples
            self.samples = [s for s, remove in zip(old_samples, remove_flags) if not remove]

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format."""

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }


def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """ Remove stack gaps for each thread, and print status.

    Threads left without any sample are deleted from thread_map.

    Args:
        max_remove_gap_length: the max length of continuous broken-stack samples to remove.
          0 disables gap removal.
        thread_map: map from thread key to Thread, modified in place.
    """
    if max_remove_gap_length == 0:
        return
    total_sample_count = 0
    remove_sample_count = 0
    gap_distr = Counter()
    # Iterate over a snapshot of the keys: entries may be deleted in the loop.
    for tid in list(thread_map):
        thread = thread_map[tid]
        old_n = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_distr)
        new_n = len(thread.samples)
        total_sample_count += old_n
        remove_sample_count += old_n - new_n
        if new_n == 0:
            del thread_map[tid]
    if total_sample_count != 0:
        logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                     max_remove_gap_length, remove_sample_count,
                     remove_sample_count / total_sample_count * 100
                     )
        logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                      gap_distr)


def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int,
        percpu_samples: bool) -> GeckoProfile:
    """convert a simpleperf profile to gecko format

    Args:
        record_file: path of the recording to read.
        symfs_dir: if not None, passed to the report lib's SetSymfs().
        kallsyms_file: if not None, passed to the report lib's SetKallsymsFile().
        report_lib_options: options applied to the report lib. Its
          trace_offcpu field is overwritten with 'on-cpu' when percpu_samples
          is set and the recording supports trace-offcpu modes.
        max_remove_gap_length: see remove_stack_gaps().
        percpu_samples: group samples by cpu instead of by thread.

    Returns:
        The profile as a JSON-serializable dict.
    """
    lib = GetReportLib(record_file)

    # Map from tid to Thread
    thread_map: Dict[int, Thread] = {}
    # Map from pid to process name
    process_names: Dict[int, str] = {}

    # Close the report lib on every exit path, including when configuring it
    # or reading a sample raises.
    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        if percpu_samples:
            # Grouping samples by cpus doesn't support off cpu samples.
            if lib.GetSupportedTraceOffCpuModes():
                report_lib_options.trace_offcpu = 'on-cpu'
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            sample_time_ms = sample.time / 1000000

            stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)]
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name))
            # We want root first, leaf last.
            stack.reverse()

            if percpu_samples:
                if sample.tid == sample.pid:
                    process_names[sample.pid] = sample.thread_comm
                # process_name is None until a sample from the process's main
                # thread (tid == pid) has been seen.
                process_name = process_names.get(sample.pid)
                # Put the sampled thread at the root of the stack, since the
                # "thread" of a per-cpu profile is the cpu.
                stack = [
                    '%s tid %d (in %s pid %d)' %
                    (sample.thread_comm, sample.tid, process_name, sample.pid)] + stack
                thread = thread_map.get(sample.cpu)
                if thread is None:
                    thread = Thread(comm=f'Cpu {sample.cpu}', pid=sample.cpu, tid=sample.cpu)
                    thread_map[sample.cpu] = thread
                thread.add_sample(
                    comm=f'Cpu {sample.cpu}',
                    stack=stack,
                    time_ms=sample_time_ms)
            else:
                # add thread sample
                thread = thread_map.get(sample.tid)
                if thread is None:
                    thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid)
                    thread_map[sample.tid] = thread
                thread.add_sample(
                    comm=sample.thread_comm,
                    stack=stack,
                    # We are being a bit fast and loose here with time here.  simpleperf
                    # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                    # epoch, but rather some arbitrary time. In practice, this doesn't
                    # matter, the Firefox Profiler normalises all the timestamps to begin at
                    # the minimum time.  Consider fixing this in future, if needed, by
                    # setting `simpleperf record --clockid realtime`.
                    time_ms=sample_time_ms)
    finally:
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": CATEGORIES,
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
        "appBuildID": meta_info.get("app_versioncode"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }


def main() -> None:
    """Parses the command line and writes the profile as JSON to stdout."""
    parser = BaseArgumentParser(description=__doc__)
    parser.add_argument('--symfs',
                        help='Set the path to find binaries with symbols and debug info.')
    parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.')
    parser.add_argument('-i', '--record_file', nargs='?', default='perf.data',
                        help='Default is perf.data.')
    parser.add_argument('--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
                        type=int, default=3, help="""
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """
                        )
    parser.add_argument(
        '--percpu-samples', action='store_true',
        help='show samples based on cpus instead of threads')
    parser.add_report_lib_options()
    args = parser.parse_args()
    profile = _gecko_profile(
        record_file=args.record_file,
        symfs_dir=args.symfs,
        kallsyms_file=args.kallsyms,
        report_lib_options=args.report_lib_options,
        max_remove_gap_length=args.max_remove_gap_length,
        percpu_samples=args.percpu_samples,
    )

    json.dump(profile, sys.stdout, sort_keys=True)


if __name__ == '__main__':
    main()