1#!/usr/bin/env python3
2#
3# Copyright (C) 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""gecko_profile_generator.py: converts perf.data to Gecko Profile Format,
19    which can be read by https://profiler.firefox.com/.
20
21  Example:
22    ./app_profiler.py
23    ./gecko_profile_generator.py | gzip > gecko-profile.json.gz
24
25  Then open gecko-profile.json.gz in https://profiler.firefox.com/
26"""
27
28from collections import Counter
29from dataclasses import dataclass, field
30import json
31import logging
32import sys
33from typing import List, Dict, Optional, NamedTuple, Tuple
34
35from simpleperf_report_lib import GetReportLib
36from simpleperf_utils import BaseArgumentParser, ReportLibOptions
37
38
# Readable aliases for the plain values stored in the Gecko profile tables.
StringID = int  # position of a string in Thread.stringTable
StackID = int  # position of a stack in Thread.stackTable
FrameID = int  # position of a frame in Thread.frameTable
CategoryID = int  # position of a category in CATEGORIES
Milliseconds = float  # sample timestamp, in milliseconds
GeckoProfile = Dict  # the JSON-serializable dict returned by _gecko_profile()
45
46
# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
class Frame(NamedTuple):
    """One row of a thread's frameTable.

    The field order matches the "frameTable" schema emitted by
    Thread.to_json_dict(), because rows are serialized positionally.
    """
    # ID of the frame's "<symbol> (in <dso>)" string in the thread's stringTable.
    string_id: StringID
    # Thread._intern_frame always stores False here.
    relevantForJS: bool
    # Thread._intern_frame always stores 0 here.
    innerWindowID: int
    # The next four fields are always None in this exporter.
    implementation: None
    optimizations: None
    line: None
    column: None
    # Category chosen heuristically from the frame string by Thread._intern_frame.
    category: CategoryID
    # Thread._intern_frame always stores 0 here (every category only has 'Other').
    subcategory: int
58
59
# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
class Stack(NamedTuple):
    """One row of a thread's stackTable: a leaf frame plus the stack it extends.

    The field order matches the "stackTable" schema emitted by
    Thread.to_json_dict().
    """
    # ID of the parent (caller-side) stack, or None for a root stack.
    prefix_id: Optional[StackID]
    # ID of this stack's leaf frame in the thread's frameTable.
    frame_id: FrameID
    # Thread._intern_stack always stores 0 here.
    category_id: CategoryID
65
66
# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
class Sample(NamedTuple):
    """A single timestamped entry in a thread's sample timeline."""
    # ID of the sampled stack in the thread's stackTable (None if there is none).
    stack_id: Optional[StackID]
    # Timestamp of the sample.
    time_ms: Milliseconds
    responsiveness: int
    # Whether the sampled callstack is complete; only used while building the
    # profile, not written to the JSON output.
    complete_stack: bool

    def to_json(self):
        """Return the row written to the "samples" table: [stack, time, responsiveness]."""
        stack_id, time_ms, responsiveness, _complete_stack = self
        return [stack_id, time_ms, responsiveness]
76
77
# Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
# Colors must be defined in:
# https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
#
# Colors follow Brendan Gregg's Flamegraph convention:
#   yellow for userland:
#     https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L419
#   orange for kernel:
#     https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L417
#   green for Java/JIT:
#     https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L411
#   blue for off-CPU:
#     https://github.com/brendangregg/FlameGraph/blob/810687f180f3c4929b5d965f54817a5218c9d89b/flamegraph.pl#L470
#
# The order matters: frames refer to a category by its position in this list.
CATEGORIES = [
    {"name": category_name, "color": category_color, "subcategories": ['Other']}
    for category_name, category_color in (
        ('User', 'yellow'),
        ('Kernel', 'orange'),
        ('Native', 'yellow'),
        ('DEX', 'green'),
        ('OAT', 'green'),
        ('Off-CPU', 'blue'),
        # Not used by this exporter yet, but some Firefox Profiler code assumes
        # there is an 'Other' category by searching for a category with
        # color=grey, so include this.
        ('Other', 'grey'),
        ('JIT', 'green'),
    )
]
140
141
def is_complete_stack(stack: List[str]) -> bool:
    """ Tell whether the callstack is complete, i.e. some frame mentions one of
        the known thread entry functions. The stack starts from root.
    """
    entry_markers = ('__libc_init', '__start_thread')
    return any(marker in frame for frame in stack for marker in entry_markers)
148
149
@dataclass
class Thread:
    """A builder for a profile of a single thread.

    Attributes:
      comm: Thread command-line (name).
      pid: process ID of containing process.
      tid: thread ID.
      samples: Timeline of profile samples.
      frameTable: interned stack frame ID -> stack frame.
      stringTable: interned string ID -> string.
      stringMap: interned string -> string ID.
      stackTable: interned stack ID -> stack.
      stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
      frameMap: Stack Frame string -> interned Frame ID.
    """
    comm: str
    pid: int
    tid: int
    samples: List[Sample] = field(default_factory=list)
    frameTable: List[Frame] = field(default_factory=list)
    stringTable: List[str] = field(default_factory=list)
    # TODO: this is redundant with frameTable, could we remove this?
    stringMap: Dict[str, int] = field(default_factory=dict)
    stackTable: List[Stack] = field(default_factory=list)
    stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
    frameMap: Dict[str, int] = field(default_factory=dict)

    def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
        """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
        key = (prefix_id, frame_id)
        stack_id = self.stackMap.get(key)
        if stack_id is not None:
            return stack_id
        stack_id = len(self.stackTable)
        self.stackTable.append(Stack(prefix_id=prefix_id,
                                     frame_id=frame_id,
                                     category_id=0))
        self.stackMap[key] = stack_id
        return stack_id

    def _intern_string(self, string: str) -> int:
        """Gets a matching string, or saves the new string. Returns a String ID."""
        string_id = self.stringMap.get(string)
        if string_id is not None:
            return string_id
        string_id = len(self.stringTable)
        self.stringTable.append(string)
        self.stringMap[string] = string_id
        return string_id

    @staticmethod
    def _frame_category(frame_str: str) -> int:
        """Guesses the category index (position in CATEGORIES) of a frame.

        Frame strings look like '<symbol> (in <dso>)'. The heuristics only
        inspect the file name of the dso, so a symbol or directory name that
        happens to contain e.g. '.so' or '.ko' (such as
        'CollectionsKt.sortedWith' or 'org.koin...') cannot flip the category.
        If the string has no ' (in ' part, the whole string is inspected.
        """
        _symbol, separator, dso = frame_str.rpartition(' (in ')
        if separator and dso.endswith(')'):
            dso = dso[:-1]
        # Keep only the last path component, e.g. 'libfoo.so' for
        # '/data/app/x/base.apk!/lib/arm64-v8a/libfoo.so'.
        lib_name = dso.rsplit('/', 1)[-1]

        # Heuristic: kernel code contains "kallsyms" as the library name.
        if "kallsyms" in lib_name or ".ko" in lib_name:
            # Heuristic: empirically, off-CPU profiles mostly measure off-CPU
            # time accounted to the linux kernel __schedule function, which
            # handles blocking. This only works if we have kernel symbol
            # (kallsyms) access though.  __schedule defined here:
            # https://cs.android.com/android/kernel/superproject/+/common-android-mainline:common/kernel/sched/core.c;l=6593;drc=0c99414a07ddaa18d8eb4be90b551d2687cbde2f
            if frame_str.startswith("__schedule "):
                return 5  # Off-CPU
            return 1  # Kernel
        if ".so" in lib_name:
            return 2  # Native
        if ".vdex" in lib_name:
            return 3  # DEX
        if ".oat" in lib_name:
            return 4  # OAT
        # "[JIT app cache]" is returned for JIT code here:
        # https://cs.android.com/android/platform/superproject/+/master:system/extras/simpleperf/dso.cpp;l=551;drc=4d8137f55782cc1e8cc93e4694ba3a7159d9a2bc
        if "[JIT app cache]" in lib_name:
            return 7  # JIT
        return 0  # User

    def _intern_frame(self, frame_str: str) -> int:
        """Gets a matching stack frame, or saves the new frame. Returns a Frame ID."""
        frame_id = self.frameMap.get(frame_str)
        if frame_id is not None:
            return frame_id
        frame_id = len(self.frameTable)
        self.frameMap[frame_str] = frame_id
        string_id = self._intern_string(frame_str)

        self.frameTable.append(Frame(
            string_id=string_id,
            relevantForJS=False,
            innerWindowID=0,
            implementation=None,
            optimizations=None,
            line=None,
            column=None,
            category=self._frame_category(frame_str),
            subcategory=0,
        ))
        return frame_id

    def add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
        """Add a timestamped stack trace sample to the thread builder.

        Args:
          comm: command-line (name) of the thread at this sample
          stack: sampled stack frames. Root first, leaf last.
          time_ms: timestamp of sample in milliseconds
        """
        # Unix threads often don't set their name immediately upon creation.
        # Use the last name
        self.comm = comm

        # Intern the stack root-first, so each stack entry points at the entry
        # of its caller. An empty stack leaves the sample's stack ID as None.
        prefix_stack_id = None
        for frame in stack:
            frame_id = self._intern_frame(frame)
            prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id)

        self.samples.append(Sample(stack_id=prefix_stack_id,
                                   time_ms=time_ms,
                                   responsiveness=0,
                                   complete_stack=is_complete_stack(stack)))

    def sort_samples(self) -> None:
        """ The samples aren't guaranteed to be in order. Sort them by time. """
        self.samples.sort(key=lambda s: s.time_ms)

    def remove_stack_gaps(self, max_remove_gap_length: int, gap_distr: Dict[int, int]) -> None:
        """ Ideally all callstacks are complete. But some may be broken for different reasons.
            To create a smooth view in "Stack Chart", remove small gaps of broken callstacks.

        Args:
            max_remove_gap_length: the max length of continuous broken-stack samples to remove
            gap_distr: updated in place; maps a gap length to the number of gaps
                of that length found (whether or not they were removed). Both a
                plain dict and a collections.Counter are accepted.
        """
        if max_remove_gap_length == 0:
            return
        i = 0
        sample_count = len(self.samples)
        remove_flags = [False] * sample_count
        while i < sample_count:
            if self.samples[i].complete_stack:
                i += 1
                continue
            # samples[i] starts a gap; n becomes the length of the run of
            # consecutive broken-stack samples.
            n = 1
            while (i + n < sample_count) and (not self.samples[i + n].complete_stack):
                n += 1
            # Don't rely on Counter semantics: a plain dict has no default 0.
            gap_distr[n] = gap_distr.get(n, 0) + 1
            if n <= max_remove_gap_length:
                for j in range(i, i + n):
                    remove_flags[j] = True
            i += n
        if any(remove_flags):
            self.samples = [s for s, remove in zip(self.samples, remove_flags) if not remove]

    def to_json_dict(self) -> Dict:
        """Converts this Thread to GeckoThread JSON format."""

        # Gecko profile format is row-oriented data as List[List],
        # And a schema for interpreting each index.
        # Schema:
        # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
        # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
        return {
            "tid": self.tid,
            "pid": self.pid,
            "name": self.comm,
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
            "markers": {
                "schema": {
                    "name": 0,
                    "startTime": 1,
                    "endTime": 2,
                    "phase": 3,
                    "category": 4,
                    "data": 5,
                },
                "data": [],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
            "samples": {
                "schema": {
                    "stack": 0,
                    "time": 1,
                    "responsiveness": 2,
                },
                "data": [s.to_json() for s in self.samples],
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
            "frameTable": {
                "schema": {
                    "location": 0,
                    "relevantForJS": 1,
                    "innerWindowID": 2,
                    "implementation": 3,
                    "optimizations": 4,
                    "line": 5,
                    "column": 6,
                    "category": 7,
                    "subcategory": 8,
                },
                # Frame rows are NamedTuples, which json serializes as lists.
                "data": self.frameTable,
            },
            # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
            "stackTable": {
                "schema": {
                    "prefix": 0,
                    "frame": 1,
                    "category": 2,
                },
                # Stack rows are NamedTuples, which json serializes as lists.
                "data": self.stackTable,
            },
            "stringTable": self.stringTable,
            "registerTime": 0,
            "unregisterTime": None,
            "processType": "default",
        }
361
362
def remove_stack_gaps(max_remove_gap_length: int, thread_map: Dict[int, Thread]) -> None:
    """ Remove stack gaps for each thread, and print status.

    Threads that end up with no samples are deleted from thread_map. Nothing is
    done when max_remove_gap_length is 0.
    """
    if max_remove_gap_length == 0:
        return
    gap_distr = Counter()
    samples_seen = 0
    samples_dropped = 0
    # Iterate over a snapshot, because emptied threads are deleted on the way.
    for tid, thread in list(thread_map.items()):
        count_before = len(thread.samples)
        thread.remove_stack_gaps(max_remove_gap_length, gap_distr)
        count_after = len(thread.samples)
        samples_seen += count_before
        samples_dropped += count_before - count_after
        if count_after == 0:
            del thread_map[tid]
    if samples_seen == 0:
        # No samples at all: skip the status output (avoids dividing by zero).
        return
    removed_percent = samples_dropped / samples_seen * 100
    logging.info('Remove stack gaps with length <= %d. %d (%.2f%%) samples are removed.',
                 max_remove_gap_length, samples_dropped, removed_percent)
    logging.debug('Stack gap length distribution among samples (gap_length: count): %s',
                  gap_distr)
386
387
def _gecko_profile(
        record_file: str,
        symfs_dir: Optional[str],
        kallsyms_file: Optional[str],
        report_lib_options: ReportLibOptions,
        max_remove_gap_length: int,
        percpu_samples: bool) -> GeckoProfile:
    """convert a simpleperf profile to gecko format

    Args:
      record_file: path of the recording to read.
      symfs_dir: if not None, passed to the report lib via SetSymfs().
      kallsyms_file: if not None, passed to the report lib via SetKallsymsFile().
      report_lib_options: options passed to the report lib. Note that
        trace_offcpu is overwritten with 'on-cpu' when percpu_samples is set
        and the recording supports trace-offcpu modes.
      max_remove_gap_length: the max length of continuous broken-stack samples
        to remove from each thread; 0 disables the removal.
      percpu_samples: group samples by cpu instead of by thread.

    Returns:
      The profile as a JSON-serializable dict.
    """
    lib = GetReportLib(record_file)

    # Map from tid to Thread
    thread_map: Dict[int, Thread] = {}
    # Map from pid to process name
    process_names: Dict[int, str] = {}

    # Make sure the report lib is closed even if reading the recording fails
    # half-way.
    try:
        lib.ShowIpForUnknownSymbol()
        if symfs_dir is not None:
            lib.SetSymfs(symfs_dir)
        if kallsyms_file is not None:
            lib.SetKallsymsFile(kallsyms_file)
        if percpu_samples:
            # Grouping samples by cpus doesn't support off cpu samples.
            if lib.GetSupportedTraceOffCpuModes():
                report_lib_options.trace_offcpu = 'on-cpu'
        lib.SetReportOptions(report_lib_options)

        arch = lib.GetArch()
        meta_info = lib.MetaInfo()
        record_cmd = lib.GetRecordCmd()

        while True:
            sample = lib.GetNextSample()
            if sample is None:
                break
            symbol = lib.GetSymbolOfCurrentSample()
            callchain = lib.GetCallChainOfCurrentSample()
            sample_time_ms = sample.time / 1000000

            stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)]
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name))
            # We want root first, leaf last.
            stack.reverse()

            if percpu_samples:
                # The process name is only learnt from a sample of the main
                # thread (tid == pid); until one is seen it is None.
                if sample.tid == sample.pid:
                    process_names[sample.pid] = sample.thread_comm
                process_name = process_names.get(sample.pid)
                # Put a pseudo frame naming the thread at the root, since the
                # timeline is per cpu in this mode.
                stack = [
                    '%s tid %d (in %s pid %d)' %
                    (sample.thread_comm, sample.tid, process_name, sample.pid)] + stack
                thread = thread_map.get(sample.cpu)
                if thread is None:
                    thread = Thread(comm=f'Cpu {sample.cpu}', pid=sample.cpu, tid=sample.cpu)
                    thread_map[sample.cpu] = thread
                thread.add_sample(
                    comm=f'Cpu {sample.cpu}',
                    stack=stack,
                    time_ms=sample_time_ms)
            else:
                # add thread sample
                thread = thread_map.get(sample.tid)
                if thread is None:
                    thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid)
                    thread_map[sample.tid] = thread
                thread.add_sample(
                    comm=sample.thread_comm,
                    stack=stack,
                    # We are being a bit fast and loose here with time here.  simpleperf
                    # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix
                    # epoch, but rather some arbitrary time. In practice, this doesn't
                    # matter, the Firefox Profiler normalises all the timestamps to begin at
                    # the minimum time.  Consider fixing this in future, if needed, by
                    # setting `simpleperf record --clockid realtime`.
                    time_ms=sample_time_ms)
    finally:
        lib.Close()

    for thread in thread_map.values():
        thread.sort_samples()

    remove_stack_gaps(max_remove_gap_length, thread_map)

    threads = [thread.to_json_dict() for thread in thread_map.values()]

    profile_timestamp = meta_info.get('timestamp')
    end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0

    # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
    gecko_profile_meta = {
        "interval": 1,
        "processType": 0,
        "product": record_cmd,
        "device": meta_info.get("product_props"),
        "platform": meta_info.get("android_build_fingerprint"),
        "stackwalk": 1,
        "debug": 0,
        "gcpoison": 0,
        "asyncstack": 1,
        # The profile timestamp is actually the end time, not the start time.
        # This is close enough for our purposes; I mostly just want to know which
        # day the profile was taken! Consider fixing this in future, if needed,
        # by setting `simpleperf record --clockid realtime` and taking the minimum
        # sample time.
        "startTime": end_time_ms,
        "shutdownTime": None,
        "version": 24,
        "presymbolicated": True,
        "categories": CATEGORIES,
        "markerSchema": [],
        "abi": arch,
        "oscpu": meta_info.get("android_build_fingerprint"),
        "appBuildID": meta_info.get("app_versioncode"),
    }

    # Schema:
    # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377
    # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
    return {
        "meta": gecko_profile_meta,
        "libs": [],
        "threads": threads,
        "processes": [],
        "pausedRanges": [],
    }
513
514
def main() -> None:
    """Parse the command line, convert the recording and print the profile JSON to stdout."""
    remove_gaps_help = """
                        Ideally all callstacks are complete. But some may be broken for different
                        reasons. To create a smooth view in "Stack Chart", remove small gaps of
                        broken callstacks. MAX_GAP_LENGTH is the max length of continuous
                        broken-stack samples we want to remove.
                        """

    arg_parser = BaseArgumentParser(description=__doc__)
    arg_parser.add_argument(
        '--symfs', help='Set the path to find binaries with symbols and debug info.')
    arg_parser.add_argument(
        '--kallsyms', help='Set the path to find kernel symbols.')
    arg_parser.add_argument(
        '-i', '--record_file', nargs='?', default='perf.data', help='Default is perf.data.')
    arg_parser.add_argument(
        '--remove-gaps', metavar='MAX_GAP_LENGTH', dest='max_remove_gap_length',
        type=int, default=3, help=remove_gaps_help)
    arg_parser.add_argument(
        '--percpu-samples', action='store_true',
        help='show samples based on cpus instead of threads')
    arg_parser.add_report_lib_options()
    options = arg_parser.parse_args()

    gecko_profile = _gecko_profile(
        record_file=options.record_file,
        symfs_dir=options.symfs,
        kallsyms_file=options.kallsyms,
        report_lib_options=options.report_lib_options,
        max_remove_gap_length=options.max_remove_gap_length,
        percpu_samples=options.percpu_samples,
    )

    # The profile goes to stdout, so that it can be piped (e.g. into gzip).
    json.dump(gecko_profile, sys.stdout, sort_keys=True)
546
547if __name__ == '__main__':
548    main()
549