1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""simpleperf_report_lib.py: a python wrapper of libsimpleperf_report.so.
19   Used to access samples in perf.data.
20
21"""
22
23import collections
24from collections import namedtuple
25import ctypes as ct
26from pathlib import Path
27import struct
28from typing import Any, Dict, List, Optional, Union
29
30from simpleperf_utils import (bytes_to_str, get_host_binary_path, is_windows, log_exit,
31                              str_to_bytes, ReportLibOptions)
32
33
def _is_null(p: Optional[ct._Pointer]) -> bool:
    """ Return True if p is None or a NULL ctypes pointer, otherwise False. """
    # A truthy value is never NULL. A falsy value is only reported as NULL when the
    # address obtained by casting it to void* is None.
    return not p and ct.cast(p, ct.c_void_p).value is None
38
39
def _char_pt(s: str) -> bytes:
    """ Convert a Python str into the bytes value handed to the native library. """
    encoded = str_to_bytes(s)
    return encoded
42
43
def _char_pt_to_str(char_pt: ct.c_char_p) -> str:
    """ Convert a char pointer value received from the native library into a Python str. """
    text = bytes_to_str(char_pt)
    return text
46
47
def _check(cond: bool, failmsg: str):
    """ Raise RuntimeError(failmsg) unless cond is truthy. """
    if cond:
        return
    raise RuntimeError(failmsg)
51
52
class SampleStruct(ct.Structure):
    """ One sample read from perf.data.
        ip: program counter of the thread when the sample was taken.
        pid: process id (thread group id) of the sampled thread.
        tid: thread id of the sampled thread.
        thread_comm: name of the sampled thread.
        time: timestamp of the sample in nanoseconds. Which clock is used depends on the
              --clockid option given to `simpleperf record`.
        in_kernel: True if the sampled instruction is in kernel space, False for user space.
        cpu: the cpu on which the sample was generated.
        period: number of events counted since the previous sample. With -e cpu-cycles this
                is a count of cpu cycles; with -e cpu-clock it is a number of nanoseconds.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('pid', ct.c_uint32),
        ('tid', ct.c_uint32),
        ('_thread_comm', ct.c_char_p),
        ('time', ct.c_uint64),
        ('_in_kernel', ct.c_uint32),
        ('cpu', ct.c_uint32),
        ('period', ct.c_uint64),
    ]

    @property
    def thread_comm(self) -> str:
        """ Thread name, converted from the raw `_thread_comm` field. """
        raw_name = self._thread_comm
        return _char_pt_to_str(raw_name)

    @property
    def in_kernel(self) -> bool:
        """ The integer `_in_kernel` field exposed as a bool. """
        return self._in_kernel != 0
83
84
class TracingFieldFormatStruct(ct.Structure):
    """Format of a tracing field.
       name: name of the field.
       offset: offset of the field in tracing data.
       elem_size: size of the element type.
       elem_count: the number of elements in this field, more than one if the field is an array.
       is_signed: whether the element type is signed or unsigned.
       is_dynamic: whether the element is a dynamic string.
    """
    _fields_ = [('_name', ct.c_char_p),
                ('offset', ct.c_uint32),
                ('elem_size', ct.c_uint32),
                ('elem_count', ct.c_uint32),
                ('is_signed', ct.c_uint32),
                ('is_dynamic', ct.c_uint32)]

    # Maps an element size in bytes to the struct format character of the signed integer
    # with that size. The upper-case character is the unsigned variant.
    _unpack_key_dict = {1: 'b', 2: 'h', 4: 'i', 8: 'q'}

    @property
    def name(self) -> str:
        return _char_pt_to_str(self._name)

    @staticmethod
    def _read_c_string(data, start: int, max_len: int) -> str:
        """ Decode the string stored in data[start:start + max_len], stopping at the first NUL. """
        raw = data[start:start + max_len]
        end = raw.find(b'\x00')
        if end != -1:
            raw = raw[:end]
        # Decode the whole string in one call. Decoding byte by byte would split multi-byte
        # characters (NOTE(review): assumes bytes_to_str is a strict UTF-8 decoder).
        return bytes_to_str(raw)

    def parse_value(self, data: ct.c_char_p) -> Any:
        """ Parse value of a field in a tracepoint event.
            data: buffer holding the tracing data of the event. It must support byte slicing
                  (GetTracingDataOfCurrentSample() passes a pointer to c_char).
            The return value depends on the type of the field:
              - a str for dynamic strings and for arrays of one-byte elements,
              - an int (elem_count == 1) or a tuple of ints for 1/2/4/8 byte elements,
              - bytes (elem_count == 1) or a list of bytes for any other element size.
        """
        if self.is_dynamic:
            # The field itself stores two little-endian uint16 values: where the string
            # starts in the tracing data and the maximum length to read.
            offset, max_len = struct.unpack('<HH', data[self.offset:self.offset + 4])
            return self._read_c_string(data, offset, max_len)

        if self.elem_count > 1 and self.elem_size == 1:
            # Probably the field is a string.
            # Don't use self.is_signed, which has different values on x86 and arm.
            return self._read_c_string(data, self.offset, self.elem_count)

        unpack_key = self._unpack_key_dict.get(self.elem_size)
        if unpack_key:
            if not self.is_signed:
                unpack_key = unpack_key.upper()
            value = struct.unpack('%d%s' % (self.elem_count, unpack_key),
                                  data[self.offset:self.offset + self.elem_count * self.elem_size])
        else:
            # Since we don't know the element type, just return the bytes.
            value = []
            offset = self.offset
            for _ in range(self.elem_count):
                value.append(data[offset: offset + self.elem_size])
                offset += self.elem_size
        if self.elem_count == 1:
            value = value[0]
        return value
143
144
class TracingDataFormatStruct(ct.Structure):
    """Layout of the tracing data attached to a tracepoint event, see
       https://www.kernel.org/doc/html/latest/trace/events.html#event-formats.
       size: combined size of all fields in the tracing data.
       field_count: how many entries `fields` has.
       fields: pointer to an array of TracingFieldFormatStruct.
    """
    _fields_ = [
        ('size', ct.c_uint32),
        ('field_count', ct.c_uint32),
        ('fields', ct.POINTER(TracingFieldFormatStruct)),
    ]
155
156
class EventStruct(ct.Structure):
    """Event type that a sample belongs to.
       name: name of the event type.
       tracing_data_format: field layout of the tracing data; only available for
                            tracepoint events.
    """
    _fields_ = [
        ('_name', ct.c_char_p),
        ('tracing_data_format', TracingDataFormatStruct),
    ]

    @property
    def name(self) -> str:
        """ Event type name, converted from the raw `_name` field. """
        raw_name = self._name
        return _char_pt_to_str(raw_name)
168
169
class MappingStruct(ct.Structure):
    """ One mapped memory area of a monitored thread, like a line of /proc/<pid>/maps.
        start: first address of the area in memory.
        end: end address of the area in memory.
        pgoff: offset of the area inside the mapped shared library.
    """
    _fields_ = [
        ('start', ct.c_uint64),
        ('end', ct.c_uint64),
        ('pgoff', ct.c_uint64),
    ]
179
180
class SymbolStruct(ct.Structure):
    """ Symbol information for the instruction hit by a sample, or by one entry of the
        sample's callchain.
        dso_name: path of the shared library that contains the instruction.
        vaddr_in_file: virtual address of the instruction inside the shared library.
        symbol_name: name of the function that contains the instruction.
        symbol_addr: start address of that function.
        symbol_len: length of that function in the shared library.
        mapping: pointer to the MappingStruct of the mapping area hit by the instruction.
    """
    _fields_ = [
        ('_dso_name', ct.c_char_p),
        ('vaddr_in_file', ct.c_uint64),
        ('_symbol_name', ct.c_char_p),
        ('symbol_addr', ct.c_uint64),
        ('symbol_len', ct.c_uint64),
        ('mapping', ct.POINTER(MappingStruct)),
    ]

    @property
    def dso_name(self) -> str:
        """ Shared library path, converted from the raw `_dso_name` field. """
        raw_path = self._dso_name
        return _char_pt_to_str(raw_path)

    @property
    def symbol_name(self) -> str:
        """ Function name, converted from the raw `_symbol_name` field. """
        raw_name = self._symbol_name
        return _char_pt_to_str(raw_name)
204
205
class CallChainEntryStructure(ct.Structure):
    """ One entry in the callchain of a sample.
        ip: address of the instruction for this entry.
        symbol: SymbolStruct describing that instruction.
    """
    _fields_ = [
        ('ip', ct.c_uint64),
        ('symbol', SymbolStruct),
    ]
213
214
class CallChainStructure(ct.Structure):
    """ The callchain of a sample.
        nr: how many entries the callchain has.
        entries: pointer to an array of `nr` CallChainEntryStructure.

        The function that was running when the sample was taken is not part of the
        callchain, and the entries go from the direct caller outwards. For example, for a
        sample taken in function C, called as function A -> function B -> function C,
        nr = 2 and entries = [function B, function A].
    """
    _fields_ = [
        ('nr', ct.c_uint32),
        ('entries', ct.POINTER(CallChainEntryStructure)),
    ]
226
227
class FeatureSectionStructure(ct.Structure):
    """ A feature section of perf.data. Feature sections hold information such as the
        record command or the device arch.
        data: pointer to the buffer with the section content.
        data_size: size of that buffer in bytes.
    """
    _fields_ = [
        ('data', ct.POINTER(ct.c_char)),
        ('data_size', ct.c_uint32),
    ]
235
236
class ReportLibStructure(ct.Structure):
    """ Opaque type of the native report lib instance.
        It declares no fields: Python only holds a pointer to it (the value returned by
        CreateReportLib) and passes that pointer back to the native functions.
    """
    _fields_ = []
239
240
def SetReportOptionsForReportLib(report_lib, options: ReportLibOptions):
    """ Forward every option that is set in `options` to `report_lib`.

        report_lib: object providing AddProguardMappingFile(), ShowArtFrames(),
                    RemoveMethod(), SetTraceOffCpuMode(), SetSampleFilter() and
                    AggregateThreads().
        options: the report options. An option whose value is falsy is skipped.
    """
    for mapping_file in options.proguard_mapping_files or ():
        report_lib.AddProguardMappingFile(mapping_file)
    if options.show_art_frames:
        report_lib.ShowArtFrames(True)
    for method_name in options.remove_method or ():
        report_lib.RemoveMethod(method_name)
    if mode := options.trace_offcpu:
        report_lib.SetTraceOffCpuMode(mode)
    if filters := options.sample_filters:
        report_lib.SetSampleFilter(filters)
    if thread_regexes := options.aggregate_threads:
        report_lib.AggregateThreads(thread_regexes)
256
257
258# pylint: disable=invalid-name
class ReportLib(object):
    """ Read contents from perf.data through the native libsimpleperf_report library.

        An instance owns a native report lib object, which is created in __init__() and
        destroyed by Close(). Every other method fails once Close() has been called.
    """

    def __init__(self, native_lib_path: Optional[str] = None):
        """ Load the native library and create the native report lib object.
            native_lib_path: path of libsimpleperf_report.so. If None, the path is looked
                             up with get_host_binary_path().
        """
        if native_lib_path is None:
            native_lib_path = self._get_native_lib()

        self._load_dependent_lib()
        self._lib = ct.CDLL(native_lib_path)
        self._CreateReportLibFunc = self._lib.CreateReportLib
        self._CreateReportLibFunc.restype = ct.POINTER(ReportLibStructure)
        self._DestroyReportLibFunc = self._lib.DestroyReportLib
        # The results of the next four functions are checked as booleans by their wrappers
        # below, so declare them as c_bool like the other bool-returning functions. With the
        # default c_int restype, ctypes reads a full int out of a one-byte return value.
        # NOTE(review): assumes the native functions are declared as returning bool.
        self._SetLogSeverityFunc = self._lib.SetLogSeverity
        self._SetLogSeverityFunc.restype = ct.c_bool
        self._SetSymfsFunc = self._lib.SetSymfs
        self._SetSymfsFunc.restype = ct.c_bool
        self._SetRecordFileFunc = self._lib.SetRecordFile
        self._SetRecordFileFunc.restype = ct.c_bool
        self._SetKallsymsFileFunc = self._lib.SetKallsymsFile
        self._SetKallsymsFileFunc.restype = ct.c_bool
        self._ShowIpForUnknownSymbolFunc = self._lib.ShowIpForUnknownSymbol
        self._ShowArtFramesFunc = self._lib.ShowArtFrames
        self._RemoveMethodFunc = self._lib.RemoveMethod
        self._RemoveMethodFunc.restype = ct.c_bool
        self._MergeJavaMethodsFunc = self._lib.MergeJavaMethods
        self._AddProguardMappingFileFunc = self._lib.AddProguardMappingFile
        self._AddProguardMappingFileFunc.restype = ct.c_bool
        self._GetSupportedTraceOffCpuModesFunc = self._lib.GetSupportedTraceOffCpuModes
        self._GetSupportedTraceOffCpuModesFunc.restype = ct.c_char_p
        self._SetTraceOffCpuModeFunc = self._lib.SetTraceOffCpuMode
        self._SetTraceOffCpuModeFunc.restype = ct.c_bool
        self._SetSampleFilterFunc = self._lib.SetSampleFilter
        self._SetSampleFilterFunc.restype = ct.c_bool
        self._AggregateThreadsFunc = self._lib.AggregateThreads
        self._AggregateThreadsFunc.restype = ct.c_bool
        self._GetNextSampleFunc = self._lib.GetNextSample
        self._GetNextSampleFunc.restype = ct.POINTER(SampleStruct)
        self._GetEventOfCurrentSampleFunc = self._lib.GetEventOfCurrentSample
        self._GetEventOfCurrentSampleFunc.restype = ct.POINTER(EventStruct)
        self._GetSymbolOfCurrentSampleFunc = self._lib.GetSymbolOfCurrentSample
        self._GetSymbolOfCurrentSampleFunc.restype = ct.POINTER(SymbolStruct)
        self._GetCallChainOfCurrentSampleFunc = self._lib.GetCallChainOfCurrentSample
        self._GetCallChainOfCurrentSampleFunc.restype = ct.POINTER(CallChainStructure)
        self._GetTracingDataOfCurrentSampleFunc = self._lib.GetTracingDataOfCurrentSample
        self._GetTracingDataOfCurrentSampleFunc.restype = ct.POINTER(ct.c_char)
        self._GetBuildIdForPathFunc = self._lib.GetBuildIdForPath
        self._GetBuildIdForPathFunc.restype = ct.c_char_p
        self._GetFeatureSection = self._lib.GetFeatureSection
        self._GetFeatureSection.restype = ct.POINTER(FeatureSectionStructure)
        self._instance = self._CreateReportLibFunc()
        assert not _is_null(self._instance)

        # Caches filled lazily by MetaInfo() and GetRecordCmd().
        self.meta_info: Optional[Dict[str, str]] = None
        self.current_sample: Optional[SampleStruct] = None
        self.record_cmd: Optional[str] = None

    def _get_native_lib(self) -> str:
        """ Return the default path of the native report library. """
        return get_host_binary_path('libsimpleperf_report.so')

    def _load_dependent_lib(self):
        """ Load libraries that the native report library depends on. """
        # As the windows dll is built with mingw we need to load 'libwinpthread-1.dll'.
        if is_windows():
            self._libwinpthread = ct.CDLL(get_host_binary_path('libwinpthread-1.dll'))

    def Close(self):
        """ Destroy the native report lib object. Calling it more than once is harmless. """
        if self._instance:
            self._DestroyReportLibFunc(self._instance)
            self._instance = None

    def SetReportOptions(self, options: ReportLibOptions):
        """ Set report options in one call. """
        SetReportOptionsForReportLib(self, options)

    def SetLogSeverity(self, log_level: str = 'info'):
        """ Set log severity of native lib, can be verbose,debug,info,error,fatal."""
        cond: bool = self._SetLogSeverityFunc(self.getInstance(), _char_pt(log_level))
        _check(cond, 'Failed to set log level')

    def SetSymfs(self, symfs_dir: str):
        """ Set directory used to find symbols."""
        cond: bool = self._SetSymfsFunc(self.getInstance(), _char_pt(symfs_dir))
        _check(cond, 'Failed to set symbols directory')

    def SetRecordFile(self, record_file: str):
        """ Set the path of record file, like perf.data."""
        cond: bool = self._SetRecordFileFunc(self.getInstance(), _char_pt(record_file))
        _check(cond, 'Failed to set record file')

    def ShowIpForUnknownSymbol(self):
        """ Forward to the native ShowIpForUnknownSymbol() function. """
        self._ShowIpForUnknownSymbolFunc(self.getInstance())

    def ShowArtFrames(self, show: bool = True):
        """ Show frames of internal methods of the Java interpreter. """
        self._ShowArtFramesFunc(self.getInstance(), show)

    def RemoveMethod(self, method_name_regex: str):
        """ Remove methods with name containing method_name_regex. """
        res = self._RemoveMethodFunc(self.getInstance(), _char_pt(method_name_regex))
        _check(res, f'failed to call RemoveMethod({method_name_regex})')

    def MergeJavaMethods(self, merge: bool = True):
        """ This option merges jitted java methods with the same name but in different jit
            symfiles. If possible, it also merges jitted methods with interpreted methods,
            by mapping jitted methods to their corresponding dex files.
            Side effects:
              It only works at method level, not instruction level.
              It makes symbol.vaddr_in_file and symbol.mapping not accurate for jitted methods.
            Java methods are merged by default.
        """
        self._MergeJavaMethodsFunc(self.getInstance(), merge)

    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
        """ Add proguard mapping.txt to de-obfuscate method names.
            Raises ValueError if the native library rejects the file.
        """
        if not self._AddProguardMappingFileFunc(self.getInstance(), _char_pt(str(mapping_file))):
            raise ValueError(f'failed to add proguard mapping file: {mapping_file}')

    def SetKallsymsFile(self, kallsym_file: str):
        """ Set the file path to a copy of the /proc/kallsyms file (for off device decoding) """
        cond: bool = self._SetKallsymsFileFunc(self.getInstance(), _char_pt(kallsym_file))
        _check(cond, 'Failed to set kallsyms file')

    def GetSupportedTraceOffCpuModes(self) -> List[str]:
        """ Get trace-offcpu modes supported by the recording file. It should be called after
            SetRecordFile(). The modes are only available for profiles recorded with --trace-offcpu
            option. All possible modes are:
              on-cpu:           report on-cpu samples with period representing time spent on cpu
              off-cpu:          report off-cpu samples with period representing time spent off cpu
              on-off-cpu:       report both on-cpu samples and off-cpu samples, which can be split
                                by event name.
              mixed-on-off-cpu: report on-cpu and off-cpu samples under the same event name.
        """
        modes_str = self._GetSupportedTraceOffCpuModesFunc(self.getInstance())
        _check(not _is_null(modes_str), 'Failed to call GetSupportedTraceOffCpuModes()')
        modes_str = _char_pt_to_str(modes_str)
        return modes_str.split(',') if modes_str else []

    def SetTraceOffCpuMode(self, mode: str):
        """ Set trace-offcpu mode. It should be called after SetRecordFile(). The mode should be
            one of the modes returned by GetSupportedTraceOffCpuModes().
        """
        res: bool = self._SetTraceOffCpuModeFunc(self.getInstance(), _char_pt(mode))
        _check(res, f'Failed to call SetTraceOffCpuMode({mode})')

    def SetSampleFilter(self, filters: List[str]):
        """ Set options used to filter samples. Available options are:
            --exclude-pid pid1,pid2,...   Exclude samples for selected processes.
            --exclude-tid tid1,tid2,...   Exclude samples for selected threads.
            --exclude-process-name process_name_regex   Exclude samples for processes with name
                                                        containing the regular expression.
            --exclude-thread-name thread_name_regex     Exclude samples for threads with name
                                                        containing the regular expression.
            --include-pid pid1,pid2,...   Include samples for selected processes.
            --include-tid tid1,tid2,...   Include samples for selected threads.
            --include-process-name process_name_regex   Include samples for processes with name
                                                        containing the regular expression.
            --include-thread-name thread_name_regex     Include samples for threads with name
                                                        containing the regular expression.
            --filter-file <file>          Use filter file to filter samples based on timestamps. The
                                          file format is in doc/sampler_filter.md.

            The filter argument should be a concatenation of options.
        """
        filter_array = (ct.c_char_p * len(filters))()
        filter_array[:] = [_char_pt(f) for f in filters]
        res: bool = self._SetSampleFilterFunc(self.getInstance(), filter_array, len(filters))
        _check(res, f'Failed to call SetSampleFilter({filters})')

    def AggregateThreads(self, thread_name_regex_list: List[str]):
        """ Given a list of thread name regex, threads with names matching the same regex are merged
            into one thread. As a result, samples from different threads (like a thread pool) can be
            shown in one flamegraph.
        """
        regex_array = (ct.c_char_p * len(thread_name_regex_list))()
        regex_array[:] = [_char_pt(f) for f in thread_name_regex_list]
        res: bool = self._AggregateThreadsFunc(
            self.getInstance(),
            regex_array, len(thread_name_regex_list))
        _check(res, f'Failed to call AggregateThreads({thread_name_regex_list})')

    def GetNextSample(self) -> Optional[SampleStruct]:
        """ Return the next sample. If no more samples, return None. """
        psample = self._GetNextSampleFunc(self.getInstance())
        if _is_null(psample):
            self.current_sample = None
        else:
            self.current_sample = psample[0]
        return self.current_sample

    def GetCurrentSample(self) -> Optional[SampleStruct]:
        """ Return the sample produced by the last GetNextSample() call, None if there is none. """
        return self.current_sample

    def GetEventOfCurrentSample(self) -> EventStruct:
        """ Return the event type of the current sample. """
        event = self._GetEventOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(event)
        return event[0]

    def GetSymbolOfCurrentSample(self) -> SymbolStruct:
        """ Return the symbol info of the current sample. """
        symbol = self._GetSymbolOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(symbol)
        return symbol[0]

    def GetCallChainOfCurrentSample(self) -> CallChainStructure:
        """ Return the callchain of the current sample. """
        callchain = self._GetCallChainOfCurrentSampleFunc(self.getInstance())
        assert not _is_null(callchain)
        return callchain[0]

    def GetTracingDataOfCurrentSample(self) -> Optional[Dict[str, Any]]:
        """ Return the tracing data of the current sample as an ordered mapping from field
            name to parsed field value, or None if the sample has no tracing data.
        """
        data = self._GetTracingDataOfCurrentSampleFunc(self.getInstance())
        if _is_null(data):
            return None
        event = self.GetEventOfCurrentSample()
        result = collections.OrderedDict()
        for i in range(event.tracing_data_format.field_count):
            field = event.tracing_data_format.fields[i]
            result[field.name] = field.parse_value(data)
        return result

    def GetBuildIdForPath(self, path: str) -> str:
        """ Return the build id that the native library reports for `path`. """
        build_id = self._GetBuildIdForPathFunc(self.getInstance(), _char_pt(path))
        assert not _is_null(build_id)
        return _char_pt_to_str(build_id)

    def _read_feature_section(self, feature_name: str) -> Optional[bytes]:
        """ Return a copy of the content of a feature section as bytes, or None if the native
            library returns no section for feature_name.
        """
        feature_data = self._GetFeatureSection(self.getInstance(), _char_pt(feature_name))
        if _is_null(feature_data):
            return None
        section = feature_data[0]
        return section.data[0:section.data_size]

    def GetRecordCmd(self) -> str:
        """ Return the record command line stored in the 'cmdline' feature section, with
            arguments joined by spaces and arguments containing a space wrapped in double
            quotes. Return '' if the section is missing. The result is cached.
        """
        if self.record_cmd is not None:
            return self.record_cmd
        self.record_cmd = ''
        data = self._read_feature_section('cmdline')
        if data is not None and len(data) >= 4:
            # Layout: uint32 arg_count, then for each arg a uint32 length followed by that
            # many bytes. Integers are read in native byte order.
            (arg_count,) = struct.unpack_from('=I', data, 0)
            pos = 4
            args = []
            for _ in range(arg_count):
                if pos + 4 > len(data):
                    # Truncated section: keep the arguments parsed so far.
                    break
                (str_len,) = struct.unpack_from('=I', data, pos)
                pos += 4
                # Drop NUL bytes, then decode the argument in one call so that multi-byte
                # characters stay intact.
                current_str = bytes_to_str(data[pos:pos + str_len].replace(b'\x00', b''))
                pos += str_len
                if ' ' in current_str:
                    current_str = '"' + current_str + '"'
                args.append(current_str)
            self.record_cmd = ' '.join(args)
        return self.record_cmd

    def _GetFeatureString(self, feature_name: str) -> str:
        """ Return the string stored in a feature section made of a uint32 length followed
            by the string bytes. Return '' if the section is missing or too short.
        """
        data = self._read_feature_section(feature_name)
        if data is None or len(data) < 4:
            return ''
        (str_len,) = struct.unpack_from('=I', data, 0)
        # The string ends at the first NUL byte, or after str_len bytes if there is none.
        raw = data[4:4 + str_len].split(b'\x00', 1)[0]
        return bytes_to_str(raw)

    def GetArch(self) -> str:
        """ Return the content of the 'arch' feature section. """
        return self._GetFeatureString('arch')

    def MetaInfo(self) -> Dict[str, str]:
        """ Return a string to string map stored in meta_info section in perf.data.
            It is used to pass some short meta information.
        """
        if self.meta_info is None:
            self.meta_info = {}
            data = self._read_feature_section('meta_info')
            if data:
                # The section is a sequence of NUL-terminated strings: key, value, key, ...
                # The piece after the last NUL is not terminated, so it is ignored.
                str_list = [bytes_to_str(s) for s in data.split(b'\x00')[:-1]]
                # zip() pairs keys with values and skips a trailing key without a value.
                self.meta_info.update(zip(str_list[0::2], str_list[1::2]))
        return self.meta_info

    def getInstance(self) -> ct._Pointer:
        """ Return the pointer to the native report lib object.
            Raises Exception if Close() has already been called.
        """
        if self._instance is None:
            raise Exception('Instance is Closed')
        return self._instance
548
549
# Plain-tuple counterparts of the ctypes structures above, exposing the same attribute names.
ProtoSample = namedtuple(
    'ProtoSample',
    'ip pid tid thread_comm time in_kernel cpu period')
ProtoEvent = namedtuple('ProtoEvent', 'name tracing_data_format')
ProtoSymbol = namedtuple(
    'ProtoSymbol',
    'dso_name vaddr_in_file symbol_name symbol_addr symbol_len mapping')
ProtoMapping = namedtuple('ProtoMapping', 'start end pgoff')
ProtoCallChain = namedtuple('ProtoCallChain', 'nr entries')
ProtoCallChainEntry = namedtuple('ProtoCallChainEntry', 'ip symbol')
559
560
561class ProtoFileReportLib:
562    """ Read contents from profile in cmd_report_sample.proto format.
563        It is generated by `simpleperf report-sample`.
564    """
565
566    @staticmethod
567    def is_supported_format(record_file: str):
568        with open(record_file, 'rb') as fh:
569            if fh.read(10) == b'SIMPLEPERF':
570                return True
571
572    @staticmethod
573    def get_report_sample_pb2():
574        try:
575            import report_sample_pb2
576            return report_sample_pb2
577        except ImportError as e:
578            log_exit(f'{e}\nprotobuf package is missing or too old. Please install it like ' +
579                     '`pip install protobuf==4.21`.')
580
581    def __init__(self):
582        self.record_file = None
583        self.report_sample_pb2 = ProtoFileReportLib.get_report_sample_pb2()
584        self.records: List[self.report_sample_pb2.Record] = []
585        self.record_index = -1
586        self.files: List[self.report_sample_pb2.File] = []
587        self.thread_map: Dict[int, self.report_sample_pb2.Thread] = {}
588        self.meta_info: Optional[self.report_sample_pb2.MetaInfo] = None
589        self.fake_mapping_starts = []
590        self.sample_queue: List[self.report_sample_pb2.Sample] = collections.deque()
591        self.trace_offcpu_mode = None
592        # mapping from thread id to the last off-cpu sample in the thread
593        self.offcpu_samples = {}
594
595    def Close(self):
596        pass
597
598    def SetReportOptions(self, options: ReportLibOptions):
599        """ Set report options in one call. """
600        SetReportOptionsForReportLib(self, options)
601
602    def SetLogSeverity(self, log_level: str = 'info'):
603        pass
604
605    def SetSymfs(self, symfs_dir: str):
606        pass
607
608    def SetRecordFile(self, record_file: str):
609        self.record_file = record_file
610        with open(record_file, 'rb') as fh:
611            data = fh.read()
612        _check(data[:10] == b'SIMPLEPERF', f'magic number mismatch: {data[:10]}')
613        version = struct.unpack('<H', data[10:12])[0]
614        _check(version == 1, f'version mismatch: {version}')
615        i = 12
616        while i < len(data):
617            _check(i + 4 <= len(data), 'data format error')
618            size = struct.unpack('<I', data[i:i + 4])[0]
619            if size == 0:
620                break
621            i += 4
622            _check(i + size <= len(data), 'data format error')
623            record = self.report_sample_pb2.Record()
624            record.ParseFromString(data[i: i + size])
625            i += size
626            if record.HasField('sample') or record.HasField('context_switch'):
627                self.records.append(record)
628            elif record.HasField('file'):
629                self.files.append(record.file)
630            elif record.HasField('thread'):
631                self.thread_map[record.thread.thread_id] = record.thread
632            elif record.HasField('meta_info'):
633                self.meta_info = record.meta_info
634                if self.meta_info.trace_offcpu:
635                    self.trace_offcpu_mode = 'mixed-on-off-cpu'
636        fake_mapping_start = 0
637        for file in self.files:
638            self.fake_mapping_starts.append(fake_mapping_start)
639            fake_mapping_start += len(file.symbol) + 1
640
641    def AddProguardMappingFile(self, mapping_file: Union[str, Path]):
642        """ Add proguard mapping.txt to de-obfuscate method names. """
643        raise NotImplementedError(
644            'Adding proguard mapping files are not implemented for report_sample profiles')
645
646    def ShowIpForUnknownSymbol(self):
647        pass
648
649    def ShowArtFrames(self, show: bool = True):
650        raise NotImplementedError(
651            'Showing art frames are not implemented for report_sample profiles')
652
653    def RemoveMethod(self, method_name_regex: str):
654        """ Remove methods with name containing method_name_regex. """
655        raise NotImplementedError("Removing method isn't implemented for report_sample profiles")
656
657    def SetSampleFilter(self, filters: List[str]):
658        raise NotImplementedError('sample filters are not implemented for report_sample profiles')
659
660    def GetSupportedTraceOffCpuModes(self) -> List[str]:
661        """ Get trace-offcpu modes supported by the recording file. It should be called after
662            SetRecordFile(). The modes are only available for profiles recorded with --trace-offcpu
663            option. All possible modes are:
664              on-cpu:           report on-cpu samples with period representing time spent on cpu
665              off-cpu:          report off-cpu samples with period representing time spent off cpu
666              on-off-cpu:       report both on-cpu samples and off-cpu samples, which can be split
667                                by event name.
668              mixed-on-off-cpu: report on-cpu and off-cpu samples under the same event name.
669        """
670        _check(self.meta_info,
671               'GetSupportedTraceOffCpuModes() should be called after SetRecordFile()')
672        if self.meta_info.trace_offcpu:
673            return ['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu']
674        return []
675
676    def SetTraceOffCpuMode(self, mode: str):
677        """ Set trace-offcpu mode. It should be called after SetRecordFile().
678        """
679        _check(mode in ['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'], 'invalide mode')
680        # Don't check if mode is in self.GetSupportedTraceOffCpuModes(). Because the profile may
681        # be generated by an old simpleperf.
682        self.trace_offcpu_mode = mode
683
684    def AggregateThreads(self, thread_name_regex_list: List[str]):
685        """ Given a list of thread name regex, threads with names matching the same regex are merged
686            into one thread. As a result, samples from different threads (like a thread pool) can be
687            shown in one flamegraph.
688        """
689        raise NotImplementedError(
690            'Aggregating threads are not implemented for report_sample profiles')
691
692    def GetNextSample(self) -> Optional[ProtoSample]:
693        if self.sample_queue:
694            self.sample_queue.popleft()
695        while not self.sample_queue:
696            self.record_index += 1
697            if self.record_index >= len(self.records):
698                break
699            record = self.records[self.record_index]
700            if record.HasField('sample'):
701                self._process_sample_record(record.sample)
702            elif record.HasField('context_switch'):
703                self._process_context_switch(record.context_switch)
704        return self.GetCurrentSample()
705
706    def _process_sample_record(self, sample) -> None:
707        if not self.trace_offcpu_mode:
708            self._add_to_sample_queue(sample)
709            return
710        event_name = self._get_event_name(sample.event_type_id)
711        is_offcpu = 'sched_switch' in event_name
712
713        if self.trace_offcpu_mode == 'on-cpu':
714            if not is_offcpu:
715                self._add_to_sample_queue(sample)
716            return
717
718        if prev_offcpu_sample := self.offcpu_samples.get(sample.thread_id):
719            # If there is a previous off-cpu sample, update its period.
720            prev_offcpu_sample.event_count = max(sample.time - prev_offcpu_sample.time, 1)
721            self._add_to_sample_queue(prev_offcpu_sample)
722
723        if is_offcpu:
724            self.offcpu_samples[sample.thread_id] = sample
725        else:
726            self.offcpu_samples[sample.thread_id] = None
727            if self.trace_offcpu_mode in ('on-off-cpu', 'mixed-on-off-cpu'):
728                self._add_to_sample_queue(sample)
729
730    def _process_context_switch(self, context_switch) -> None:
731        if not context_switch.switch_on:
732            return
733        if prev_offcpu_sample := self.offcpu_samples.get(context_switch.thread_id):
734            prev_offcpu_sample.event_count = max(context_switch.time - prev_offcpu_sample.time, 1)
735            self.offcpu_samples[context_switch.thread_id] = None
736            self._add_to_sample_queue(prev_offcpu_sample)
737
738    def _add_to_sample_queue(self, sample) -> None:
739        self.sample_queue.append(sample)
740
741    def GetCurrentSample(self) -> Optional[ProtoSample]:
742        if not self.sample_queue:
743            return None
744        sample = self.sample_queue[0]
745        thread = self.thread_map[sample.thread_id]
746        return ProtoSample(
747            ip=0, pid=thread.process_id, tid=thread.thread_id, thread_comm=thread.thread_name,
748            time=sample.time, in_kernel=False, cpu=0, period=sample.event_count)
749
750    def GetEventOfCurrentSample(self) -> ProtoEvent:
751        sample = self.sample_queue[0]
752        event_type_id = 0 if self.trace_offcpu_mode == 'mixed-on-off-cpu' else sample.event_type_id
753        event_name = self._get_event_name(event_type_id)
754        return ProtoEvent(name=event_name, tracing_data_format=None)
755
756    def _get_event_name(self, event_type_id: int) -> str:
757        return self.meta_info.event_type[event_type_id]
758
759    def GetSymbolOfCurrentSample(self) -> ProtoSymbol:
760        sample = self.sample_queue[0]
761        node = sample.callchain[0]
762        return self._build_symbol(node)
763
764    def GetCallChainOfCurrentSample(self) -> ProtoCallChain:
765        entries = []
766        sample = self.sample_queue[0]
767        for node in sample.callchain[1:]:
768            symbol = self._build_symbol(node)
769            entries.append(ProtoCallChainEntry(ip=0, symbol=symbol))
770        return ProtoCallChain(nr=len(entries), entries=entries)
771
772    def _build_symbol(self, node) -> ProtoSymbol:
773        file = self.files[node.file_id]
774        if node.symbol_id == -1:
775            symbol_name = 'unknown'
776            fake_symbol_addr = self.fake_mapping_starts[node.file_id] + len(file.symbol)
777            fake_symbol_pgoff = 0
778        else:
779            symbol_name = file.symbol[node.symbol_id]
780            fake_symbol_addr = self.fake_mapping_starts[node.file_id] = node.symbol_id + 1
781            fake_symbol_pgoff = node.symbol_id + 1
782        mapping = ProtoMapping(fake_symbol_addr, 1, fake_symbol_pgoff)
783        return ProtoSymbol(dso_name=file.path, vaddr_in_file=node.vaddr_in_file,
784                           symbol_name=symbol_name, symbol_addr=0, symbol_len=1, mapping=[mapping])
785
786    def GetBuildIdForPath(self, path: str) -> str:
787        return ''
788
789    def GetRecordCmd(self) -> str:
790        return ''
791
792    def GetArch(self) -> str:
793        return ''
794
795    def MetaInfo(self) -> Dict[str, str]:
796        return {}
797
798
def GetReportLib(record_file: str) -> Union[ReportLib, ProtoFileReportLib]:
    """ Create the report lib matching the format of record_file and load the file into it.

        A ProtoFileReportLib is used when ProtoFileReportLib.is_supported_format() accepts the
        file, a ReportLib otherwise. SetRecordFile(record_file) has already been called on the
        returned object.
    """
    use_proto_reader = ProtoFileReportLib.is_supported_format(record_file)
    report_lib = ProtoFileReportLib() if use_proto_reader else ReportLib()
    report_lib.SetRecordFile(record_file)
    return report_lib
806