1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""annotate.py: annotate source files based on perf.data.
19"""
20
21import logging
22import os
23import os.path
24import shutil
25from texttable import Texttable
26from typing import Dict, Union
27
28from simpleperf_report_lib import GetReportLib
29from simpleperf_utils import (
30    Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, flatten_arg_list, is_windows,
31    log_exit, ReadElf, SourceFileSearcher)
32
33
class SourceLine(object):
    """One resolved source location: a file, a function name and a line.

    The ``*_key`` properties return hashable values that identify the
    location at file, function and line granularity respectively.
    """

    def __init__(self, file_id, function, line):
        self.file, self.function, self.line = file_id, function, line

    @property
    def file_key(self):
        """Identity of the source file."""
        return self.file

    @property
    def function_key(self):
        """Identity of the function: a (file, function) tuple."""
        return self.file, self.function

    @property
    def line_key(self):
        """Identity of the line: a (file, line) tuple."""
        return self.file, self.line
51
52
class Addr2Line(object):
    """Collect addresses and map [dso_name, vaddr] to [source_file:line].

    Thin wrapper around Addr2Nearestline that additionally tries to resolve
    each reported source file to a real path via SourceFileSearcher.
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        readelf = ReadElf(ndk_path)
        finder = BinaryFinder(binary_cache_path, readelf)
        self.addr2line = Addr2Nearestline(ndk_path, finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register one address of a binary for later conversion."""
        self.addr2line.add_addr(dso_path, build_id, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert all registered addresses, passing the CPU count as `jobs`."""
        job_count = os.cpu_count()
        self.addr2line.convert_addrs_to_lines(jobs=job_count)

    def get_sources(self, dso_path, addr):
        """Return the SourceLine list for `addr` in `dso_path`.

        An empty list is returned when the dso is unknown or the address has
        no source information. When the searcher cannot resolve a file, the
        path reported by addr2line is kept unchanged.
        """
        dso = self.addr2line.get_dso(dso_path)
        entries = self.addr2line.get_addr_source(dso, addr) if dso else None
        if not entries:
            return []
        return [
            SourceLine(self.source_searcher.get_real_path(src_file) or src_file,
                       func_name, line_no)
            for src_file, line_no, func_name in entries
        ]
82
83
class Period(object):
    """Event count of a line, a function, a source file, or a binary.

    Two counters are kept:
      period:     event count that occurred while running the entity itself.
      acc_period: accumulated event count that occurred while running the
                  entity and the functions called by it.
    """

    def __init__(self, period=0, acc_period=0):
        self.period, self.acc_period = period, acc_period

    def __iadd__(self, other):
        """Add both counters of `other` to this object, in place."""
        self.acc_period += other.acc_period
        self.period += other.period
        return self
102
103
class DsoPeriod(object):
    """Accumulated Period of one shared library, identified by `dso_name`."""

    def __init__(self, dso_name):
        self.period = Period()
        self.dso_name = dso_name

    def add_period(self, period):
        """Add `period` to the running total of this shared library."""
        self.period += period
113
114
class FilePeriod(object):
    """Accumulated Period of one source file, plus per-line and per-function
    breakdowns.
    """

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # line number -> Period of that line.
        self.line_dict = {}
        # function name -> [function start line, Period of that function].
        self.function_dict = {}

    def add_period(self, period):
        """Add `period` to the total of the whole file."""
        self.period += period

    def add_line_period(self, line, period):
        """Add `period` to `line`, creating the line's entry on first use."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            line_period = Period()
            self.line_dict[line] = line_period
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Add `period` to a function, creating its entry on first use.

        The start line is recorded only when the entry is created; a start
        line of None is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if not entry:
            start_line = -1 if function_start_line is None else function_start_line
            entry = [start_line, Period()]
            self.function_dict[function_name] = entry
        entry[1] += period
142
143
class SourceFileAnnotator(object):
    """Annotate source files with event counts read from perf.data files.

    `config` is a dict. The constructor requires the keys 'perf_data_list',
    'source_dirs', 'dso_filters' and 'ndk_path'. Later stages also read
    'report_lib_options', 'summary_width' and 'raw_period'. The constructor
    sets 'annotate_dest_dir' in `config` itself.
    """

    def __init__(self, config):
        """Validate `config`, locate binary_cache and recreate the output dir.

        Any existing 'annotated_files' directory in the current working
        directory is removed before a fresh one is created.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # None means "no filtering": every dso is accepted.
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Sum of the periods of all samples that passed the dso filter.
        self.period = 0
        # dso name -> DsoPeriod
        self.dso_periods = {}
        # source file path -> FilePeriod
        self.file_periods = {}

    def annotate(self):
        """Run the whole pipeline: collect, convert, count, summarize, annotate."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _configure_report_lib(self, lib):
        """Apply symfs, kallsyms and report options to a freshly opened lib."""
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)
        lib.SetReportOptions(self.config['report_lib_options'])

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the symbol of the current sample followed by its callchain symbols."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        # Entries are read by index up to callchain.nr, as the original code did.
        symbols.extend(callchain.entries[i].symbol for i in range(callchain.nr))
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for perf_data in self.config['perf_data_list']:
            lib = GetReportLib(perf_data)
            try:
                self._configure_report_lib(lib)
                while lib.GetNextSample() is not None:
                    for symbol in self._get_sample_symbols(lib):
                        if not self._filter_symbol(symbol):
                            continue
                        build_id = lib.GetBuildIdForPath(symbol.dso_name)
                        # The hit address is used for line info, the symbol
                        # start address for function info.
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.vaddr_in_file)
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.symbol_addr)
            finally:
                # Close the report lib even when reading a sample fails.
                lib.Close()

    def _filter_symbol(self, symbol):
        """Return True if the symbol's dso passes the dso filter (or no filter is set)."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        """Convert every collected address to source file:line."""
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        for perf_data in self.config['perf_data_list']:
            lib = GetReportLib(perf_data)
            try:
                self._configure_report_lib(lib)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    self._generate_periods_for_sample(lib, sample)
            finally:
                # Close the report lib even when processing a sample fails.
                lib.Close()

    def _generate_periods_for_sample(self, lib, sample):
        """Add the period of one sample to every dso/file/function/line it hits."""
        symbols = self._get_sample_symbols(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        # The first symbol is the sampled location itself: it gets both self
        # and accumulated period. Callchain entries only get accumulated period.
        period = Period(sample.period, sample.period)
        for j, symbol in enumerate(symbols):
            if j == 1:
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name: str, period: 'Period', used_dso_dict: Dict[str, bool]):
        """Add `period` to a dso, at most once per sample (tracked by `used_dso_dict`)."""
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add `period` to a source file, at most once per sample."""
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add `period` to a source line, at most once per sample.

        The FilePeriod for source.file must already exist; callers add the
        file period first.
        """
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add `period` to a function, at most once per sample.

        The FilePeriod for source.file must already exist; callers add the
        file period first.
        """
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file: total period, dso table, file table, then
           one function/line table per source file.
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            self._write_dso_summary(f)
            self._write_file_summary(f)

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                self._write_function_line_summary(f, file_period)

    def _write_dso_summary(self, summary_fh):
        """Write the per-dso table, sorted by accumulated period (descending)."""
        dso_periods = sorted(self.dso_periods.values(),
                             key=lambda x: x.period.acc_period, reverse=True)
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'DSO'])
        for dso_period in dso_periods:
            total_str = self._get_period_str(dso_period.period.acc_period)
            self_str = self._get_period_str(dso_period.period.period)
            table.add_row([total_str, self_str, dso_period.dso_name])
        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _write_file_summary(self, summary_fh):
        """Write the per-source-file table, sorted by accumulated period (descending)."""
        file_periods = sorted(self.file_periods.values(),
                              key=lambda x: x.period.acc_period, reverse=True)
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'Source File'])
        for file_period in file_periods:
            total_str = self._get_period_str(file_period.period.acc_period)
            self_str = self._get_period_str(file_period.period.period)
            table.add_row([total_str, self_str, file_period.file])
        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _write_function_line_summary(self, summary_fh, file_period: 'FilePeriod'):
        """Write one table for a file: functions sorted by accumulated period
           (descending), followed by lines sorted by line number.
        """
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'Function/Line in ' + file_period.file])
        values = [(func_name, func_start_line, period)
                  for func_name, (func_start_line, period) in file_period.function_dict.items()]
        values.sort(key=lambda x: x[2].acc_period, reverse=True)
        for func_name, func_start_line, period in values:
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = func_name + ' (line %d)' % func_start_line
            table.add_row([total_str, self_str, name])
        for line in sorted(file_period.line_dict):
            period = file_period.line_dict[line]
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = 'line %d' % line
            table.add_row([total_str, self_str, name])

        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _get_period_str(self, period: Union['Period', int]) -> str:
        """Format an event count for display.

        A Period is rendered as 'Total X, Self Y'. An int is rendered raw when
        config['raw_period'] is set or the total period is 0, otherwise as a
        percentage of the total period.
        """
        if isinstance(period, Period):
            return 'Total %s, Self %s' % (
                self._get_period_str(period.acc_period),
                self._get_period_str(period.period))
        if self.config['raw_period'] or self.period == 0:
            return str(period)
        return '%.2f%%' % (100.0 * period / self.period)

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                logging.warning("can't find source file for path %s", from_path)
                continue
            # Map the source path to a location below dest_dir.
            if from_path.startswith('/'):
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        logging.info('annotate file %s', from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Later assignments win: function annotations override line
        # annotations, and the file annotation always takes line 1.
        annotates = {line: self._get_period_str(period)
                     for line, period in file_period.line_dict.items()}
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # For java files the annotation goes one line above the recorded
            # start line.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_period_str(period)
        annotates[1] = '[file] ' + self._get_period_str(file_period.period)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # 6 is the width of the '/* ' and ' */' wrappers, so unannotated
        # lines stay aligned with annotated ones.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)
        with open(to_path, 'w') as wf:
            for line_no, text in enumerate(lines, start=1):
                annotate = annotates.get(line_no)
                if annotate is None:
                    # Blank source lines get no padding.
                    prefix = empty_annotate if text.strip() else ''
                else:
                    prefix = '/* ' + annotate.ljust(max_annotate_cols) + ' */'
                wf.write(prefix)
                wf.write(text)
448
449
def main():
    """Parse the command line, build the annotator config and run the annotator."""
    parser = BaseArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--raw-period', action='store_true',
                        help='show raw period instead of percentage')
    parser.add_argument('--summary-width', type=int, default=80, help='max width of summary file')
    filter_group = parser.add_argument_group('Sample filter options')
    filter_group.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=filter_group)

    args = parser.parse_args()

    # Fall back to the default profile name when -i was not given.
    perf_data_list = flatten_arg_list(args.perf_data_list)
    if not perf_data_list:
        perf_data_list.append('perf.data')

    config = {
        'perf_data_list': perf_data_list,
        'source_dirs': flatten_arg_list(args.source_dirs),
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'raw_period': args.raw_period,
        'summary_width': args.summary_width,
        'report_lib_options': args.report_lib_options,
    }

    SourceFileAnnotator(config).annotate()
    logging.info('annotate finish successfully, please check result in annotated_files/.')
483
484
# Run the annotator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
487