#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""

import logging
import os
import os.path
import shutil
from typing import Callable, Dict, Union

from texttable import Texttable

from simpleperf_report_lib import GetReportLib
from simpleperf_utils import (
    Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, flatten_arg_list, is_windows,
    log_exit, ReadElf, SourceFileSearcher)


class SourceLine:
    """A (source file, function, line) triple produced by address lookup.

    The three *_key properties are the keys used to de-duplicate period
    accounting within one sample: per file, per (file, function) and per
    (file, line).
    """

    def __init__(self, file_id, function, line):
        self.file = file_id
        self.function = function
        self.line = line

    @property
    def file_key(self):
        return self.file

    @property
    def function_key(self):
        return (self.file, self.function)

    @property
    def line_key(self):
        return (self.file, self.line)


class Addr2Line:
    """Collect information of how to map [dso_name, vaddr] to [source_file:line].

    Thin wrapper around Addr2Nearestline that additionally resolves the
    reported source file names to real paths through SourceFileSearcher.
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path))
        self.addr2line = Addr2Nearestline(ndk_path, binary_finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register an address that must be converted later."""
        self.addr2line.add_addr(dso_path, build_id, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert all registered addresses, using one job per CPU."""
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to a single job instead of passing None through.
        self.addr2line.convert_addrs_to_lines(jobs=os.cpu_count() or 1)

    def get_sources(self, dso_path, addr):
        """Return the list of SourceLine objects for addr in dso_path.

        Returns an empty list when the dso or the address has no source
        information. When the source searcher cannot resolve a file name to
        a real path, the unresolved name is kept as the file id.
        """
        dso = self.addr2line.get_dso(dso_path)
        if not dso:
            return []
        source = self.addr2line.get_addr_source(dso, addr)
        if not source:
            return []
        result = []
        for (source_file, source_line, function_name) in source:
            real_path = self.source_searcher.get_real_path(source_file)
            result.append(SourceLine(real_path or source_file, function_name, source_line))
        return result


class Period:
    """Event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        # In-place accumulation: mutates and returns self, so `x += p` keeps
        # the same Period object that containers already reference.
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod:
    """Period for each shared library"""

    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        self.period += period


class FilePeriod:
    """Period for each source file"""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # Period for each line in the file: {line: Period}.
        self.line_dict = {}
        # Period for each function in the source file:
        # {function_name: [function_start_line, Period]}.
        self.function_dict = {}

    def add_period(self, period):
        self.period += period

    def add_line_period(self, line, period):
        line_period = self.line_dict.get(line)
        if line_period is None:
            self.line_dict[line] = line_period = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period for a function.

        The start line is recorded only the first time a function is seen;
        a missing start line (None) is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if entry is None:
            if function_start_line is None:
                function_start_line = -1
            self.function_dict[function_name] = entry = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator:
    """group code for annotating source files"""

    def __init__(self, config):
        """Validate config, reset the output directory and init state.

        config is a dict. The keys 'perf_data_list', 'source_dirs',
        'dso_filters' and 'ndk_path' are checked here; 'report_lib_options',
        'raw_period' and 'summary_width' are read later by other methods.
        The key 'annotate_dest_dir' is written into config by this method.
        Any existing 'annotated_files' directory is removed and recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # None means "no filtering": every dso is accepted.
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Sum of the periods of all samples that passed the dso filter.
        self.period = 0
        self.dso_periods = {}
        self.file_periods = {}

    def annotate(self):
        """Run the whole pipeline: collect, convert, aggregate, write."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _for_each_sample(self, handle_sample: Callable):
        """Call handle_sample(lib, sample) for every sample of every perf data file.

        The report lib is closed in a finally clause, so it is released even
        when setup or handle_sample raises.
        """
        for perf_data in self.config['perf_data_list']:
            lib = GetReportLib(perf_data)
            try:
                if self.symfs_dir:
                    lib.SetSymfs(self.symfs_dir)
                if self.kallsyms:
                    lib.SetKallsymsFile(self.kallsyms)
                lib.SetReportOptions(self.config['report_lib_options'])
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    handle_sample(lib, sample)
            finally:
                lib.Close()

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the current sample's symbol followed by its callchain symbols."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        for i in range(callchain.nr):
            symbols.append(callchain.entries[i].symbol)
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        self._for_each_sample(self._collect_addrs_for_sample)

    def _collect_addrs_for_sample(self, lib, sample):
        """Register the hit address and function start address of each accepted symbol."""
        for symbol in self._get_sample_symbols(lib):
            if not self._filter_symbol(symbol):
                continue
            build_id = lib.GetBuildIdForPath(symbol.dso_name)
            # The address that was hit, used for file/line periods.
            self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                    symbol.vaddr_in_file)
            # The function start address, used for function periods.
            self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                    symbol.symbol_addr)

    def _filter_symbol(self, symbol):
        """Return True if the symbol's dso is accepted by the dso filter."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        self._for_each_sample(self._generate_periods_for_sample)

    def _generate_periods_for_sample(self, lib, sample):
        """Add the period of one sample to every dso/file/function/line it hits."""
        symbols = self._get_sample_symbols(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        # symbols[0] is the sample's own symbol: it gets both self and
        # accumulated period. Callchain entries only get accumulated period.
        period = Period(sample.period, sample.period)
        for j, symbol in enumerate(symbols):
            if j == 1:
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name: str, period: Period, used_dso_dict: Dict[str, bool]):
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        # Callers add the file period first, so the FilePeriod entry exists.
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        # Callers add the file period first, so the FilePeriod entry exists.
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write <annotate_dest_dir>/summary: total, dso table, file table,
           then one function/line table per source file.
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            self._write_dso_summary(f)
            self._write_file_summary(f)

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                self._write_function_line_summary(f, file_period)

    def _new_summary_table(self, last_column_title: str) -> Texttable:
        """Create a three-column, left-aligned table with its header row."""
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', last_column_title])
        return table

    @staticmethod
    def _print_table(table: Texttable, summary_fh):
        """Write the drawn table followed by an empty line."""
        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _write_dso_summary(self, summary_fh):
        """Write one row per dso, sorted by accumulated period, descending."""
        dso_periods = sorted(self.dso_periods.values(),
                             key=lambda x: x.period.acc_period, reverse=True)
        table = self._new_summary_table('DSO')
        for dso_period in dso_periods:
            total_str = self._get_period_str(dso_period.period.acc_period)
            self_str = self._get_period_str(dso_period.period.period)
            table.add_row([total_str, self_str, dso_period.dso_name])
        self._print_table(table, summary_fh)

    def _write_file_summary(self, summary_fh):
        """Write one row per source file, sorted by accumulated period, descending."""
        file_periods = sorted(self.file_periods.values(),
                              key=lambda x: x.period.acc_period, reverse=True)
        table = self._new_summary_table('Source File')
        for file_period in file_periods:
            total_str = self._get_period_str(file_period.period.acc_period)
            self_str = self._get_period_str(file_period.period.period)
            table.add_row([total_str, self_str, file_period.file])
        self._print_table(table, summary_fh)

    def _write_function_line_summary(self, summary_fh, file_period: FilePeriod):
        """Write the functions (by accumulated period, descending) and then
           the lines (by line number, ascending) of one source file.
        """
        table = self._new_summary_table('Function/Line in ' + file_period.file)
        values = [(func_name, func_start_line, period)
                  for func_name, (func_start_line, period) in file_period.function_dict.items()]
        values.sort(key=lambda x: x[2].acc_period, reverse=True)
        for func_name, func_start_line, period in values:
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = func_name + ' (line %d)' % func_start_line
            table.add_row([total_str, self_str, name])
        for line in sorted(file_period.line_dict):
            period = file_period.line_dict[line]
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = 'line %d' % line
            table.add_row([total_str, self_str, name])

        self._print_table(table, summary_fh)

    def _get_period_str(self, period: Union[Period, int]) -> str:
        """Format a period for display.

        A Period is rendered as 'Total <acc>, Self <self>'. An int is
        rendered as the raw count when config['raw_period'] is set or the
        total period is 0 (avoids dividing by zero), otherwise as a
        percentage of the total period with two decimals.
        """
        if isinstance(period, Period):
            return 'Total %s, Self %s' % (
                self._get_period_str(period.acc_period),
                self._get_period_str(period.period))
        if self.config['raw_period'] or self.period == 0:
            return str(period)
        return '%.2f%%' % (100.0 * period / self.period)

    def _annotate_files(self):
        """Annotate every source file that has period information.

        Files that do not exist on disk are skipped with a warning. The
        destination path mirrors the source path below annotate_dest_dir.
        Files ending in '.java' are flagged as Java for _annotate_file.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                logging.warning("can't find source file for path %s", from_path)
                continue
            if from_path.startswith('/'):
                # Drop the leading '/' so os.path.join keeps dest_dir.
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        logging.info('annotate file %s', from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Later assignments win: function annotations override line
        # annotations, and the file annotation always owns line 1.
        annotates = {}
        for line, period in file_period.line_dict.items():
            annotates[line] = self._get_period_str(period)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # NOTE(review): Java function annotations are placed one line
            # above the reported start line; reason not visible here.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_period_str(period)
        annotates[1] = '[file] ' + self._get_period_str(file_period.period)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # Width of '/* ' + annotation + ' */' so unannotated code stays aligned.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)
        with open(to_path, 'w') as wf:
            for line_number, text in enumerate(lines, start=1):
                annotate = annotates.get(line_number)
                if annotate is None:
                    # Blank lines get no padding, to avoid trailing whitespace.
                    annotate = empty_annotate if text.strip() else ''
                else:
                    annotate = '/* ' + annotate + (
                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
                wf.write(annotate)
                wf.write(text)


def main():
    """Parse command line arguments, build the config dict and run the annotator."""
    parser = BaseArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--raw-period', action='store_true',
                        help='show raw period instead of percentage')
    parser.add_argument('--summary-width', type=int, default=80, help='max width of summary file')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)

    args = parser.parse_args()
    config = {}
    config['perf_data_list'] = flatten_arg_list(args.perf_data_list)
    if not config['perf_data_list']:
        config['perf_data_list'].append('perf.data')
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path
    config['raw_period'] = args.raw_period
    config['summary_width'] = args.summary_width
    config['report_lib_options'] = args.report_lib_options

    annotator = SourceFileAnnotator(config)
    annotator.annotate()
    logging.info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()