1#!/usr/bin/env python3 2 3# Copyright (C) 2022 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16import dataclasses 17import datetime 18import glob 19import json 20import logging 21import os 22import pathlib 23import re 24import shutil 25import subprocess 26import textwrap 27from pathlib import Path 28from typing import Iterable 29 30from bp2build_metrics_proto.bp2build_metrics_pb2 import Bp2BuildMetrics 31from metrics_proto.metrics_pb2 import MetricsBase 32from metrics_proto.metrics_pb2 import PerfInfo 33from metrics_proto.metrics_pb2 import SoongBuildMetrics 34 35import util 36 37 38@dataclasses.dataclass 39class PerfInfoOrEvent: 40 """ 41 A duck-typed union of `soong_build_metrics.PerfInfo` and 42 `soong_build_bp2build_metrics.Event` protobuf message types 43 """ 44 45 id: str 46 real_time: datetime.timedelta 47 start_time: datetime.datetime 48 49 def __post_init__(self): 50 if isinstance(self.real_time, int): 51 self.real_time = datetime.timedelta(microseconds=self.real_time / 1000) 52 if isinstance(self.start_time, int): 53 epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) 54 self.start_time = epoch + datetime.timedelta( 55 microseconds=self.start_time / 1000 56 ) 57 58 59BP2BUILD_PB = "bp2build_metrics.pb" 60BUILD_TRACE_GZ = "build.trace.gz" 61CRITICAL_PATH = "soong.log" 62SOONG_BUILD_PB = "soong_build_metrics.pb" 63SOONG_PB = "soong_metrics" 64 65def _convert_pprof_to_human_readable_format(pprof: Path, output_type: str = 'pdf'): 66 output = pprof.with_suffix("." + output_type).name 67 subprocess.run( 68 f"go tool pprof -{output_type} -output {output} {pprof.name}", 69 shell=True, 70 cwd=pprof.parent 71 ) 72 73def _archive_pprof(envvar: str, d:Path): 74 if envvar not in os.environ: 75 return 76 pprof_prefix = pathlib.Path(os.environ[envvar]) 77 if not pprof_prefix.is_absolute(): 78 logging.warning( 79 "Ignoring pprof files; please use an absolute path, e.g. " 80 f"{envvar}={util.get_out_dir().joinpath('pprof')}" 81 ) 82 return 83 for profile in os.listdir(str(pprof_prefix.parent)): 84 if profile.startswith(f"{pprof_prefix.name}."): 85 shutil.move(pprof_prefix.with_name(profile), d) 86 _convert_pprof_to_human_readable_format(d.joinpath(profile)) 87 88 89def archive_run(d: Path, build_info: util.BuildInfo): 90 with open(d.joinpath(util.BUILD_INFO_JSON), "w") as f: 91 json.dump(build_info, f, indent=True, cls=util.CustomEncoder) 92 metrics_to_copy = [ 93 BP2BUILD_PB, 94 BUILD_TRACE_GZ, 95 CRITICAL_PATH, 96 SOONG_BUILD_PB, 97 SOONG_PB, 98 ] 99 for metric_name in metrics_to_copy: 100 metric = util.get_out_dir().joinpath(metric_name) 101 if metric.exists(): 102 shutil.copy(metric, d.joinpath(metric_name)) 103 _archive_pprof("SOONG_PROFILE_CPU", d) 104 _archive_pprof("SOONG_PROFILE_MEM", d) 105 106 107def read_pbs(d: Path) -> tuple[dict[str, any], list[PerfInfoOrEvent]]: 108 """ 109 Reads metrics data from pb files and archives the file by copying 110 them under the log_dir. 111 Soong_build event names may contain "mixed_build" event. To normalize the 112 event names between mixed builds and soong-only build, convert 113 `soong_build/soong_build.xyz` and `soong_build/soong_build.mixed_build.xyz` 114 both to simply `soong_build/*.xyz` 115 """ 116 soong_pb = d.joinpath(SOONG_PB) 117 soong_build_pb = d.joinpath(SOONG_BUILD_PB) 118 bp2build_pb = d.joinpath(BP2BUILD_PB) 119 120 events: list[PerfInfoOrEvent] = [] 121 122 def gen_id(name: str, desc: str) -> str: 123 # Bp2BuildMetrics#Event doesn't have description 124 normalized = re.sub(r"^(?:soong_build|mixed_build)", "*", desc) 125 return f"{name}/{normalized}" 126 127 def extract_perf_info(root_obj): 128 for field_name in dir(root_obj): 129 if field_name.startswith("__"): 130 continue 131 field_value = getattr(root_obj, field_name) 132 if isinstance(field_value, Iterable): 133 for item in field_value: 134 if not isinstance(item, PerfInfo): 135 break 136 events.append( 137 PerfInfoOrEvent( 138 gen_id(item.name, item.description), 139 item.real_time, 140 item.start_time, 141 ) 142 ) 143 144 if soong_pb.exists(): 145 metrics_base = MetricsBase() 146 with open(soong_pb, "rb") as f: 147 metrics_base.ParseFromString(f.read()) 148 extract_perf_info(metrics_base) 149 150 soong_build_metrics = SoongBuildMetrics() 151 if soong_build_pb.exists(): 152 with open(soong_build_pb, "rb") as f: 153 soong_build_metrics.ParseFromString(f.read()) 154 extract_perf_info(soong_build_metrics) 155 156 if bp2build_pb.exists(): 157 bp2build_metrics = Bp2BuildMetrics() 158 with open(bp2build_pb, "rb") as f: 159 bp2build_metrics.ParseFromString(f.read()) 160 for event in bp2build_metrics.events: 161 events.append( 162 PerfInfoOrEvent(event.name, event.real_time, event.start_time) 163 ) 164 165 events.sort(key=lambda e: e.start_time) 166 167 retval = {} 168 if soong_build_metrics.mixed_builds_info: 169 ms = soong_build_metrics.mixed_builds_info.mixed_build_enabled_modules 170 retval["modules"] = soong_build_metrics.modules 171 retval["variants"] = soong_build_metrics.variants 172 if ms: 173 retval["mixed.enabled"] = len(ms) 174 with open(d.joinpath("mixed.enabled.txt"), "w") as f: 175 for m in ms: 176 print(m, file=f) 177 ms = soong_build_metrics.mixed_builds_info.mixed_build_disabled_modules 178 if ms: 179 retval["mixed.disabled"] = len(ms) 180 if bp2build_pb.exists(): 181 retval["generatedModuleCount"] = bp2build_metrics.generatedModuleCount 182 retval["unconvertedModuleCount"] = bp2build_metrics.unconvertedModuleCount 183 return retval, events 184 185 186Row = dict[str, any] 187 188 189def _get_column_headers(rows: list[Row], allow_cycles: bool) -> list[str]: 190 """ 191 Basically a topological sort or column headers. For each Row, the column order 192 can be thought of as a partial view of a chain of events in chronological 193 order. It's a partial view because not all events may have needed to occur for 194 a build. 195 """ 196 197 @dataclasses.dataclass 198 class Column: 199 header: str 200 indegree: int 201 nexts: set[str] 202 203 def __str__(self): 204 return f"#{self.indegree}->{self.header}->{self.nexts}" 205 206 def dfs(self, target: str, visited: set[str] = None) -> list[str]: 207 if not visited: 208 visited = set() 209 if target == self.header and self.header in visited: 210 return [self.header] 211 for n in self.nexts: 212 if n in visited: 213 continue 214 visited.add(n) 215 next_col = all_cols[n] 216 path = next_col.dfs(target, visited) 217 if path: 218 return [self.header, *path] 219 return [] 220 221 all_cols: dict[str, Column] = {} 222 for row in rows: 223 prev_col = None 224 for col in row: 225 if col not in all_cols: 226 column = Column(col, 0, set()) 227 all_cols[col] = column 228 if prev_col is not None and col not in prev_col.nexts: 229 all_cols[col].indegree += 1 230 prev_col.nexts.add(col) 231 prev_col = all_cols[col] 232 233 acc = [] 234 entries = [c for c in all_cols.values()] 235 while len(entries) > 0: 236 # sorting alphabetically to break ties for concurrent events 237 entries.sort(key=lambda c: c.header, reverse=True) 238 entries.sort(key=lambda c: c.indegree, reverse=True) 239 entry = entries.pop() 240 # take only one to maintain alphabetical sort 241 if entry.indegree != 0: 242 cycle = "->".join(entry.dfs(entry.header)) 243 s = f"event ordering has a cycle {cycle}" 244 logging.debug(s) 245 if not allow_cycles: 246 raise ValueError(s) 247 acc.append(entry.header) 248 for n in entry.nexts: 249 n = all_cols.get(n) 250 if n is not None: 251 n.indegree -= 1 252 else: 253 if not allow_cycles: 254 raise ValueError(f"unexpected error for: {n}") 255 return acc 256 257 258def get_build_info(d: Path) -> dict[str, any]: 259 build_info_json = d.joinpath(util.BUILD_INFO_JSON) 260 if not build_info_json.exists(): 261 return {} 262 with open(build_info_json, "r") as f: 263 logging.debug("reading %s", build_info_json) 264 build_info = json.load(f) 265 return build_info 266 267 268def _get_prefix_headers(prefix_rows: list[Row]) -> list[str]: 269 prefix_headers = [] 270 seen: set[str] = set() 271 for prefix_row in prefix_rows: 272 for prefix_header in prefix_row.keys(): 273 if prefix_header not in seen: 274 prefix_headers.append(prefix_header) 275 seen.add(prefix_header) 276 return prefix_headers 277 278 279def tabulate_metrics_csv(log_dir: Path): 280 prefix_rows: list[Row] = [] 281 rows: list[Row] = [] 282 dirs = glob.glob(f"{util.RUN_DIR_PREFIX}*", root_dir=log_dir) 283 dirs.sort(key=lambda x: int(x[1 + len(util.RUN_DIR_PREFIX) :])) 284 for d in dirs: 285 d = log_dir.joinpath(d) 286 prefix_row = get_build_info(d) 287 prefix_row["log"] = d.name 288 prefix_row["targets"] = " ".join(prefix_row.get("targets", [])) 289 extra, events = read_pbs(d) 290 prefix_row = prefix_row | extra 291 row = {e.id: util.hhmmss(e.real_time) for e in events} 292 prefix_rows.append(prefix_row) 293 rows.append(row) 294 295 prefix_headers: list[str] = _get_prefix_headers(prefix_rows) 296 headers: list[str] = _get_column_headers(rows, allow_cycles=True) 297 298 def getcols(r, keys): 299 return [str(r.get(col, "")) for col in keys] 300 301 lines = [",".join(prefix_headers + headers)] 302 for i in range(len(rows)): 303 cols = getcols(prefix_rows[i], prefix_headers) + getcols(rows[i], headers) 304 lines.append(",".join(cols)) 305 306 with open(log_dir.joinpath(util.METRICS_TABLE), mode="wt") as f: 307 f.writelines(f"{line}\n" for line in lines) 308 309 310def display_tabulated_metrics(log_dir: Path, ci_mode: bool): 311 cmd_str = util.get_cmd_to_display_tabulated_metrics(log_dir, ci_mode) 312 output = subprocess.check_output(cmd_str, shell=True, text=True) 313 logging.info( 314 textwrap.dedent( 315 f"""\ 316 %s 317 %s 318 TIP to view column headers: 319 %s 320 """ 321 ), 322 cmd_str, 323 output, 324 util.get_csv_columns_cmd(log_dir), 325 ) 326