1#!/usr/bin/env python3
2#
3# Copyright (C) 2022 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
# limitations under the License.
16
import collections
import csv
import pathlib
import subprocess

from diffs.diff import Diff, ExtractInfo
21
22
class BloatyDiff(Diff):
  """BloatyDiff compares the sizes of symbols present in cc objects

  Bloaty McBloatface (bloaty) is used to discover size differences in object
  files or cc binaries. This diff returns a list of symbols which are new or
  larger in one file than the other.

  The output does not distinguish between new symbols and ones that are simply
  larger, so this output is best combined with the NmSymbolDiff to see which
  symbols are new.

  Example bloaty output (note: compileunits may not always be available):
  $ bloaty --csv -d compileunits,symbols $BAZEL_OBJ -- $LEGACY_OBJ
  compileunits,symbols,vmsize,filesize
  external/zstd/lib/compress/zstd_fast.c,ZSTD_compressBlock_doubleFast_extDict_generic,6240,6344
  external/zstd/lib/compress/zstd_fast.c,ZSTD_compressBlock_lazy_dictMatchState,-3428,-3551

  The first entry is a symbol that is larger in the Bazel version of the binary,
  and the second entry is a symbol that is larger in the Soong version of the
  binary. In other words, a positive size delta means "larger in the first
  (left) file" and a negative delta means "larger in the second (right) file".
  Both collection helpers below follow that convention.
  """

  def __init__(self, tool_name, data_source, has_debug_symbols=False):
    """Stores the configuration for one bloaty comparison.

    Args:
      tool_name: stored as-is; not read by any method of this class.
      data_source: the bloaty data source passed to `-d`; also the name of
        the CSV column used to label each reported row.
      has_debug_symbols: when True, `compileunits` is appended to the data
        sources and the report is grouped by compile unit.
    """
    self.tool_name = tool_name
    self.data_source = data_source
    self.has_debug_symbols = has_debug_symbols

  @staticmethod
  def _is_bracketed(name) -> bool:
    """Returns True if `name` is non-empty and starts with "[".

    NOTE(review): bloaty appears to use bracketed names (e.g. "[section ...]")
    for entries it synthesizes rather than real symbols/compile units; such
    entries are skipped from the report.
    """
    return bool(name) and name.startswith("[")

  def _print_diff_row(self, row, ignore_keys) -> str:
    """Formats one CSV row as `<name>: { key: value, ... }`.

    Args:
      row: mapping of CSV column name to value for a single bloaty row.
      ignore_keys: column names to leave out of the `{ ... }` attribute list.

    Returns:
      The row's `data_source` value followed by its remaining columns, sorted
      by column name.
    """
    attrs = sorted((k, v) for k, v in row.items() if k not in ignore_keys)
    formatted = ", ".join(f"{k}: {v}" for k, v in attrs)
    return row[self.data_source] + ": { " + formatted + " }"

  def _collect_diff_compileunits(self, diffreader: csv.DictReader) -> tuple[str, str]:
    """Groups size differences by compile unit.

    Args:
      diffreader: reader over bloaty CSV output that contains `compileunits`,
        `filesize` and the `data_source` column.

    Returns:
      A `(left_bigger, right_bigger)` pair of preformatted, tab-indented
      reports. Either string is empty when there is nothing to report.
    """
    # maps from compileunit to list of diff rows
    left_bigger = collections.defaultdict(list)
    right_bigger = collections.defaultdict(list)

    for row in diffreader:
      compileunit = row["compileunits"]
      if self._is_bracketed(compileunit):
        continue
      filesize = int(row["filesize"])
      # Positive delta: larger in the left file (same convention as
      # _collect_diff and the class docstring).
      if filesize > 0:
        left_bigger[compileunit].append(row)
      elif filesize < 0:
        right_bigger[compileunit].append(row)

    def format_by_compileunit(rows_by_unit):
      lines = []
      for compileunit, rows in sorted(rows_by_unit.items()):
        lines.append("\t" + compileunit + ":")
        lines.extend(sorted(
            "\t\t" + self._print_diff_row(
                row, ignore_keys=[self.data_source, "compileunits"])
            for row in rows
            if not self._is_bracketed(row[self.data_source])))
      return "\n".join(lines)

    return format_by_compileunit(left_bigger), format_by_compileunit(right_bigger)

  def _collect_diff(self, diffreader) -> tuple[str, str]:
    """Collects size differences without compile unit grouping.

    Args:
      diffreader: reader over bloaty CSV output that contains `filesize` and
        the `data_source` column.

    Returns:
      A `(left_bigger, right_bigger)` pair of preformatted, tab-indented
      reports, one row per line in the order bloaty emitted them. Either
      string is empty when there is nothing to report.
    """
    left_bigger = []
    right_bigger = []

    for row in diffreader:
      filesize = int(row["filesize"])
      if filesize > 0:
        left_bigger.append(row)
      elif filesize < 0:
        right_bigger.append(row)

    def format_rows(rows):
      return "\n".join(
          "\t" + self._print_diff_row(row, ignore_keys=[self.data_source])
          for row in rows)

    return format_rows(left_bigger), format_rows(right_bigger)

  def diff(self, left_path: pathlib.Path, right_path: pathlib.Path) -> list[str]:
    """Runs bloaty on the two files and reports entries that differ in size.

    Args:
      left_path: file passed to bloaty before `--`.
      right_path: file passed to bloaty after `--` (the baseline).

    Returns:
      Up to two messages: one listing entries that are unique or larger in
      `left_path`, one listing entries that are unique or larger in
      `right_path`. Empty when bloaty reports no file size differences.

    Raises:
      subprocess.CalledProcessError: if bloaty exits with non-zero status.
    """
    data_sources = self.data_source
    if self.has_debug_symbols:
      data_sources += ",compileunits"

    try:
      diff_csv = subprocess.run(["bloaty",
                                 "--csv",
                                 "-n", "0",
                                 "-w",
                                 "-d",
                                 data_sources,
                                 str(left_path),
                                 "--",
                                 str(right_path)],
                                check=True, capture_output=True,
                                encoding="utf-8").stdout.splitlines()
    except subprocess.CalledProcessError as e:
      print("ERROR: bloaty tool returned non-zero exit status")
      if self.has_debug_symbols:
        print("ERROR: do objects contain debug symbols?")
      # stderr was captured above, so surface it; otherwise bloaty's own
      # diagnostic would be lost.
      if e.stderr:
        print(e.stderr)
      raise

    diffreader = csv.DictReader(diff_csv)

    if self.has_debug_symbols:
      left_bigger, right_bigger = self._collect_diff_compileunits(diffreader)
    else:
      left_bigger, right_bigger = self._collect_diff(diffreader)

    errors = []
    if left_bigger:
      errors.append(f"the following {self.data_source} are either unique or larger in\n{left_path}\n than those in\n{right_path}:\n{left_bigger}")
    if right_bigger:
      errors.append(f"the following {self.data_source} are either unique or larger in\n{right_path}\n than those in\n{left_path}:\n{right_bigger}")

    return errors
134