1#! /usr/bin/env python3
2#
3# Copyright 2023, The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17import argparse
18from collections import defaultdict
19from enum import Enum
20import os
21import re
22
23
24class SortType(Enum):
25  NONE = 'none'
26  SIMPLE = 'simple'
27  OPT_NEIGHBOURS = 'opt_neighbours'
28
29
30def merge_same_procnames(entries):
31  path_regex = r'(.+)_(\d+).txt'
32  prog = re.compile(path_regex)
33
34  merged_entries = defaultdict(set)
35  for path, objs in entries:
36    basename = os.path.basename(path)
37    m = prog.match(basename)
38    if m:
39      merged_entries[m.group(1)].update(objs)
40
41  return sorted(merged_entries.items(), key=lambda x: len(x[1]))
42
43
44def opt_neighbours(sort_keys):
45  sort_keys = dict(sort_keys)
46  res = list()
47
48  # Start with a bin with the lowest process and objects count.
49  cur_key = min(
50      sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1]))
51  )[0]
52  res.append((cur_key, sort_keys[cur_key]))
53  del sort_keys[cur_key]
54
55  # Find next most similar sort key and update the result.
56  while sort_keys:
57
58    def jaccard_index(x):
59      return (x & cur_key).bit_count() / (x | cur_key).bit_count()
60
61    next_key = max(sort_keys.keys(), key=jaccard_index)
62    res.append((next_key, sort_keys[next_key]))
63    del sort_keys[next_key]
64    cur_key = next_key
65  return res
66
67
68def process_dirty_entries(entries, sort_type):
69  dirty_image_objects = []
70
71  union = set()
72  for k, v in entries:
73    union = union.union(v)
74
75  if sort_type == SortType.NONE:
76    dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
77    return (dirty_obj_lines, dict())
78
79  # sort_key -> [objs]
80  sort_keys = defaultdict(list)
81  for obj in union:
82    sort_key = 0
83    # Nth bit of sort_key is set if this object is dirty in Nth process.
84    for idx, (k, v) in enumerate(entries):
85      if obj in v:
86        sort_key = (sort_key << 1) | 1
87      else:
88        sort_key = sort_key << 1
89
90    sort_keys[sort_key].append(obj)
91
92  sort_keys = sorted(sort_keys.items())
93
94  if sort_type == SortType.OPT_NEIGHBOURS:
95    sort_keys = opt_neighbours(sort_keys)
96
97  dirty_obj_lines = list()
98  for idx, (_, objs) in enumerate(sort_keys):
99    for obj in objs:
100      dirty_obj_lines.append(obj + ' ' + str(idx) + '\n')
101
102  return (dirty_obj_lines, sort_keys)
103
104
105def main():
106  parser = argparse.ArgumentParser(
107      description=(
108          'Create dirty-image-objects file from specified imgdiag output files.'
109      ),
110      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
111  )
112  parser.add_argument(
113      'imgdiag_files',
114      nargs='+',
115      help='imgdiag files to use.',
116  )
117  parser.add_argument(
118      '--sort-type',
119      choices=[e.value for e in SortType],
120      default=SortType.OPT_NEIGHBOURS.value,
121      help=(
122          'Object sorting type. "simple" puts objects with the same usage'
123          ' pattern in the same bins. "opt_neighbours" also tries to put bins'
124          ' with similar usage patterns close to each other.'
125      ),
126  )
127  parser.add_argument(
128      '--merge-same-procnames',
129      action=argparse.BooleanOptionalAction,
130      default=False,
131      help=(
132          'Merge dirty objects from files with the same process name (different'
133          ' pid). Files are expected to end with "_{pid}.txt"'
134      ),
135  )
136  parser.add_argument(
137      '--output-filename',
138      default='dirty-image-objects.txt',
139      help='Output file for dirty image objects.',
140  )
141  parser.add_argument(
142      '--print-stats',
143      action=argparse.BooleanOptionalAction,
144      default=False,
145      help='Print dirty object stats.',
146  )
147
148  args = parser.parse_args()
149
150  entries = list()
151  for path in args.imgdiag_files:
152    with open(path) as f:
153      lines = f.readlines()
154    prefix = 'dirty_obj: '
155    lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l]
156    entries.append((path, set(lines)))
157
158  entries = sorted(entries, key=lambda x: len(x[1]))
159
160  if args.merge_same_procnames:
161    entries = merge_same_procnames(entries)
162
163  print('Using processes:')
164  for k, v in entries:
165    print(f'{k}: {len(v)}')
166  print()
167
168  dirty_image_objects, sort_keys = process_dirty_entries(
169      entries=entries, sort_type=SortType(args.sort_type)
170  )
171
172  with open(args.output_filename, 'w') as f:
173    f.writelines(dirty_image_objects)
174
175  if args.print_stats:
176    print(','.join(k for k, v in entries), ',obj_count')
177    total_count = 0
178    for sort_key, objs in sort_keys:
179      bits_csv = ','.join(
180          '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries))
181      )
182      print(bits_csv, ',', len(objs))
183      total_count += len(objs)
184    print('total: ', total_count)
185
186
187if __name__ == '__main__':
188  main()
189