#!/usr/bin/env python3
# Copyright 2019, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Atest indexing module."""

from __future__ import annotations
from __future__ import print_function

from dataclasses import dataclass
import functools
import logging
import os
from pathlib import Path
import pickle
import shutil
import subprocess
import sys
import tempfile
import time
from typing import List

from atest import atest_utils as au
from atest import atest_utils
from atest import constants
from atest.atest_enum import DetectType
from atest.metrics import metrics, metrics_utils

UPDATEDB = 'updatedb'
LOCATE = 'locate'
# The list was generated by the command:
# find `gettop` -type d -wholename `gettop`/out -prune -o -type d -name '.*'
# -print | awk -F/ '{print $NF}' | sort -u
PRUNENAMES = [
    '.abc',
    '.appveyor',
    '.azure-pipelines',
    '.bazelci',
    '.build-id',
    '.buildkite',
    '.buildscript',
    '.cargo',
    '.ci',
    '.circleci',
    '.clusterfuzzlite',
    '.conan',
    '.devcontainer',
    '.dwz',
    '.externalToolBuilders',
    '.git',
    '.githooks',
    '.github',
    '.gitlab',
    '.gitlab-ci',
    '.google',
    '.hidden',
    '.idea',
    '.intermediates',
    '.jenkins',
    '.kokoro',
    '.libs_cffi_backend',
    '.more',
    '.mvn',
    '.prebuilt_info',
    '.private',
    '__pycache__',
    '.repo',
    '.settings',
    '.static',
    '.svn',
    '.test',
    '.travis',
    '.travis_scripts',
    '.tx',
    '.vscode',
]
PRUNEPATHS = ['prebuilts']


def debug_log(func):
  """Decorator for logging with debug mode."""

  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    logging.debug('Running %s...', func.__name__)
    result = func(*args, **kwargs)
    logging.debug('%s done.', func.__name__)
    return result

  return wrapper


def run_updatedb(output_cache: Path, prunepaths: List[str] | None = None):
  """Run updatedb and write the locate database to output_cache.

  The cache typically lives at $ANDROID_HOST_OUT/indices/plocate.db.

  Args:
      output_cache: The file path of the updatedb cache.
      prunepaths: A list of paths, relative to the build top, to exclude from
        indexing. Defaults to PRUNEPATHS.

  Returns:
      True if updatedb completed successfully; False otherwise.
  """
  search_root = str(au.get_build_top())
  prunepaths = prunepaths if prunepaths else PRUNEPATHS
  prunepaths = [os.path.join(search_root, p) for p in prunepaths]
  prunepaths.append(str(au.get_build_out_dir()))
  updatedb_cmd = [UPDATEDB, '-l0']
  updatedb_cmd.append('-U%s' % search_root)
  updatedb_cmd.append('-n%s' % ' '.join(PRUNENAMES))
  updatedb_cmd.append('-o%s' % output_cache)
  # (b/206866627) /etc/updatedb.conf excludes /mnt from scanning on Linux.
  # Use --prunepaths to override the default configuration.
  updatedb_cmd.append('--prunepaths')
  updatedb_cmd.append(' '.join(prunepaths))
  # Support scanning bind mounts as well.
  updatedb_cmd.extend(['--prune-bind-mounts', 'no'])
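  # The assembled command resembles the following (paths are illustrative):
  #   updatedb -l0 -U<build_top> -n'.abc .appveyor ... .vscode' \
  #     -o<output_cache> --prunepaths '<build_top>/prebuilts <out_dir>' \
  #     --prune-bind-mounts no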

  logging.debug('Running updatedb... ')
  try:
    full_env_vars = os.environ.copy()
    logging.debug('Executing: %s', updatedb_cmd)
    result = subprocess.run(updatedb_cmd, env=full_env_vars, check=True)
  except (KeyboardInterrupt, SystemExit):
    atest_utils.print_and_log_error('Process interrupted or terminated.')
    return False
  # Delete indices when plocate.db is locked or on other CalledProcessError.
  # (b/141588997)
  except subprocess.CalledProcessError as err:
    atest_utils.print_and_log_error(
        'Error executing: %s', ' '.join(updatedb_cmd)
    )
    metrics_utils.handle_exc_and_send_exit_event(constants.PLOCATEDB_LOCKED)
    if err.output:
      atest_utils.print_and_log_error(err.output)
    output_cache.unlink()
    return False

  return result.returncode == 0


def _dump_index(dump_file, output, output_re, key, value):
  """Dump indexed data with pickle.

  Args:
      dump_file: A string of the absolute path of the index file.
      output: A string generated by locate and grep.
      output_re: A regex used for grouping patterns.
      key: A string of the regex group name used as the dictionary key, e.g.
        classname, package, cc_class, etc.
      value: A string of the regex group name whose matches are collected into
        a set of paths.

  The data structure will be like:
  {
    'Foo': {'/path/to/Foo.java', '/path2/to/Foo.kt'},
    'Boo': {'/path3/to/Boo.java'}
  }
  """
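  # Illustrative call (mirrors _index_java_classes below; the output variable
  # name is hypothetical):
  #   _dump_index(dump_file=index, output=grep_output,
  #               output_re=constants.CLASS_OUTPUT_RE,
  #               key='class', value='java_path')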
  _dict = {}
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      if isinstance(output, bytes):
        output = output.decode()
      for entry in output.splitlines():
        match = output_re.match(entry)
        if match:
          _dict.setdefault(match.group(key), set()).add(match.group(value))
      try:
        pickle.dump(_dict, cache_file, protocol=2)
      except IOError:
        atest_utils.print_and_log_error('Failed to dump %s', dump_file)
    shutil.copy(temp_file.name, dump_file)


def get_cc_result(indices: Indices):
  """Search all testable cc/cpp files and grep TEST(), TEST_F() or TEST_P().

  After searching cc/cpp files, index the corresponding data types in parallel.

  Args:
      indices: An Indices object.
  """
  find_cc_cmd = (
      rf"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(cc|cpp)$'"
      f"| xargs egrep -sH '{constants.CC_GREP_RE}' 2>/dev/null || true"
  )
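  # A matching pipeline output line looks roughly like (illustrative):
  #   /src/foo/tests/foo_test.cc:TEST_F(FooTest, DoesBar) {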
  logging.debug('Probing CC classes:\n %s', find_cc_cmd)
  result = subprocess.getoutput(find_cc_cmd)

  au.start_threading(
      target=_index_cc_classes, args=[result, indices.cc_classes_idx]
  )


def get_java_result(indices: Indices):
  """Search all testable java/kt files and grep the package.

  After searching java/kt files, index the corresponding data types in parallel.

  Args:
      indices: An Indices object.
  """
  package_grep_re = r'^\s*package\s+[a-z][[:alnum:]]+[^{]'
  find_java_cmd = (
      rf"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(java|kt)$' "
      # (b/204398677) Suppress stderr when the indexing target is terminated.
      f"| xargs egrep -sH '{package_grep_re}' 2>/dev/null || true"
  )
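  # A matching pipeline output line looks roughly like (illustrative):
  #   /src/foo/tests/src/com/example/FooTest.java:package com.example;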
  logging.debug('Probing Java classes:\n %s', find_java_cmd)
  result = subprocess.getoutput(find_java_cmd)

  au.start_threading(
      target=_index_java_classes, args=[result, indices.classes_idx]
  )
  au.start_threading(
      target=_index_qualified_classes, args=[result, indices.fqcn_idx]
  )
  au.start_threading(
      target=_index_packages, args=[result, indices.packages_idx]
  )


@debug_log
def _index_cc_classes(output, index):
  """Index CC classes.

  The data structure is like:
  {
    'FooTestCase': {'/path1/to/the/FooTestCase.cpp',
                    '/path2/to/the/FooTestCase.cc'}
  }

  Args:
      output: A string object generated by get_cc_result().
      index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.CC_OUTPUT_RE,
      key='test_name',
      value='file_path',
  )


@debug_log
def _index_java_classes(output, index):
  """Index Java classes.

  The data structure is like:
  {
    'FooTestCase': {'/path1/to/the/FooTestCase.java',
                    '/path2/to/the/FooTestCase.kt'}
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.CLASS_OUTPUT_RE,
      key='class',
      value='java_path',
  )


@debug_log
def _index_packages(output, index):
  """Index Java packages.

  The data structure is like:
  {
    'a.b.c.d': {'/path1/to/a/b/c/d/',
                '/path2/to/a/b/c/d/'}
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.PACKAGE_OUTPUT_RE,
      key='package',
      value='java_dir',
  )


@debug_log
def _index_qualified_classes(output, index):
  """Index fully qualified Java class names (FQCN).

  The data structure is like:
  {
    'a.b.c.d.FooTestCase': {'/path1/to/a/b/c/d/FooTestCase.java',
                            '/path2/to/a/b/c/d/FooTestCase.kt'}
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  _dict = {}
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      if isinstance(output, bytes):
        output = output.decode()
      for entry in output.splitlines():
        match = constants.QCLASS_OUTPUT_RE.match(entry)
        if match:
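          # e.g. package 'a.b.c.d' + class 'FooTestCase' -> 'a.b.c.d.FooTestCase'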
          fqcn = match.group('package') + '.' + match.group('class')
          _dict.setdefault(fqcn, set()).add(match.group('java_path'))
      try:
        pickle.dump(_dict, cache_file, protocol=2)
      except (KeyboardInterrupt, SystemExit):
        atest_utils.print_and_log_error('Process interrupted or terminated.')
      except IOError:
        atest_utils.print_and_log_error('Failed to dump %s', index)
    shutil.copy(temp_file.name, index)


def index_targets():
  """The entry point for indexing targets.

  Uses the plocate database to index the CLASS, CC_CLASS, PACKAGE and
  QUALIFIED_CLASS reference types.
  """
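  # Flow summary: verify updatedb/locate are available, refresh plocate.db,
  # then re-index when indices are missing, the indexed file count changed,
  # or a `repo sync` occurred since the last run.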
  start = time.time()
  unavailable_cmds = [
      cmd for cmd in [UPDATEDB, LOCATE] if not au.has_command(cmd)
  ]
  if unavailable_cmds:
    logging.debug(
        'Command(s) %s unavailable; skipping indexing...',
        ', '.join(unavailable_cmds),
    )
    return None

  indices = Indices()
  output_cache = indices.locate_db
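  # `locate -d <db> --count /` prints how many paths the database contains;
  # the count is compared before and after updatedb to detect tree changes.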
  get_num_cmd = f'{LOCATE} -d{output_cache} --count /'
  pre_number = 0
  if output_cache.exists():
    ret, pre_number = subprocess.getstatusoutput(get_num_cmd)
    if ret != 0:
      logging.debug('Found a broken db: %s', output_cache)
      pre_number = sys.maxsize

  if run_updatedb(output_cache):
    if not indices.has_all_indices():
      logging.debug('Missing essential indices; will re-index targets.')
      return _index_targets(indices, start)

    # (b/206886222) Neither a checksum nor the plocate.db file size reliably
    # indicates whether the source tree has changed. Therefore, Atest triggers
    # re-indexing when either of the following conditions is met:
    #  1. The file count differs between the current and previous plocate.db.
    same_number_of_files = pre_number == subprocess.getoutput(get_num_cmd)
    if not same_number_of_files:
      logging.debug('Found file number changed; will re-index targets.')
      return _index_targets(indices, start)

    #  2. `repo sync` was issued before running atest.
    checksum_file = au.get_index_path('repo_sync.md5')
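    # check_md5() is expected to return False when .repo/.repo_fetchtimes.json
    # no longer matches the stored checksum (or the checksum file is missing),
    # i.e. a `repo sync` has likely been run since the last indexing.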
    repo_syncd = not au.check_md5(checksum_file, missing_ok=False)
    if repo_syncd:
      logging.debug('Detected repo sync; will re-index targets.')
      repo_file = au.get_build_top('.repo/.repo_fetchtimes.json')
      au.start_threading(target=au.save_md5, args=[[repo_file], checksum_file])
      return _index_targets(indices, start)
    logging.debug('Indices remain the same; skipping indexing...')
  else:
    atest_utils.print_and_log_warning(
        'Unable to run %s. Searching for targets will be very slow.', UPDATEDB
    )
  return None


def _index_targets(indices: Indices, start_from: float):
  """Index Java and CC targets in parallel and report the elapsed time."""
  logging.debug('Indexing targets... ')
  proc_java = au.start_threading(target=get_java_result, args=[indices])
  proc_cc = au.start_threading(target=get_cc_result, args=[indices])
  proc_java.join()
  proc_cc.join()
  elapsed_time = time.time() - start_from
  logging.debug('Indexing targets took %ss', elapsed_time)
  metrics.LocalDetectEvent(
      detect_type=DetectType.INDEX_TARGETS_MS, result=int(elapsed_time * 1000)
  )


@dataclass
class Indices:
  """Class that stores index files."""

  locate_db: Path
  classes_idx: Path
  cc_classes_idx: Path
  packages_idx: Path
  fqcn_idx: Path

  def __init__(self):
    """Initialize the Indices object and ensure the index directory exists."""
    self.locate_db = au.get_index_path('plocate.db')
    self.classes_idx = au.get_index_path('classes.idx')
    self.cc_classes_idx = au.get_index_path('cc_classes.idx')
    self.packages_idx = au.get_index_path('packages.idx')
    self.fqcn_idx = au.get_index_path('fqcn.idx')
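    # Ensure the index directory exists before any index file is written.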
    au.get_index_path().mkdir(parents=True, exist_ok=True)

  def has_all_indices(self):
    """Return True if all index files exist."""
    exists = [
        self.locate_db.exists(),
        self.classes_idx.exists(),
        self.cc_classes_idx.exists(),
        self.packages_idx.exists(),
        self.fqcn_idx.exists(),
    ]
    if not all(exists):
      logging.debug('Some index files are missing: %s', exists)
    return all(exists)