#!/usr/bin/env python3
# Copyright 2019, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Atest indexing module."""

from __future__ import annotations
from __future__ import print_function

from dataclasses import dataclass
import functools
import logging
import os
from pathlib import Path
import pickle
import shutil
import subprocess
import sys
import tempfile
import time
from typing import List

from atest import atest_utils as au
from atest import atest_utils
from atest import constants
from atest.atest_enum import DetectType
from atest.metrics import metrics, metrics_utils

UPDATEDB = 'updatedb'
LOCATE = 'locate'
# The list was generated by command:
# find `gettop` -type d -wholename `gettop`/out -prune -o -type d -name '.*'
# -print | awk -F/ '{print $NF}' | sort -u
PRUNENAMES = [
    '.abc',
    '.appveyor',
    '.azure-pipelines',
    '.bazelci',
    '.build-id',
    '.buildkite',
    '.buildscript',
    '.cargo',
    '.ci',
    '.circleci',
    '.clusterfuzzlite',
    '.conan',
    '.devcontainer',
    '.dwz',
    '.externalToolBuilders',
    '.git',
    '.githooks',
    '.github',
    '.gitlab',
    '.gitlab-ci',
    '.google',
    '.hidden',
    '.idea',
    '.intermediates',
    '.jenkins',
    '.kokoro',
    '.libs_cffi_backend',
    '.more',
    '.mvn',
    '.prebuilt_info',
    '.private',
    '__pycache__',
    '.repo',
    '.settings',
    '.static',
    '.svn',
    '.test',
    '.travis',
    '.travis_scripts',
    '.tx',
    '.vscode',
]
PRUNEPATHS = ['prebuilts']


def debug_log(func):
  """Decorator for logging with debug mode."""

  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    logging.debug('Running %s...', func.__name__)
    result = func(*args, **kwargs)
    logging.debug('%s done.', func.__name__)
    return result

  return wrapper


def run_updatedb(output_cache: Path, prunepaths: List[str] = None):
  """Run updatedb and generate cache in $ANDROID_HOST_OUT/indices/plocate.db.

  Args:
    output_cache: The file path of the updatedb cache.
    prunepaths: A list of paths that are relative to the build top.

  Returns:
    True if updatedb completed successfully; False otherwise.
  """
  search_root = str(au.get_build_top())
  prunepaths = prunepaths if prunepaths else PRUNEPATHS
  prunepaths = [os.path.join(search_root, p) for p in prunepaths]
  prunepaths.append(str(au.get_build_out_dir()))
  updatedb_cmd = [UPDATEDB, '-l0']
  updatedb_cmd.append('-U%s' % search_root)
  updatedb_cmd.append('-n%s' % ' '.join(PRUNENAMES))
  updatedb_cmd.append('-o%s' % output_cache)
  # (b/206866627) /etc/updatedb.conf excludes /mnt from scanning on Linux.
  # Use --prunepaths to override the default configuration.
  updatedb_cmd.append('--prunepaths')
  updatedb_cmd.append(' '.join(prunepaths))
  # Support scanning bind mounts as well.
  updatedb_cmd.extend(['--prune-bind-mounts', 'no'])

  logging.debug('Running updatedb...')
  try:
    full_env_vars = os.environ.copy()
    logging.debug('Executing: %s', updatedb_cmd)
    result = subprocess.run(updatedb_cmd, env=full_env_vars, check=True)
  except (KeyboardInterrupt, SystemExit):
    atest_utils.print_and_log_error('Process interrupted or failure.')
    return False
  # Delete indices when plocate.db is locked or on other CalledProcessError.
  # (b/141588997)
  except subprocess.CalledProcessError as err:
    atest_utils.print_and_log_error(
        'Error executing: %s', ' '.join(updatedb_cmd)
    )
    metrics_utils.handle_exc_and_send_exit_event(constants.PLOCATEDB_LOCKED)
    if err.output:
      atest_utils.print_and_log_error(err.output)
    output_cache.unlink()
    return False

  return result.returncode == 0
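
# For reference, the command assembled by run_updatedb() looks roughly like
# the following (the source-tree and output paths are illustrative, not real
# defaults):
#
#   updatedb -l0 -U/path/to/src -n'.git .repo ...' \
#     -o$ANDROID_HOST_OUT/indices/plocate.db \
#     --prunepaths '/path/to/src/prebuilts /path/to/src/out' \
#     --prune-bind-mounts no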


def _dump_index(dump_file, output, output_re, key, value):
  """Dump indexed data with pickle.

  Args:
    dump_file: A string of absolute path of the index file.
    output: A string generated by locate and grep.
    output_re: A regex used for grouping patterns.
    key: A string of the regex group used as the dictionary key, e.g.
      classname, package, cc_class, etc.
    value: A string of the regex group used as the dictionary value, e.g. the
      file path.

  The data structure will be like:
  {
    'Foo': {'/path/to/Foo.java', '/path2/to/Foo.kt'},
    'Boo': {'/path3/to/Boo.java'}
  }
  """
  _dict = {}
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      if isinstance(output, bytes):
        output = output.decode()
      for entry in output.splitlines():
        match = output_re.match(entry)
        if match:
          _dict.setdefault(match.group(key), set()).add(match.group(value))
      try:
        pickle.dump(_dict, cache_file, protocol=2)
      except IOError:
        atest_utils.print_and_log_error('Failed in dumping %s', dump_file)
    shutil.copy(temp_file.name, dump_file)
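

# Illustrative counterpart to _dump_index() (a hypothetical helper, not part
# of the original module): loading one of the pickled .idx files back into the
# {name: {paths}} dictionary described above.
def _load_index_example(index_file):
  """Return the dict dumped by _dump_index(), or {} if unreadable."""
  try:
    with open(index_file, 'rb') as cache_file:
      return pickle.load(cache_file)
  except (IOError, pickle.UnpicklingError):
    return {}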


# pylint: disable=anomalous-backslash-in-string
def get_cc_result(indices: Indices):
  """Search all testable cc/cpp files and grep TEST(), TEST_F() or TEST_P().

  After searching cc/cpp files, index corresponding data types in parallel.

  Args:
    indices: An Indices object.
  """
  find_cc_cmd = (
      f"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(cc|cpp)$'"
      f"| xargs egrep -sH '{constants.CC_GREP_RE}' 2>/dev/null || true"
  )
  logging.debug('Probing CC classes:\n %s', find_cc_cmd)
  result = subprocess.getoutput(find_cc_cmd)

  au.start_threading(
      target=_index_cc_classes, args=[result, indices.cc_classes_idx]
  )


# pylint: disable=anomalous-backslash-in-string
def get_java_result(indices: Indices):
  """Search all testable java/kt files and grep the package.

  After searching java/kt files, index corresponding data types in parallel.

  Args:
    indices: An Indices object.
  """
  package_grep_re = r'^\s*package\s+[a-z][[:alnum:]]+[^{]'
  find_java_cmd = (
      f"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(java|kt)$' "
      # (b/204398677) Suppress stderr when the indexing target terminated.
      f"| xargs egrep -sH '{package_grep_re}' 2>/dev/null || true"
  )
  logging.debug('Probing Java classes:\n %s', find_java_cmd)
  result = subprocess.getoutput(find_java_cmd)

  au.start_threading(
      target=_index_java_classes, args=[result, indices.classes_idx]
  )
  au.start_threading(
      target=_index_qualified_classes, args=[result, indices.fqcn_idx]
  )
  au.start_threading(
      target=_index_packages, args=[result, indices.packages_idx]
  )


@debug_log
def _index_cc_classes(output, index):
  """Index CC classes.

  The data structure is like:
  {
    'FooTestCase': {'/path1/to/the/FooTestCase.cpp',
                    '/path2/to/the/FooTestCase.cc'}
  }

  Args:
    output: A string object generated by get_cc_result().
    index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.CC_OUTPUT_RE,
      key='test_name',
      value='file_path',
  )


@debug_log
def _index_java_classes(output, index):
  """Index Java classes.

  The data structure is like:
  {
    'FooTestCase': {'/path1/to/the/FooTestCase.java',
                    '/path2/to/the/FooTestCase.kt'}
  }

  Args:
    output: A string object generated by get_java_result().
    index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.CLASS_OUTPUT_RE,
      key='class',
      value='java_path',
  )


@debug_log
def _index_packages(output, index):
  """Index Java packages.

  The data structure is like:
  {
    'a.b.c.d': {'/path1/to/a/b/c/d/',
                '/path2/to/a/b/c/d/'}
  }

  Args:
    output: A string object generated by get_java_result().
    index: A string path of the index file.
  """
  _dump_index(
      dump_file=index,
      output=output,
      output_re=constants.PACKAGE_OUTPUT_RE,
      key='package',
      value='java_dir',
  )


@debug_log
def _index_qualified_classes(output, index):
  """Index fully qualified Java classes (FQCN).

  The data structure is like:
  {
    'a.b.c.d.FooTestCase': {'/path1/to/a/b/c/d/FooTestCase.java',
                            '/path2/to/a/b/c/d/FooTestCase.kt'}
  }

  Args:
    output: A string object generated by get_java_result().
    index: A string path of the index file.
  """
  _dict = {}
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      if isinstance(output, bytes):
        output = output.decode()
      for entry in output.split('\n'):
        match = constants.QCLASS_OUTPUT_RE.match(entry)
        if match:
          fqcn = match.group('package') + '.' + match.group('class')
          _dict.setdefault(fqcn, set()).add(match.group('java_path'))
      try:
        pickle.dump(_dict, cache_file, protocol=2)
      except (KeyboardInterrupt, SystemExit):
        atest_utils.print_and_log_error('Process interrupted or failure.')
      except IOError:
        atest_utils.print_and_log_error('Failed in dumping %s', index)
    shutil.copy(temp_file.name, index)


def index_targets():
  """The entrypoint of indexing targets.

  Utilise the plocate database to index reference types of CLASS, CC_CLASS,
  PACKAGE and QUALIFIED_CLASS.
  """
  start = time.time()
  unavailable_cmds = [
      cmd for cmd in [UPDATEDB, LOCATE] if not au.has_command(cmd)
  ]
  if unavailable_cmds:
    logging.debug(
        'Command %s is unavailable; skip indexing...',
        ' '.join(unavailable_cmds),
    )
    return None

  indices = Indices()
  output_cache = indices.locate_db
  get_num_cmd = f'{LOCATE} -d{output_cache} --count /'
  pre_number = 0
  if output_cache.exists():
    ret, pre_number = subprocess.getstatusoutput(get_num_cmd)
    if ret != 0:
      logging.debug('Found a broken db: %s', output_cache)
      pre_number = sys.maxsize

  if run_updatedb(output_cache):
    if not indices.has_all_indices():
      logging.debug('Missing essential indices; will re-index targets.')
      return _index_targets(indices, start)

    # (b/206886222) The checksum and the plocate.db file size are not reliable
    # indicators of whether the source tree has changed. Therefore, Atest
    # triggers indexing when either of the following conditions is met:
    # 1. Different file numbers in the current and previous plocate.db.
    same_number_of_files = pre_number == subprocess.getoutput(get_num_cmd)
    if not same_number_of_files:
      logging.debug('Found file number changed; will re-index targets.')
      return _index_targets(indices, start)

    # 2. `repo sync` was issued before running atest.
    checksum_file = au.get_index_path('repo_sync.md5')
    repo_syncd = not au.check_md5(checksum_file, missing_ok=False)
    if repo_syncd:
      logging.debug('Found repo synced; will re-index targets.')
      repo_file = au.get_build_top('.repo/.repo_fetchtimes.json')
      au.start_threading(target=au.save_md5, args=[[repo_file], checksum_file])
      return _index_targets(indices, start)
    logging.debug('Indices remain the same. Skip indexing...')
  else:
    atest_utils.print_and_log_warning(
        'Unable to generate %s. Searching for targets will be very slow.',
        output_cache,
    )
  return None


def _index_targets(indices: Indices, start_from: float):
  """The actual index_targets function."""
  logging.debug('Indexing targets...')
  proc_java = au.start_threading(target=get_java_result, args=[indices])
  proc_cc = au.start_threading(target=get_cc_result, args=[indices])
  proc_java.join()
  proc_cc.join()
  elapsed_time = time.time() - start_from
  logging.debug('Indexing targets took %ss', elapsed_time)
  metrics.LocalDetectEvent(
      detect_type=DetectType.INDEX_TARGETS_MS, result=int(elapsed_time * 1000)
  )


@dataclass
class Indices:
  """Class that stores index files."""

  locate_db: Path
  classes_idx: Path
  cc_classes_idx: Path
  packages_idx: Path
  fqcn_idx: Path

  def __init__(self):
    """Initialize the Indices object with the default index paths."""
    self.locate_db = au.get_index_path('plocate.db')
    self.classes_idx = au.get_index_path('classes.idx')
    self.cc_classes_idx = au.get_index_path('cc_classes.idx')
    self.packages_idx = au.get_index_path('packages.idx')
    self.fqcn_idx = au.get_index_path('fqcn.idx')
    au.get_index_path().mkdir(parents=True, exist_ok=True)

  def has_all_indices(self):
    """Return True if all index files exist."""
    exists = [
        self.locate_db.exists(),
        self.classes_idx.exists(),
        self.cc_classes_idx.exists(),
        self.packages_idx.exists(),
        self.fqcn_idx.exists(),
    ]
    if not all(exists):
      logging.debug("Some index file doesn't exist: %s", exists)
    return all(exists)
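

# Minimal usage sketch (illustrative only; assumes a lunched Android build
# environment so the build-top and index paths resolve, and that this module
# is importable as `indexing` in your setup):
#
#   indexing.index_targets()      # refresh plocate.db and the .idx files
#   idx = indexing.Indices()
#   idx.has_all_indices()         # True once every index file exists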