1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.tradefed.retry; 17 18 import com.android.annotations.VisibleForTesting; 19 import com.android.tradefed.config.IConfiguration; 20 import com.android.tradefed.config.IConfigurationReceiver; 21 import com.android.tradefed.config.Option; 22 import com.android.tradefed.device.DeviceNotAvailableException; 23 import com.android.tradefed.device.ITestDevice; 24 import com.android.tradefed.device.StubDevice; 25 import com.android.tradefed.device.internal.DeviceResetHandler; 26 import com.android.tradefed.device.internal.DeviceSnapshotHandler; 27 import com.android.tradefed.error.HarnessRuntimeException; 28 import com.android.tradefed.invoker.IInvocationContext; 29 import com.android.tradefed.invoker.TestInformation; 30 import com.android.tradefed.invoker.logger.CurrentInvocation; 31 import com.android.tradefed.invoker.logger.CurrentInvocation.IsolationGrade; 32 import com.android.tradefed.invoker.logger.InvocationMetricLogger; 33 import com.android.tradefed.invoker.logger.InvocationMetricLogger.InvocationMetricKey; 34 import com.android.tradefed.invoker.tracing.CloseableTraceScope; 35 import com.android.tradefed.log.LogUtil.CLog; 36 import com.android.tradefed.result.TestDescription; 37 import com.android.tradefed.result.TestResult; 38 import com.android.tradefed.result.TestRunResult; 39 import com.android.tradefed.result.TestStatus; 40 import com.android.tradefed.result.error.DeviceErrorIdentifier; 41 import com.android.tradefed.result.error.InfraErrorIdentifier; 42 import com.android.tradefed.testtype.IRemoteTest; 43 import com.android.tradefed.testtype.ITestFileFilterReceiver; 44 import com.android.tradefed.testtype.ITestFilterReceiver; 45 import com.android.tradefed.testtype.ITestInformationReceiver; 46 import com.android.tradefed.testtype.SubprocessTfLauncher; 47 import com.android.tradefed.testtype.retry.IAutoRetriableTest; 48 import com.android.tradefed.testtype.suite.ModuleDefinition; 49 import com.android.tradefed.testtype.suite.SuiteTestFilter; 50 import com.android.tradefed.util.FileUtil; 51 52 import java.io.File; 53 import java.io.IOException; 54 import java.util.ArrayList; 55 import java.util.HashSet; 56 import java.util.LinkedHashMap; 57 import java.util.LinkedHashSet; 58 import java.util.List; 59 import java.util.Map; 60 import java.util.Map.Entry; 61 import java.util.Set; 62 import java.util.stream.Collectors; 63 64 /** 65 * Base implementation of {@link IRetryDecision}. Base implementation only take local signals into 66 * account. 67 */ 68 public class BaseRetryDecision 69 implements IRetryDecision, IConfigurationReceiver, ITestInformationReceiver { 70 71 private static final int ABORT_MAX_FAILURES = 75; 72 73 @Option( 74 name = "reboot-at-last-retry", 75 description = "Reboot the device at the last retry attempt." 76 ) 77 private boolean mRebootAtLastRetry = false; 78 79 @Option( 80 name = "retry-isolation-grade", 81 description = "Control the isolation level that should be attempted between retries." 82 ) 83 private IsolationGrade mRetryIsolationGrade = IsolationGrade.NOT_ISOLATED; 84 85 @Option( 86 name = "max-testcase-run-count", 87 description = 88 "If the IRemoteTest can have its testcases run multiple times, " 89 + "the max number of runs for each testcase." 90 ) 91 private int mMaxRetryAttempts = 1; 92 93 @Option( 94 name = "retry-strategy", 95 description = 96 "The retry strategy to be used when re-running some tests with " 97 + "--max-testcase-run-count" 98 ) 99 private RetryStrategy mRetryStrategy = RetryStrategy.NO_RETRY; 100 101 @Option( 102 name = "skip-retry-in-presubmit", 103 description = "Skip retry attempts specifically in presubmit builds") 104 private boolean mSkipRetryInPresubmit = false; 105 106 @Option( 107 name = "auto-retry", 108 description = 109 "Whether or not to enable the new auto-retry. This is a feature flag for testing." 110 ) 111 private boolean mEnableAutoRetry = true; 112 113 @Option( 114 name = "skip-retrying-list", 115 description = 116 "If a test in the list, skip retrying it. The format is the same as the " 117 + "SuiteTestFilter.") 118 private Set<String> mSkipRetryingSet = new LinkedHashSet<>(); 119 120 @Option( 121 name = "updated-retry-reporting", 122 description = "Feature flag to use the updated retry reporting strategy.") 123 private boolean mUpdatedReporting = true; 124 125 @Option( 126 name = "updated-filtering", 127 description = "Feature flag to use the updated filtering logic.") 128 private boolean mUpdatedFiltering = true; 129 130 @Deprecated 131 @Option( 132 name = "module-preparation-retry", 133 description = "Whether or not to retry any module-level target preparation errors." + 134 "This flag is for feature testing, and eventualy it's all controlled under " + 135 "retry strategy." 136 ) 137 private boolean mModulePreparationRetry = false; 138 139 @Option( 140 name = "use-snapshot-for-reset", 141 description = "Feature flag to use snapshot/restore instead of powerwash.") 142 private boolean mUseSnapshotForReset = false; 143 144 private IInvocationContext mContext; 145 private IConfiguration mConfiguration; 146 private TestInformation mTestInformation; 147 148 private IRemoteTest mCurrentlyConsideredTest; 149 private Set<TestDescription> mPreviouslyFailing; 150 private RetryStatsHelper mStatistics; 151 152 /** Constructor for the retry decision */ BaseRetryDecision()153 public BaseRetryDecision() {} 154 155 @Override isAutoRetryEnabled()156 public boolean isAutoRetryEnabled() { 157 return mEnableAutoRetry; 158 } 159 160 @Override getRetryStrategy()161 public RetryStrategy getRetryStrategy() { 162 return mRetryStrategy; 163 } 164 165 @Override rebootAtLastAttempt()166 public boolean rebootAtLastAttempt() { 167 return mRebootAtLastRetry; 168 } 169 170 @Override getMaxRetryCount()171 public int getMaxRetryCount() { 172 return mMaxRetryAttempts; 173 } 174 175 @Override addToSkipRetryList(String filterEntry)176 public void addToSkipRetryList(String filterEntry) { 177 mSkipRetryingSet.add(filterEntry); 178 } 179 180 @Override shouldRetryPreparation( ModuleDefinition module, int attempt, int maxAttempt)181 public RetryPreparationDecision shouldRetryPreparation( 182 ModuleDefinition module, 183 int attempt, 184 int maxAttempt) { 185 RetryPreparationDecision decision = new RetryPreparationDecision(false, true); 186 switch (mRetryStrategy) { 187 case NO_RETRY: 188 // Currently, do not retry if RetryStrategy is NO_RETRY. 189 return decision; 190 default: 191 // Continue the logic for retry the failures. 192 break; 193 } 194 if (attempt == maxAttempt) { 195 // No need to retry if it reaches the maximum retry count. 196 return decision; 197 } 198 if (mSkipRetryInPresubmit && "WORK_NODE".equals(mContext.getAttribute("trigger"))) { 199 CLog.d("Skipping retry due to --skip-retry-in-presubmit"); 200 return decision; 201 } 202 203 // Resetting the device only happends when FULLY_ISOLATED is set, and that cleans up the 204 // device to pure state and re-run suite-level or module-level setup. Besides, it doesn't 205 // need to retry module for reboot isolation. 206 if (!IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) { 207 CLog.i("Do not proceed on module retry because it's not set FULLY_ISOLATED."); 208 return decision; 209 } 210 211 try { 212 recoverStateOfDevices(getDevices(), attempt, module); 213 } catch (DeviceNotAvailableException e) { 214 // Retried failed, set the exception and return the decision. 215 decision = new RetryPreparationDecision(true, false); 216 decision.setPreviousException(e.getCause()); 217 return decision; 218 } 219 // Retried successfully, no exception will be caught, return the decision. 220 decision = new RetryPreparationDecision(false, false); 221 decision.setPreviousException(null); 222 return decision; 223 } 224 225 @Override setInvocationContext(IInvocationContext context)226 public void setInvocationContext(IInvocationContext context) { 227 mContext = context; 228 } 229 230 @Override setConfiguration(IConfiguration configuration)231 public void setConfiguration(IConfiguration configuration) { 232 mConfiguration = configuration; 233 } 234 235 @Override setTestInformation(TestInformation testInformation)236 public void setTestInformation(TestInformation testInformation) { 237 mTestInformation = testInformation; 238 } 239 240 @Override getTestInformation()241 public TestInformation getTestInformation() { 242 return mTestInformation; 243 } 244 245 @Override shouldRetry( IRemoteTest test, int attemptJustExecuted, List<TestRunResult> previousResults)246 public boolean shouldRetry( 247 IRemoteTest test, int attemptJustExecuted, List<TestRunResult> previousResults) 248 throws DeviceNotAvailableException { 249 return shouldRetry(test, null, attemptJustExecuted, previousResults, null); 250 } 251 252 @Override shouldRetry( IRemoteTest test, ModuleDefinition module, int attemptJustExecuted, List<TestRunResult> previousResults, DeviceNotAvailableException dnae)253 public boolean shouldRetry( 254 IRemoteTest test, 255 ModuleDefinition module, 256 int attemptJustExecuted, 257 List<TestRunResult> previousResults, 258 DeviceNotAvailableException dnae) 259 throws DeviceNotAvailableException { 260 // Keep track of some results for the test in progress for statistics purpose. 261 if (test != mCurrentlyConsideredTest) { 262 mCurrentlyConsideredTest = test; 263 mStatistics = new RetryStatsHelper(); 264 mPreviouslyFailing = new HashSet<>(); 265 } 266 267 if (mSkipRetryInPresubmit && "WORK_NODE".equals(mContext.getAttribute("trigger"))) { 268 CLog.d("Skipping retry due to --skip-retry-in-presubmit"); 269 return false; 270 } 271 272 boolean isAlreadyRecovered = false; 273 if (dnae != null) { 274 if (!module.shouldRecoverVirtualDevice()) { 275 throw dnae; 276 } 277 recoverStateOfDevices(getDevices(), attemptJustExecuted, module); 278 isAlreadyRecovered = true; 279 // Add metrics towards device is recovered by device reset. 280 if (IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) { 281 InvocationMetricLogger.addInvocationMetrics( 282 InvocationMetricLogger.InvocationMetricKey 283 .DEVICE_RECOVERED_FROM_DEVICE_RESET, 284 1); 285 } 286 } 287 288 switch (mRetryStrategy) { 289 case NO_RETRY: 290 // Return directly if we are not considering retry at all. 291 return false; 292 case ITERATIONS: 293 // Still support isolating the iterations if that's configured 294 if (!isAlreadyRecovered) { 295 recoverStateOfDevices(getDevices(), attemptJustExecuted, module); 296 } 297 // For iterations, retry directly, we have nothing to setup 298 return true; 299 case RERUN_UNTIL_FAILURE: 300 // For retrying until failure, if any failures occurred, skip retry. 301 return !hasAnyFailures(previousResults); 302 default: 303 // Continue the logic for retry the failures. 304 break; 305 } 306 307 if (!hasAnyFailures(previousResults)) { 308 CLog.d("No test run or test case failures. No need to retry."); 309 mStatistics.addResultsFromRun(previousResults, 0L, attemptJustExecuted); 310 return false; 311 } 312 313 Set<String> moduleSkipList = new LinkedHashSet<String>(); 314 if (module != null && isInSkipList(module, moduleSkipList)) { 315 CLog.d("Skip retrying known failure test of %s", module.getId()); 316 InvocationMetricLogger.addInvocationMetrics( 317 InvocationMetricKey.RETRY_SKIPPED_ALL_FILTERED_COUNT, 1); 318 return false; 319 } 320 if (module == null) { 321 // If it's not a module, carry all filters 322 moduleSkipList.addAll(mSkipRetryingSet); 323 } 324 325 boolean shouldRetry = false; 326 long retryStartTime = System.currentTimeMillis(); 327 if (test instanceof ITestFilterReceiver) { 328 // TODO(b/77548917): Right now we only support ITestFilterReceiver. We should expect to 329 // support ITestFile*Filter*Receiver in the future. 330 ITestFilterReceiver filterableTest = (ITestFilterReceiver) test; 331 shouldRetry = handleRetryFailures(filterableTest, previousResults, moduleSkipList); 332 if (shouldRetry && !isAlreadyRecovered) { 333 // In case of retry, go through the recovery routine 334 recoverStateOfDevices(getDevices(), attemptJustExecuted, module); 335 } 336 } else if (test instanceof IAutoRetriableTest) { 337 // Routine for IRemoteTest that don't support filters but still needs retry. 338 IAutoRetriableTest autoRetryTest = (IAutoRetriableTest) test; 339 shouldRetry = 340 autoRetryTest.shouldRetry(attemptJustExecuted, previousResults, moduleSkipList); 341 if (shouldRetry && !isAlreadyRecovered) { 342 recoverStateOfDevices(getDevices(), attemptJustExecuted, module); 343 } 344 } else { 345 CLog.d( 346 "%s does not implement ITestFilterReceiver or IAutoRetriableTest, thus " 347 + "cannot work with auto-retry.", 348 test); 349 return false; 350 } 351 long retryCost = System.currentTimeMillis() - retryStartTime; 352 if (!shouldRetry) { 353 retryCost = 0L; 354 } 355 mStatistics.addResultsFromRun(previousResults, retryCost, attemptJustExecuted); 356 return shouldRetry; 357 } 358 359 @Override addLastAttempt(List<TestRunResult> lastResults)360 public void addLastAttempt(List<TestRunResult> lastResults) { 361 mStatistics.addResultsFromRun(lastResults); 362 } 363 364 @Override getRetryStatistics()365 public RetryStatistics getRetryStatistics() { 366 if (mStatistics == null) { 367 return new RetryStatsHelper().calculateStatistics(); 368 } 369 return mStatistics.calculateStatistics(); 370 } 371 372 /** Returns the map of failed test cases that should be retried. */ getFailedTestCases( List<TestRunResult> previousResults)373 public static Map<TestDescription, TestResult> getFailedTestCases( 374 List<TestRunResult> previousResults) { 375 Map<TestDescription, TestResult> failedTestCases = new LinkedHashMap<>(); 376 for (TestRunResult run : previousResults) { 377 if (run != null) { 378 for (Entry<TestDescription, TestResult> entry : run.getTestResults().entrySet()) { 379 if (TestStatus.FAILURE.equals(entry.getValue().getResultStatus())) { 380 failedTestCases.put(entry.getKey(), entry.getValue()); 381 } else if (TestStatus.SKIPPED.equals(entry.getValue().getResultStatus())) { 382 // Retry skipped test as well 383 failedTestCases.put(entry.getKey(), entry.getValue()); 384 } 385 } 386 } 387 } 388 return failedTestCases; 389 } 390 391 /** Returns true if we should use the updated reporting. */ 392 @Override useUpdatedReporting()393 public boolean useUpdatedReporting() { 394 return mUpdatedReporting; 395 } 396 397 @VisibleForTesting getIsolationGrade()398 public IsolationGrade getIsolationGrade() { 399 return mRetryIsolationGrade; 400 } 401 getSkipRetrySet()402 public Set<String> getSkipRetrySet() { 403 return mSkipRetryingSet; 404 } 405 getPassedTestCases(List<TestRunResult> previousResults)406 private static Set<TestDescription> getPassedTestCases(List<TestRunResult> previousResults) { 407 Set<TestDescription> previousPassed = new LinkedHashSet<>(); 408 for (TestRunResult run : previousResults) { 409 if (run != null) { 410 for (Entry<TestDescription, TestResult> entry : run.getTestResults().entrySet()) { 411 if (!TestStatus.FAILURE.equals(entry.getValue().getResultStatus()) 412 && !TestStatus.SKIPPED.equals(entry.getValue().getResultStatus())) { 413 previousPassed.add(entry.getKey()); 414 } 415 } 416 } 417 } 418 return previousPassed; 419 } 420 421 /** 422 * Skips retry if the module is fully skipped and populate module skip list if only some tests 423 * need to stop retrying. 424 */ isInSkipList(ModuleDefinition module, Set<String> moduleSkipList)425 private boolean isInSkipList(ModuleDefinition module, Set<String> moduleSkipList) { 426 String moduleId = module.getId(); 427 if (moduleId == null) { 428 return false; 429 } 430 SuiteTestFilter moduleIdFilter = SuiteTestFilter.createFrom(moduleId); 431 String abi = moduleIdFilter.getAbi(); 432 String name = moduleIdFilter.getName(); 433 434 boolean shouldSkip = false; 435 for (String skipTest : mSkipRetryingSet) { 436 // Only handle module level exclusion 437 SuiteTestFilter skipRetryingFilter = SuiteTestFilter.createFrom(skipTest); 438 String skipAbi = skipRetryingFilter.getAbi(); 439 String skipName = skipRetryingFilter.getName(); 440 String skipTestName = skipRetryingFilter.getTest(); 441 if (abi != null 442 && name != null 443 && skipName != null 444 && name.equals(skipName)) { 445 if (skipAbi != null && !abi.equals(skipAbi)) { 446 // If the skip has an explicit abi that doesn't match 447 // module, don't skip. If not specified, consider all modules 448 continue; 449 } 450 if (skipTestName == null) { 451 InvocationMetricLogger.addInvocationMetrics( 452 InvocationMetricKey.RETRY_MODULE_SKIPPED_COUNT, 1); 453 shouldSkip = true; 454 } else { 455 moduleSkipList.add(skipTestName); 456 } 457 } 458 } 459 return shouldSkip; 460 } 461 462 /** Returns the list of failure from the previous results. */ getRunFailures(List<TestRunResult> previousResults)463 private static List<TestRunResult> getRunFailures(List<TestRunResult> previousResults) { 464 List<TestRunResult> runFailed = new ArrayList<>(); 465 for (TestRunResult run : previousResults) { 466 if (run != null && run.isRunFailure()) { 467 runFailed.add(run); 468 } 469 } 470 return runFailed; 471 } 472 getNonRetriableFailures(List<TestRunResult> failedRun)473 private static List<TestRunResult> getNonRetriableFailures(List<TestRunResult> failedRun) { 474 List<TestRunResult> nonRetriableRuns = new ArrayList<>(); 475 for (TestRunResult run : failedRun) { 476 if (!run.getRunFailureDescription().isRetriable()) { 477 nonRetriableRuns.add(run); 478 } 479 } 480 return nonRetriableRuns; 481 } 482 handleRetryFailures( ITestFilterReceiver test, List<TestRunResult> previousResults, Set<String> moduleSkipList)483 private boolean handleRetryFailures( 484 ITestFilterReceiver test, 485 List<TestRunResult> previousResults, 486 Set<String> moduleSkipList) { 487 List<TestRunResult> runFailures = getRunFailures(previousResults); 488 List<TestRunResult> nonRetriableRunFailures = getNonRetriableFailures(runFailures); 489 if (!nonRetriableRunFailures.isEmpty()) { 490 CLog.d("Skipping retry since there was a non-retriable failure."); 491 return false; 492 } 493 if (mUpdatedFiltering && mUpdatedReporting) { 494 CLog.d("Using updated filtering logic."); 495 Map<TestDescription, TestResult> previousFailedTests = 496 getFailedTestCases(previousResults); 497 if (runFailures.isEmpty() && previousFailedTests.isEmpty()) { 498 CLog.d("No test run or test case failures. No need to retry."); 499 return false; 500 } 501 Set<TestDescription> previouslyPassedTests = getPassedTestCases(previousResults); 502 excludePassedTests(test, previouslyPassedTests); 503 boolean everythingFiltered = 504 excludeNonRetriableFailure(test, previousFailedTests, moduleSkipList); 505 if (everythingFiltered && runFailures.isEmpty()) { 506 CLog.d("No failures are retriable, skipping retry."); 507 InvocationMetricLogger.addInvocationMetrics( 508 InvocationMetricKey.RETRY_SKIPPED_ALL_FILTERED_COUNT, 1); 509 } 510 return !everythingFiltered || !runFailures.isEmpty(); 511 } else if (!runFailures.isEmpty()) { 512 if (shouldFullRerun(runFailures)) { 513 List<String> names = 514 runFailures.stream().map(e -> e.getName()).collect(Collectors.toList()); 515 CLog.d("Retry the full run since [%s] runs have failures.", names); 516 return true; 517 } 518 // If we don't attempt full rerun add filters. 519 CLog.d("Full rerun not required, excluding previously passed tests."); 520 Set<TestDescription> previouslyPassedTests = getPassedTestCases(previousResults); 521 excludePassedTests(test, previouslyPassedTests); 522 return true; 523 } 524 525 // In case of test case failure, we retry with filters. 526 Map<TestDescription, TestResult> previousFailedTests = getFailedTestCases(previousResults); 527 if (!mPreviouslyFailing.isEmpty()) { 528 previousFailedTests.keySet().retainAll(mPreviouslyFailing); 529 mPreviouslyFailing.retainAll(previousFailedTests.keySet()); 530 } 531 // Abort if number of failures is high for a given one test 532 if (previousFailedTests.size() > ABORT_MAX_FAILURES) { 533 CLog.d( 534 "Found %s failures, skipping auto-retry to avoid large overhead.", 535 previousFailedTests.size()); 536 return false; 537 } 538 539 if (!previousFailedTests.isEmpty()) { 540 CLog.d("Retrying the test case failure."); 541 addRetriedTestsToFilters(test, previousFailedTests); 542 return true; 543 } 544 545 CLog.d("No test run or test case failures. No need to retry."); 546 return false; 547 } 548 549 /** Returns true if there are any failures in the previous results. */ hasAnyFailures(List<TestRunResult> previousResults)550 private boolean hasAnyFailures(List<TestRunResult> previousResults) { 551 for (TestRunResult run : previousResults) { 552 if (run != null && (run.isRunFailure() || run.hasFailedTests())) { 553 return true; 554 } 555 } 556 return false; 557 } 558 559 /** If none of the run failures require a full rerun, trigger the partial rerun logic. */ shouldFullRerun(List<TestRunResult> runFailures)560 private boolean shouldFullRerun(List<TestRunResult> runFailures) { 561 for (TestRunResult run : runFailures) { 562 if (run.getRunFailureDescription().rerunFull()) { 563 return true; 564 } 565 } 566 return false; 567 } 568 569 /** Set the filters on the test runner for the retry. */ addRetriedTestsToFilters( ITestFilterReceiver test, Map<TestDescription, TestResult> tests)570 private void addRetriedTestsToFilters( 571 ITestFilterReceiver test, Map<TestDescription, TestResult> tests) { 572 // Limit the re-run to the failure we include, so clear filters then put our failures 573 test.clearIncludeFilters(); 574 for (Entry<TestDescription, TestResult> testCaseEntry : tests.entrySet()) { 575 TestDescription testCase = testCaseEntry.getKey(); 576 if (testCaseEntry.getValue().getFailure().isRetriable()) { 577 // We have to retry without the parameters since some runner don't support it. 578 String filter = 579 String.format( 580 "%s#%s", 581 testCase.getClassName(), testCase.getTestNameWithoutParams()); 582 test.addIncludeFilter(filter); 583 } else { 584 // If a test case failure is not retriable, track it, but don't retry it so we 585 // exclude it from the filters. 586 String filter = 587 String.format("%s#%s", testCase.getClassName(), testCase.getTestName()); 588 test.addExcludeFilter(filter); 589 } 590 mPreviouslyFailing.add(testCase); 591 } 592 } 593 excludePassedTests(ITestFilterReceiver test, Set<TestDescription> passedTests)594 private void excludePassedTests(ITestFilterReceiver test, Set<TestDescription> passedTests) { 595 // Exclude all passed tests for the retry. 596 for (TestDescription testCase : passedTests) { 597 String filter = String.format("%s#%s", testCase.getClassName(), testCase.getTestName()); 598 if (test instanceof ITestFileFilterReceiver) { 599 File excludeFilterFile = ((ITestFileFilterReceiver) test).getExcludeTestFile(); 600 if (excludeFilterFile == null) { 601 try { 602 excludeFilterFile = FileUtil.createTempFile("exclude-filter", ".txt"); 603 } catch (IOException e) { 604 throw new HarnessRuntimeException( 605 e.getMessage(), e, InfraErrorIdentifier.FAIL_TO_CREATE_FILE); 606 } 607 ((ITestFileFilterReceiver) test).setExcludeTestFile(excludeFilterFile); 608 } 609 try { 610 FileUtil.writeToFile(filter + "\n", excludeFilterFile, true); 611 } catch (IOException e) { 612 CLog.e(e); 613 continue; 614 } 615 } else { 616 test.addExcludeFilter(filter); 617 } 618 } 619 } 620 621 /** Returns true if all failure are filtered out */ excludeNonRetriableFailure( ITestFilterReceiver test, Map<TestDescription, TestResult> previousFailedTests, Set<String> skipListForModule)622 private boolean excludeNonRetriableFailure( 623 ITestFilterReceiver test, 624 Map<TestDescription, TestResult> previousFailedTests, 625 Set<String> skipListForModule) { 626 Set<TestDescription> failedTests = new HashSet<>(previousFailedTests.keySet()); 627 for (Entry<TestDescription, TestResult> testCaseEntry : previousFailedTests.entrySet()) { 628 TestDescription testCase = testCaseEntry.getKey(); 629 if (!TestStatus.FAILURE.equals(testCaseEntry.getValue().getResultStatus())) { 630 // Only consider failures for retriable failures. 631 continue; 632 } 633 if (!testCaseEntry.getValue().getFailure().isRetriable()) { 634 // If a test case failure is not retriable, exclude it from the filters. 635 String filter = 636 String.format("%s#%s", testCase.getClassName(), testCase.getTestName()); 637 test.addExcludeFilter(filter); 638 failedTests.remove(testCase); 639 } 640 if (skipListForModule.contains(testCase.toString())) { 641 // If a test case failure is excluded from retry, exclude it 642 String filter = 643 String.format("%s#%s", testCase.getClassName(), testCase.getTestName()); 644 test.addExcludeFilter(filter); 645 InvocationMetricLogger.addInvocationMetrics( 646 InvocationMetricKey.RETRY_TEST_SKIPPED_COUNT, 1); 647 failedTests.remove(testCase); 648 CLog.d("Skip retry of %s, it's in skip-retry-list.", filter); 649 } 650 } 651 652 return failedTests.isEmpty(); 653 } 654 655 /** Returns all the non-stub device associated with the {@link IRemoteTest}. */ getDevices()656 private List<ITestDevice> getDevices() { 657 List<ITestDevice> listDevices = new ArrayList<>(mContext.getDevices()); 658 // Return all the non-stub device (the one we can actually do some recovery against) 659 return listDevices 660 .stream() 661 .filter(d -> !(d.getIDevice() instanceof StubDevice)) 662 .collect(Collectors.toList()); 663 } 664 665 /** Recovery attempt on the device to get it a better state before next retry. */ recoverStateOfDevices( List<ITestDevice> devices, int lastAttempt, ModuleDefinition module)666 private void recoverStateOfDevices( 667 List<ITestDevice> devices, int lastAttempt, ModuleDefinition module) 668 throws DeviceNotAvailableException { 669 if (IsolationGrade.REBOOT_ISOLATED.equals(mRetryIsolationGrade)) { 670 long start = System.currentTimeMillis(); 671 try (CloseableTraceScope ignored = new CloseableTraceScope("reboot_isolation")) { 672 for (ITestDevice device : devices) { 673 device.reboot(); 674 } 675 CurrentInvocation.setModuleIsolation(IsolationGrade.REBOOT_ISOLATED); 676 CurrentInvocation.setRunIsolation(IsolationGrade.REBOOT_ISOLATED); 677 } finally { 678 InvocationMetricLogger.addInvocationPairMetrics( 679 InvocationMetricKey.REBOOT_RETRY_ISOLATION_PAIR, 680 start, System.currentTimeMillis()); 681 } 682 } else if (IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) { 683 resetIsolation(module, devices); 684 } else if (lastAttempt == (mMaxRetryAttempts - 2)) { 685 // Reset only works for suite right now 686 if (mRebootAtLastRetry) { 687 for (ITestDevice device : devices) { 688 device.reboot(); 689 } 690 CurrentInvocation.setModuleIsolation(IsolationGrade.REBOOT_ISOLATED); 691 CurrentInvocation.setRunIsolation(IsolationGrade.REBOOT_ISOLATED); 692 } 693 } 694 } 695 resetIsolation(ModuleDefinition module, List<ITestDevice> devices)696 private void resetIsolation(ModuleDefinition module, List<ITestDevice> devices) 697 throws DeviceNotAvailableException { 698 long start = System.currentTimeMillis(); 699 try (CloseableTraceScope ignored = new CloseableTraceScope("reset_isolation")) { 700 isolateRetry(devices); 701 CLog.d( 702 "Current host properties being erased by reset: %s", 703 mTestInformation.properties().getAll()); 704 mTestInformation.properties().clear(); 705 // Rerun suite level preparer if we are inside a subprocess 706 reSetupModule( 707 module, 708 (mConfiguration 709 .getCommandOptions() 710 .getInvocationData() 711 .containsKey(SubprocessTfLauncher.SUBPROCESS_TAG_NAME) 712 && !mUseSnapshotForReset)); 713 } finally { 714 InvocationMetricLogger.addInvocationPairMetrics( 715 InvocationMetricKey.RESET_RETRY_ISOLATION_PAIR, 716 start, System.currentTimeMillis()); 717 } 718 } 719 720 @VisibleForTesting isolateRetry(List<ITestDevice> devices)721 protected void isolateRetry(List<ITestDevice> devices) throws DeviceNotAvailableException { 722 if (!mUseSnapshotForReset) { 723 DeviceResetHandler handler = new DeviceResetHandler(mContext); 724 for (ITestDevice device : devices) { 725 boolean resetSuccess = handler.resetDevice(device); 726 if (!resetSuccess) { 727 throw new DeviceNotAvailableException( 728 String.format("Failed to reset device: %s", device.getSerialNumber()), 729 device.getSerialNumber(), 730 DeviceErrorIdentifier.DEVICE_FAILED_TO_RESET); 731 } 732 } 733 } else { 734 for (ITestDevice device : devices) { 735 new DeviceSnapshotHandler() 736 .restoreSnapshotDevice(device, mContext.getInvocationId()); 737 } 738 } 739 } 740 reSetupModule(ModuleDefinition module, boolean includeSuitePreparers)741 private void reSetupModule(ModuleDefinition module, boolean includeSuitePreparers) 742 throws DeviceNotAvailableException { 743 if (module == null) { 744 return; 745 } 746 if (module.getId() != null) { 747 InvocationMetricLogger.addInvocationMetrics( 748 InvocationMetricKey.DEVICE_RESET_MODULES, module.getId()); 749 } 750 // Run all preparers including optionally suite level ones. 751 Throwable preparationException = 752 module.runPreparation(includeSuitePreparers); 753 if (preparationException != null) { 754 CLog.e(preparationException); 755 throw new DeviceNotAvailableException( 756 String.format( 757 "Failed to reset devices before retry: %s", 758 preparationException.toString()), 759 preparationException, 760 "serial", 761 DeviceErrorIdentifier.DEVICE_FAILED_TO_RESET); 762 } 763 } 764 } 765