1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.retry;
17 
18 import com.android.annotations.VisibleForTesting;
19 import com.android.tradefed.config.IConfiguration;
20 import com.android.tradefed.config.IConfigurationReceiver;
21 import com.android.tradefed.config.Option;
22 import com.android.tradefed.device.DeviceNotAvailableException;
23 import com.android.tradefed.device.ITestDevice;
24 import com.android.tradefed.device.StubDevice;
25 import com.android.tradefed.device.internal.DeviceResetHandler;
26 import com.android.tradefed.device.internal.DeviceSnapshotHandler;
27 import com.android.tradefed.error.HarnessRuntimeException;
28 import com.android.tradefed.invoker.IInvocationContext;
29 import com.android.tradefed.invoker.TestInformation;
30 import com.android.tradefed.invoker.logger.CurrentInvocation;
31 import com.android.tradefed.invoker.logger.CurrentInvocation.IsolationGrade;
32 import com.android.tradefed.invoker.logger.InvocationMetricLogger;
33 import com.android.tradefed.invoker.logger.InvocationMetricLogger.InvocationMetricKey;
34 import com.android.tradefed.invoker.tracing.CloseableTraceScope;
35 import com.android.tradefed.log.LogUtil.CLog;
36 import com.android.tradefed.result.TestDescription;
37 import com.android.tradefed.result.TestResult;
38 import com.android.tradefed.result.TestRunResult;
39 import com.android.tradefed.result.TestStatus;
40 import com.android.tradefed.result.error.DeviceErrorIdentifier;
41 import com.android.tradefed.result.error.InfraErrorIdentifier;
42 import com.android.tradefed.testtype.IRemoteTest;
43 import com.android.tradefed.testtype.ITestFileFilterReceiver;
44 import com.android.tradefed.testtype.ITestFilterReceiver;
45 import com.android.tradefed.testtype.ITestInformationReceiver;
46 import com.android.tradefed.testtype.SubprocessTfLauncher;
47 import com.android.tradefed.testtype.retry.IAutoRetriableTest;
48 import com.android.tradefed.testtype.suite.ModuleDefinition;
49 import com.android.tradefed.testtype.suite.SuiteTestFilter;
50 import com.android.tradefed.util.FileUtil;
51 
52 import java.io.File;
53 import java.io.IOException;
54 import java.util.ArrayList;
55 import java.util.HashSet;
56 import java.util.LinkedHashMap;
57 import java.util.LinkedHashSet;
58 import java.util.List;
59 import java.util.Map;
60 import java.util.Map.Entry;
61 import java.util.Set;
62 import java.util.stream.Collectors;
63 
64 /**
 * Base implementation of {@link IRetryDecision}. The base implementation only takes local signals
 * into account.
67  */
68 public class BaseRetryDecision
69         implements IRetryDecision, IConfigurationReceiver, ITestInformationReceiver {
70 
71     private static final int ABORT_MAX_FAILURES = 75;
72 
73     @Option(
74         name = "reboot-at-last-retry",
75         description = "Reboot the device at the last retry attempt."
76     )
77     private boolean mRebootAtLastRetry = false;
78 
79     @Option(
80             name = "retry-isolation-grade",
81             description = "Control the isolation level that should be attempted between retries."
82     )
83     private IsolationGrade mRetryIsolationGrade = IsolationGrade.NOT_ISOLATED;
84 
85     @Option(
86         name = "max-testcase-run-count",
87         description =
88                 "If the IRemoteTest can have its testcases run multiple times, "
89                         + "the max number of runs for each testcase."
90     )
91     private int mMaxRetryAttempts = 1;
92 
93     @Option(
94         name = "retry-strategy",
95         description =
96                 "The retry strategy to be used when re-running some tests with "
97                         + "--max-testcase-run-count"
98     )
99     private RetryStrategy mRetryStrategy = RetryStrategy.NO_RETRY;
100 
101     @Option(
102             name = "skip-retry-in-presubmit",
103             description = "Skip retry attempts specifically in presubmit builds")
104     private boolean mSkipRetryInPresubmit = false;
105 
106     @Option(
107         name = "auto-retry",
108         description =
109                 "Whether or not to enable the new auto-retry. This is a feature flag for testing."
110     )
111     private boolean mEnableAutoRetry = true;
112 
113     @Option(
114             name = "skip-retrying-list",
115             description =
116                     "If a test in the list, skip retrying it. The format is the same as the "
117                             + "SuiteTestFilter.")
118     private Set<String> mSkipRetryingSet = new LinkedHashSet<>();
119 
120     @Option(
121             name = "updated-retry-reporting",
122             description = "Feature flag to use the updated retry reporting strategy.")
123     private boolean mUpdatedReporting = true;
124 
125     @Option(
126             name = "updated-filtering",
127             description = "Feature flag to use the updated filtering logic.")
128     private boolean mUpdatedFiltering = true;
129 
130     @Deprecated
131     @Option(
132             name = "module-preparation-retry",
133             description = "Whether or not to retry any module-level target preparation errors." +
134                     "This flag is for feature testing, and eventualy it's all controlled under " +
135                     "retry strategy."
136     )
137     private boolean mModulePreparationRetry = false;
138 
139     @Option(
140             name = "use-snapshot-for-reset",
141             description = "Feature flag to use snapshot/restore instead of powerwash.")
142     private boolean mUseSnapshotForReset = false;
143 
144     private IInvocationContext mContext;
145     private IConfiguration mConfiguration;
146     private TestInformation mTestInformation;
147 
148     private IRemoteTest mCurrentlyConsideredTest;
149     private Set<TestDescription> mPreviouslyFailing;
150     private RetryStatsHelper mStatistics;
151 
152     /** Constructor for the retry decision */
BaseRetryDecision()153     public BaseRetryDecision() {}
154 
155     @Override
isAutoRetryEnabled()156     public boolean isAutoRetryEnabled() {
157         return mEnableAutoRetry;
158     }
159 
160     @Override
getRetryStrategy()161     public RetryStrategy getRetryStrategy() {
162         return mRetryStrategy;
163     }
164 
165     @Override
rebootAtLastAttempt()166     public boolean rebootAtLastAttempt() {
167         return mRebootAtLastRetry;
168     }
169 
170     @Override
getMaxRetryCount()171     public int getMaxRetryCount() {
172         return mMaxRetryAttempts;
173     }
174 
175     @Override
addToSkipRetryList(String filterEntry)176     public void addToSkipRetryList(String filterEntry) {
177         mSkipRetryingSet.add(filterEntry);
178     }
179 
180     @Override
shouldRetryPreparation( ModuleDefinition module, int attempt, int maxAttempt)181     public RetryPreparationDecision shouldRetryPreparation(
182             ModuleDefinition module,
183             int attempt,
184             int maxAttempt) {
185         RetryPreparationDecision decision = new RetryPreparationDecision(false, true);
186         switch (mRetryStrategy) {
187             case NO_RETRY:
188                 // Currently, do not retry if RetryStrategy is NO_RETRY.
189                 return decision;
190             default:
191                 // Continue the logic for retry the failures.
192                 break;
193         }
194         if (attempt == maxAttempt) {
195             // No need to retry if it reaches the maximum retry count.
196             return decision;
197         }
198         if (mSkipRetryInPresubmit && "WORK_NODE".equals(mContext.getAttribute("trigger"))) {
199             CLog.d("Skipping retry due to --skip-retry-in-presubmit");
200             return decision;
201         }
202 
203         // Resetting the device only happends when FULLY_ISOLATED is set, and that cleans up the
204         // device to pure state and re-run suite-level or module-level setup. Besides, it doesn't
205         // need to retry module for reboot isolation.
206         if (!IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) {
207             CLog.i("Do not proceed on module retry because it's not set FULLY_ISOLATED.");
208             return decision;
209         }
210 
211         try {
212             recoverStateOfDevices(getDevices(), attempt, module);
213         } catch (DeviceNotAvailableException e) {
214             // Retried failed, set the exception and return the decision.
215             decision = new RetryPreparationDecision(true, false);
216             decision.setPreviousException(e.getCause());
217             return decision;
218         }
219         // Retried successfully, no exception will be caught, return the decision.
220         decision = new RetryPreparationDecision(false, false);
221         decision.setPreviousException(null);
222         return decision;
223     }
224 
225     @Override
setInvocationContext(IInvocationContext context)226     public void setInvocationContext(IInvocationContext context) {
227         mContext = context;
228     }
229 
230     @Override
setConfiguration(IConfiguration configuration)231     public void setConfiguration(IConfiguration configuration) {
232         mConfiguration = configuration;
233     }
234 
235     @Override
setTestInformation(TestInformation testInformation)236     public void setTestInformation(TestInformation testInformation) {
237         mTestInformation = testInformation;
238     }
239 
240     @Override
getTestInformation()241     public TestInformation getTestInformation() {
242         return mTestInformation;
243     }
244 
245     @Override
shouldRetry( IRemoteTest test, int attemptJustExecuted, List<TestRunResult> previousResults)246     public boolean shouldRetry(
247             IRemoteTest test, int attemptJustExecuted, List<TestRunResult> previousResults)
248             throws DeviceNotAvailableException {
249         return shouldRetry(test, null, attemptJustExecuted, previousResults, null);
250     }
251 
252     @Override
shouldRetry( IRemoteTest test, ModuleDefinition module, int attemptJustExecuted, List<TestRunResult> previousResults, DeviceNotAvailableException dnae)253     public boolean shouldRetry(
254             IRemoteTest test,
255             ModuleDefinition module,
256             int attemptJustExecuted,
257             List<TestRunResult> previousResults,
258             DeviceNotAvailableException dnae)
259             throws DeviceNotAvailableException {
260         // Keep track of some results for the test in progress for statistics purpose.
261         if (test != mCurrentlyConsideredTest) {
262             mCurrentlyConsideredTest = test;
263             mStatistics = new RetryStatsHelper();
264             mPreviouslyFailing = new HashSet<>();
265         }
266 
267         if (mSkipRetryInPresubmit && "WORK_NODE".equals(mContext.getAttribute("trigger"))) {
268             CLog.d("Skipping retry due to --skip-retry-in-presubmit");
269             return false;
270         }
271 
272         boolean isAlreadyRecovered = false;
273         if (dnae != null) {
274             if (!module.shouldRecoverVirtualDevice()) {
275                 throw dnae;
276             }
277             recoverStateOfDevices(getDevices(), attemptJustExecuted, module);
278             isAlreadyRecovered = true;
279             // Add metrics towards device is recovered by device reset.
280             if (IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) {
281                 InvocationMetricLogger.addInvocationMetrics(
282                         InvocationMetricLogger.InvocationMetricKey
283                                 .DEVICE_RECOVERED_FROM_DEVICE_RESET,
284                         1);
285             }
286         }
287 
288         switch (mRetryStrategy) {
289             case NO_RETRY:
290                 // Return directly if we are not considering retry at all.
291                 return false;
292             case ITERATIONS:
293                 // Still support isolating the iterations if that's configured
294                 if (!isAlreadyRecovered) {
295                     recoverStateOfDevices(getDevices(), attemptJustExecuted, module);
296                 }
297                 // For iterations, retry directly, we have nothing to setup
298                 return true;
299             case RERUN_UNTIL_FAILURE:
300                 // For retrying until failure, if any failures occurred, skip retry.
301                 return !hasAnyFailures(previousResults);
302             default:
303                 // Continue the logic for retry the failures.
304                 break;
305         }
306 
307         if (!hasAnyFailures(previousResults)) {
308             CLog.d("No test run or test case failures. No need to retry.");
309             mStatistics.addResultsFromRun(previousResults, 0L, attemptJustExecuted);
310             return false;
311         }
312 
313         Set<String> moduleSkipList = new LinkedHashSet<String>();
314         if (module != null && isInSkipList(module, moduleSkipList)) {
315             CLog.d("Skip retrying known failure test of %s", module.getId());
316             InvocationMetricLogger.addInvocationMetrics(
317                         InvocationMetricKey.RETRY_SKIPPED_ALL_FILTERED_COUNT, 1);
318             return false;
319         }
320         if (module == null) {
321             // If it's not a module, carry all filters
322             moduleSkipList.addAll(mSkipRetryingSet);
323         }
324 
325         boolean shouldRetry = false;
326         long retryStartTime = System.currentTimeMillis();
327         if (test instanceof ITestFilterReceiver) {
328             // TODO(b/77548917): Right now we only support ITestFilterReceiver. We should expect to
329             // support ITestFile*Filter*Receiver in the future.
330             ITestFilterReceiver filterableTest = (ITestFilterReceiver) test;
331             shouldRetry = handleRetryFailures(filterableTest, previousResults, moduleSkipList);
332             if (shouldRetry && !isAlreadyRecovered) {
333                 // In case of retry, go through the recovery routine
334                 recoverStateOfDevices(getDevices(), attemptJustExecuted, module);
335             }
336         } else if (test instanceof IAutoRetriableTest) {
337             // Routine for IRemoteTest that don't support filters but still needs retry.
338             IAutoRetriableTest autoRetryTest = (IAutoRetriableTest) test;
339             shouldRetry =
340                     autoRetryTest.shouldRetry(attemptJustExecuted, previousResults, moduleSkipList);
341             if (shouldRetry && !isAlreadyRecovered) {
342                 recoverStateOfDevices(getDevices(), attemptJustExecuted, module);
343             }
344         } else {
345             CLog.d(
346                     "%s does not implement ITestFilterReceiver or IAutoRetriableTest, thus "
347                             + "cannot work with auto-retry.",
348                     test);
349             return false;
350         }
351         long retryCost = System.currentTimeMillis() - retryStartTime;
352         if (!shouldRetry) {
353             retryCost = 0L;
354         }
355         mStatistics.addResultsFromRun(previousResults, retryCost, attemptJustExecuted);
356         return shouldRetry;
357     }
358 
359     @Override
addLastAttempt(List<TestRunResult> lastResults)360     public void addLastAttempt(List<TestRunResult> lastResults) {
361         mStatistics.addResultsFromRun(lastResults);
362     }
363 
364     @Override
getRetryStatistics()365     public RetryStatistics getRetryStatistics() {
366         if (mStatistics == null) {
367             return new RetryStatsHelper().calculateStatistics();
368         }
369         return mStatistics.calculateStatistics();
370     }
371 
372     /** Returns the map of failed test cases that should be retried. */
getFailedTestCases( List<TestRunResult> previousResults)373     public static Map<TestDescription, TestResult> getFailedTestCases(
374             List<TestRunResult> previousResults) {
375         Map<TestDescription, TestResult> failedTestCases = new LinkedHashMap<>();
376         for (TestRunResult run : previousResults) {
377             if (run != null) {
378                 for (Entry<TestDescription, TestResult> entry : run.getTestResults().entrySet()) {
379                     if (TestStatus.FAILURE.equals(entry.getValue().getResultStatus())) {
380                         failedTestCases.put(entry.getKey(), entry.getValue());
381                     } else if (TestStatus.SKIPPED.equals(entry.getValue().getResultStatus())) {
382                         // Retry skipped test as well
383                         failedTestCases.put(entry.getKey(), entry.getValue());
384                     }
385                 }
386             }
387         }
388         return failedTestCases;
389     }
390 
391     /** Returns true if we should use the updated reporting. */
392     @Override
useUpdatedReporting()393     public boolean useUpdatedReporting() {
394         return mUpdatedReporting;
395     }
396 
397     @VisibleForTesting
getIsolationGrade()398     public IsolationGrade getIsolationGrade() {
399         return mRetryIsolationGrade;
400     }
401 
getSkipRetrySet()402     public Set<String> getSkipRetrySet() {
403         return mSkipRetryingSet;
404     }
405 
getPassedTestCases(List<TestRunResult> previousResults)406     private static Set<TestDescription> getPassedTestCases(List<TestRunResult> previousResults) {
407         Set<TestDescription> previousPassed = new LinkedHashSet<>();
408         for (TestRunResult run : previousResults) {
409             if (run != null) {
410                 for (Entry<TestDescription, TestResult> entry : run.getTestResults().entrySet()) {
411                     if (!TestStatus.FAILURE.equals(entry.getValue().getResultStatus())
412                             && !TestStatus.SKIPPED.equals(entry.getValue().getResultStatus())) {
413                         previousPassed.add(entry.getKey());
414                     }
415                 }
416             }
417         }
418         return previousPassed;
419     }
420 
421     /**
422      * Skips retry if the module is fully skipped and populate module skip list if only some tests
423      * need to stop retrying.
424      */
isInSkipList(ModuleDefinition module, Set<String> moduleSkipList)425     private boolean isInSkipList(ModuleDefinition module, Set<String> moduleSkipList) {
426         String moduleId = module.getId();
427         if (moduleId == null) {
428             return false;
429         }
430         SuiteTestFilter moduleIdFilter = SuiteTestFilter.createFrom(moduleId);
431         String abi = moduleIdFilter.getAbi();
432         String name = moduleIdFilter.getName();
433 
434         boolean shouldSkip = false;
435         for (String skipTest : mSkipRetryingSet) {
436             // Only handle module level exclusion
437             SuiteTestFilter skipRetryingFilter = SuiteTestFilter.createFrom(skipTest);
438             String skipAbi = skipRetryingFilter.getAbi();
439             String skipName = skipRetryingFilter.getName();
440             String skipTestName = skipRetryingFilter.getTest();
441             if (abi != null
442                     && name != null
443                     && skipName != null
444                     && name.equals(skipName)) {
445                 if (skipAbi != null && !abi.equals(skipAbi)) {
446                     // If the skip has an explicit abi that doesn't match
447                     // module, don't skip. If not specified, consider all modules
448                     continue;
449                 }
450                 if (skipTestName == null) {
451                     InvocationMetricLogger.addInvocationMetrics(
452                             InvocationMetricKey.RETRY_MODULE_SKIPPED_COUNT, 1);
453                     shouldSkip = true;
454                 } else {
455                     moduleSkipList.add(skipTestName);
456                 }
457             }
458         }
459         return shouldSkip;
460     }
461 
462     /** Returns the list of failure from the previous results. */
getRunFailures(List<TestRunResult> previousResults)463     private static List<TestRunResult> getRunFailures(List<TestRunResult> previousResults) {
464         List<TestRunResult> runFailed = new ArrayList<>();
465         for (TestRunResult run : previousResults) {
466             if (run != null && run.isRunFailure()) {
467                 runFailed.add(run);
468             }
469         }
470         return runFailed;
471     }
472 
getNonRetriableFailures(List<TestRunResult> failedRun)473     private static List<TestRunResult> getNonRetriableFailures(List<TestRunResult> failedRun) {
474         List<TestRunResult> nonRetriableRuns = new ArrayList<>();
475         for (TestRunResult run : failedRun) {
476             if (!run.getRunFailureDescription().isRetriable()) {
477                 nonRetriableRuns.add(run);
478             }
479         }
480         return nonRetriableRuns;
481     }
482 
handleRetryFailures( ITestFilterReceiver test, List<TestRunResult> previousResults, Set<String> moduleSkipList)483     private boolean handleRetryFailures(
484             ITestFilterReceiver test,
485             List<TestRunResult> previousResults,
486             Set<String> moduleSkipList) {
487         List<TestRunResult> runFailures = getRunFailures(previousResults);
488         List<TestRunResult> nonRetriableRunFailures = getNonRetriableFailures(runFailures);
489         if (!nonRetriableRunFailures.isEmpty()) {
490             CLog.d("Skipping retry since there was a non-retriable failure.");
491             return false;
492         }
493         if (mUpdatedFiltering && mUpdatedReporting) {
494             CLog.d("Using updated filtering logic.");
495             Map<TestDescription, TestResult> previousFailedTests =
496                     getFailedTestCases(previousResults);
497             if (runFailures.isEmpty() && previousFailedTests.isEmpty()) {
498                 CLog.d("No test run or test case failures. No need to retry.");
499                 return false;
500             }
501             Set<TestDescription> previouslyPassedTests = getPassedTestCases(previousResults);
502             excludePassedTests(test, previouslyPassedTests);
503             boolean everythingFiltered =
504                     excludeNonRetriableFailure(test, previousFailedTests, moduleSkipList);
505             if (everythingFiltered && runFailures.isEmpty()) {
506                 CLog.d("No failures are retriable, skipping retry.");
507                 InvocationMetricLogger.addInvocationMetrics(
508                         InvocationMetricKey.RETRY_SKIPPED_ALL_FILTERED_COUNT, 1);
509             }
510             return !everythingFiltered || !runFailures.isEmpty();
511         } else if (!runFailures.isEmpty()) {
512             if (shouldFullRerun(runFailures)) {
513                 List<String> names =
514                         runFailures.stream().map(e -> e.getName()).collect(Collectors.toList());
515                 CLog.d("Retry the full run since [%s] runs have failures.", names);
516                 return true;
517             }
518             // If we don't attempt full rerun add filters.
519             CLog.d("Full rerun not required, excluding previously passed tests.");
520             Set<TestDescription> previouslyPassedTests = getPassedTestCases(previousResults);
521             excludePassedTests(test, previouslyPassedTests);
522             return true;
523         }
524 
525         // In case of test case failure, we retry with filters.
526         Map<TestDescription, TestResult> previousFailedTests = getFailedTestCases(previousResults);
527         if (!mPreviouslyFailing.isEmpty()) {
528             previousFailedTests.keySet().retainAll(mPreviouslyFailing);
529             mPreviouslyFailing.retainAll(previousFailedTests.keySet());
530         }
531         // Abort if number of failures is high for a given one test
532         if (previousFailedTests.size() > ABORT_MAX_FAILURES) {
533             CLog.d(
534                     "Found %s failures, skipping auto-retry to avoid large overhead.",
535                     previousFailedTests.size());
536             return false;
537         }
538 
539         if (!previousFailedTests.isEmpty()) {
540             CLog.d("Retrying the test case failure.");
541             addRetriedTestsToFilters(test, previousFailedTests);
542             return true;
543         }
544 
545         CLog.d("No test run or test case failures. No need to retry.");
546         return false;
547     }
548 
549     /** Returns true if there are any failures in the previous results. */
hasAnyFailures(List<TestRunResult> previousResults)550     private boolean hasAnyFailures(List<TestRunResult> previousResults) {
551         for (TestRunResult run : previousResults) {
552             if (run != null && (run.isRunFailure() || run.hasFailedTests())) {
553                 return true;
554             }
555         }
556         return false;
557     }
558 
559     /** If none of the run failures require a full rerun, trigger the partial rerun logic. */
shouldFullRerun(List<TestRunResult> runFailures)560     private boolean shouldFullRerun(List<TestRunResult> runFailures) {
561         for (TestRunResult run : runFailures) {
562             if (run.getRunFailureDescription().rerunFull()) {
563                 return true;
564             }
565         }
566         return false;
567     }
568 
569     /** Set the filters on the test runner for the retry. */
addRetriedTestsToFilters( ITestFilterReceiver test, Map<TestDescription, TestResult> tests)570     private void addRetriedTestsToFilters(
571             ITestFilterReceiver test, Map<TestDescription, TestResult> tests) {
572         // Limit the re-run to the failure we include, so clear filters then put our failures
573         test.clearIncludeFilters();
574         for (Entry<TestDescription, TestResult> testCaseEntry : tests.entrySet()) {
575             TestDescription testCase = testCaseEntry.getKey();
576             if (testCaseEntry.getValue().getFailure().isRetriable()) {
577                 // We have to retry without the parameters since some runner don't support it.
578                 String filter =
579                         String.format(
580                                 "%s#%s",
581                                 testCase.getClassName(), testCase.getTestNameWithoutParams());
582                 test.addIncludeFilter(filter);
583             } else {
584                 // If a test case failure is not retriable, track it, but don't retry it so we
585                 // exclude it from the filters.
586                 String filter =
587                         String.format("%s#%s", testCase.getClassName(), testCase.getTestName());
588                 test.addExcludeFilter(filter);
589             }
590             mPreviouslyFailing.add(testCase);
591         }
592     }
593 
excludePassedTests(ITestFilterReceiver test, Set<TestDescription> passedTests)594     private void excludePassedTests(ITestFilterReceiver test, Set<TestDescription> passedTests) {
595         // Exclude all passed tests for the retry.
596         for (TestDescription testCase : passedTests) {
597             String filter = String.format("%s#%s", testCase.getClassName(), testCase.getTestName());
598             if (test instanceof ITestFileFilterReceiver) {
599                 File excludeFilterFile = ((ITestFileFilterReceiver) test).getExcludeTestFile();
600                 if (excludeFilterFile == null) {
601                     try {
602                         excludeFilterFile = FileUtil.createTempFile("exclude-filter", ".txt");
603                     } catch (IOException e) {
604                         throw new HarnessRuntimeException(
605                                 e.getMessage(), e, InfraErrorIdentifier.FAIL_TO_CREATE_FILE);
606                     }
607                     ((ITestFileFilterReceiver) test).setExcludeTestFile(excludeFilterFile);
608                 }
609                 try {
610                     FileUtil.writeToFile(filter + "\n", excludeFilterFile, true);
611                 } catch (IOException e) {
612                     CLog.e(e);
613                     continue;
614                 }
615             } else {
616                 test.addExcludeFilter(filter);
617             }
618         }
619     }
620 
621     /** Returns true if all failure are filtered out */
excludeNonRetriableFailure( ITestFilterReceiver test, Map<TestDescription, TestResult> previousFailedTests, Set<String> skipListForModule)622     private boolean excludeNonRetriableFailure(
623             ITestFilterReceiver test,
624             Map<TestDescription, TestResult> previousFailedTests,
625             Set<String> skipListForModule) {
626         Set<TestDescription> failedTests = new HashSet<>(previousFailedTests.keySet());
627         for (Entry<TestDescription, TestResult> testCaseEntry : previousFailedTests.entrySet()) {
628             TestDescription testCase = testCaseEntry.getKey();
629             if (!TestStatus.FAILURE.equals(testCaseEntry.getValue().getResultStatus())) {
630                 // Only consider failures for retriable failures.
631                 continue;
632             }
633             if (!testCaseEntry.getValue().getFailure().isRetriable()) {
634                 // If a test case failure is not retriable, exclude it from the filters.
635                 String filter =
636                         String.format("%s#%s", testCase.getClassName(), testCase.getTestName());
637                 test.addExcludeFilter(filter);
638                 failedTests.remove(testCase);
639             }
640             if (skipListForModule.contains(testCase.toString())) {
641                 // If a test case failure is excluded from retry, exclude it
642                 String filter =
643                         String.format("%s#%s", testCase.getClassName(), testCase.getTestName());
644                 test.addExcludeFilter(filter);
645                 InvocationMetricLogger.addInvocationMetrics(
646                         InvocationMetricKey.RETRY_TEST_SKIPPED_COUNT, 1);
647                 failedTests.remove(testCase);
648                 CLog.d("Skip retry of %s, it's in skip-retry-list.", filter);
649             }
650         }
651 
652         return failedTests.isEmpty();
653     }
654 
655     /** Returns all the non-stub device associated with the {@link IRemoteTest}. */
getDevices()656     private List<ITestDevice> getDevices() {
657         List<ITestDevice> listDevices = new ArrayList<>(mContext.getDevices());
658         // Return all the non-stub device (the one we can actually do some recovery against)
659         return listDevices
660                 .stream()
661                 .filter(d -> !(d.getIDevice() instanceof StubDevice))
662                 .collect(Collectors.toList());
663     }
664 
665     /** Recovery attempt on the device to get it a better state before next retry. */
recoverStateOfDevices( List<ITestDevice> devices, int lastAttempt, ModuleDefinition module)666     private void recoverStateOfDevices(
667             List<ITestDevice> devices, int lastAttempt, ModuleDefinition module)
668             throws DeviceNotAvailableException {
669         if (IsolationGrade.REBOOT_ISOLATED.equals(mRetryIsolationGrade)) {
670             long start = System.currentTimeMillis();
671             try (CloseableTraceScope ignored = new CloseableTraceScope("reboot_isolation")) {
672                 for (ITestDevice device : devices) {
673                     device.reboot();
674                 }
675                 CurrentInvocation.setModuleIsolation(IsolationGrade.REBOOT_ISOLATED);
676                 CurrentInvocation.setRunIsolation(IsolationGrade.REBOOT_ISOLATED);
677             } finally {
678                 InvocationMetricLogger.addInvocationPairMetrics(
679                         InvocationMetricKey.REBOOT_RETRY_ISOLATION_PAIR,
680                         start, System.currentTimeMillis());
681             }
682         } else if (IsolationGrade.FULLY_ISOLATED.equals(mRetryIsolationGrade)) {
683             resetIsolation(module, devices);
684         } else if (lastAttempt == (mMaxRetryAttempts - 2)) {
685             // Reset only works for suite right now
686             if (mRebootAtLastRetry) {
687                 for (ITestDevice device : devices) {
688                     device.reboot();
689                 }
690                 CurrentInvocation.setModuleIsolation(IsolationGrade.REBOOT_ISOLATED);
691                 CurrentInvocation.setRunIsolation(IsolationGrade.REBOOT_ISOLATED);
692             }
693         }
694     }
695 
resetIsolation(ModuleDefinition module, List<ITestDevice> devices)696     private void resetIsolation(ModuleDefinition module, List<ITestDevice> devices)
697             throws DeviceNotAvailableException {
698         long start = System.currentTimeMillis();
699         try (CloseableTraceScope ignored = new CloseableTraceScope("reset_isolation")) {
700             isolateRetry(devices);
701             CLog.d(
702                     "Current host properties being erased by reset: %s",
703                     mTestInformation.properties().getAll());
704             mTestInformation.properties().clear();
705             // Rerun suite level preparer if we are inside a subprocess
706             reSetupModule(
707                     module,
708                     (mConfiguration
709                                     .getCommandOptions()
710                                     .getInvocationData()
711                                     .containsKey(SubprocessTfLauncher.SUBPROCESS_TAG_NAME)
712                             && !mUseSnapshotForReset));
713         } finally {
714             InvocationMetricLogger.addInvocationPairMetrics(
715                     InvocationMetricKey.RESET_RETRY_ISOLATION_PAIR,
716                     start, System.currentTimeMillis());
717         }
718     }
719 
720     @VisibleForTesting
isolateRetry(List<ITestDevice> devices)721     protected void isolateRetry(List<ITestDevice> devices) throws DeviceNotAvailableException {
722         if (!mUseSnapshotForReset) {
723             DeviceResetHandler handler = new DeviceResetHandler(mContext);
724             for (ITestDevice device : devices) {
725                 boolean resetSuccess = handler.resetDevice(device);
726                 if (!resetSuccess) {
727                     throw new DeviceNotAvailableException(
728                             String.format("Failed to reset device: %s", device.getSerialNumber()),
729                             device.getSerialNumber(),
730                             DeviceErrorIdentifier.DEVICE_FAILED_TO_RESET);
731                 }
732             }
733         } else {
734             for (ITestDevice device : devices) {
735                 new DeviceSnapshotHandler()
736                         .restoreSnapshotDevice(device, mContext.getInvocationId());
737             }
738         }
739     }
740 
reSetupModule(ModuleDefinition module, boolean includeSuitePreparers)741     private void reSetupModule(ModuleDefinition module, boolean includeSuitePreparers)
742             throws DeviceNotAvailableException {
743         if (module == null) {
744             return;
745         }
746         if (module.getId() != null) {
747             InvocationMetricLogger.addInvocationMetrics(
748                     InvocationMetricKey.DEVICE_RESET_MODULES, module.getId());
749         }
750         // Run all preparers including optionally suite level ones.
751         Throwable preparationException =
752                 module.runPreparation(includeSuitePreparers);
753         if (preparationException != null) {
754             CLog.e(preparationException);
755             throw new DeviceNotAvailableException(
756                     String.format(
757                             "Failed to reset devices before retry: %s",
758                             preparationException.toString()),
759                     preparationException,
760                     "serial",
761                     DeviceErrorIdentifier.DEVICE_FAILED_TO_RESET);
762         }
763     }
764 }
765