1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.tradefed.testtype.suite;
18 
19 import com.android.tradefed.config.ConfigurationDescriptor;
20 import com.android.tradefed.config.IConfiguration;
21 import com.android.tradefed.config.IConfigurationReceiver;
22 import com.android.tradefed.device.DeviceNotAvailableException;
23 import com.android.tradefed.device.DeviceUnresponsiveException;
24 import com.android.tradefed.device.metric.CollectorHelper;
25 import com.android.tradefed.device.metric.CountTestCasesCollector;
26 import com.android.tradefed.device.metric.IMetricCollector;
27 import com.android.tradefed.device.metric.IMetricCollectorReceiver;
28 import com.android.tradefed.error.IHarnessException;
29 import com.android.tradefed.invoker.IInvocationContext;
30 import com.android.tradefed.invoker.TestInformation;
31 import com.android.tradefed.invoker.logger.CurrentInvocation;
32 import com.android.tradefed.invoker.logger.CurrentInvocation.IsolationGrade;
33 import com.android.tradefed.invoker.tracing.CloseableTraceScope;
34 import com.android.tradefed.log.LogUtil.CLog;
35 import com.android.tradefed.metrics.proto.MetricMeasurement.Metric;
36 import com.android.tradefed.result.FailureDescription;
37 import com.android.tradefed.result.ILogSaver;
38 import com.android.tradefed.result.ITestInvocationListener;
39 import com.android.tradefed.result.ResultAndLogForwarder;
40 import com.android.tradefed.result.TestDescription;
41 import com.android.tradefed.result.TestRunResult;
42 import com.android.tradefed.result.TestStatus;
43 import com.android.tradefed.result.error.ErrorIdentifier;
44 import com.android.tradefed.retry.IRetryDecision;
45 import com.android.tradefed.retry.MergeStrategy;
46 import com.android.tradefed.retry.RetryLogSaverResultForwarder;
47 import com.android.tradefed.retry.RetryStatistics;
48 import com.android.tradefed.testtype.IRemoteTest;
49 import com.android.tradefed.testtype.ITestCollector;
50 import com.android.tradefed.testtype.ITestFilterReceiver;
51 import com.android.tradefed.util.StreamUtil;
52 
53 import com.google.common.annotations.VisibleForTesting;
54 
55 import java.time.Duration;
56 import java.util.ArrayList;
57 import java.util.Arrays;
58 import java.util.HashMap;
59 import java.util.LinkedHashMap;
60 import java.util.LinkedHashSet;
61 import java.util.List;
62 import java.util.Map;
63 import java.util.Set;
64 
65 /**
66  * A wrapper class works on the {@link IRemoteTest} to granulate the IRemoteTest in testcase level.
67  * An IRemoteTest can contain multiple testcases. Previously, these testcases are treated as a
68  * whole: When IRemoteTest runs, all testcases will run. Some IRemoteTest (The ones that implements
69  * ITestFilterReceiver) can accept an allowlist of testcases and only run those testcases. This
70  * class takes advantage of the existing feature and provides a more flexible way to run test suite.
71  *
72  * <ul>
73  *   <li>Single testcase can be retried multiple times (within the same IRemoteTest run) to reduce
74  *       the non-test-error failure rates.
75  *   <li>The retried testcases are dynamically collected from previous run failures.
76  * </ul>
77  *
78  * <p>Note:
79  *
80  * <ul>
81  *   <li>The prerequisite to run a subset of test cases is that the test type should implement the
82  *       interface {@link ITestFilterReceiver}.
83  *   <li>X is customized max retry number.
84  * </ul>
85  */
86 public class GranularRetriableTestWrapper implements IRemoteTest, ITestCollector {
87 
88     private IRetryDecision mRetryDecision;
89     private IRemoteTest mTest;
90     private ModuleDefinition mModule;
91     private List<IMetricCollector> mRunMetricCollectors;
92     private IInvocationContext mModuleInvocationContext;
93     private IConfiguration mModuleConfiguration;
94     private ModuleListener mMainGranularRunListener;
95     private RetryLogSaverResultForwarder mRetryAttemptForwarder;
96     private List<ITestInvocationListener> mModuleLevelListeners;
97     private ITestInvocationListener mRemoteTestTimeOutEnforcer;
98     private ILogSaver mLogSaver;
99     private String mModuleId;
100     private int mMaxRunLimit;
101 
102     private boolean mCollectTestsOnly = false;
103 
104     // Tracking of the metrics
105     private RetryStatistics mRetryStats = null;
106     private int mCountRetryUsed = 0;
107 
GranularRetriableTestWrapper( IRemoteTest test, ITestInvocationListener mainListener, List<ITestInvocationListener> moduleLevelListeners, int maxRunLimit)108     public GranularRetriableTestWrapper(
109             IRemoteTest test,
110             ITestInvocationListener mainListener,
111             List<ITestInvocationListener> moduleLevelListeners,
112             int maxRunLimit) {
113         this(test, null, mainListener, moduleLevelListeners, maxRunLimit);
114     }
115 
GranularRetriableTestWrapper( IRemoteTest test, ModuleDefinition module, ITestInvocationListener mainListener, List<ITestInvocationListener> moduleLevelListeners, int maxRunLimit)116     public GranularRetriableTestWrapper(
117             IRemoteTest test,
118             ModuleDefinition module,
119             ITestInvocationListener mainListener,
120             List<ITestInvocationListener> moduleLevelListeners,
121             int maxRunLimit) {
122         mTest = test;
123         mModule = module;
124         IInvocationContext context = null;
125         if (module != null) {
126             context = module.getModuleInvocationContext();
127         }
128         initializeGranularRunListener(mainListener, context);
129         mModuleLevelListeners = moduleLevelListeners;
130         mMaxRunLimit = maxRunLimit;
131     }
132 
133     /** Sets the {@link IRetryDecision} to be used. */
setRetryDecision(IRetryDecision decision)134     public void setRetryDecision(IRetryDecision decision) {
135         mRetryDecision = decision;
136     }
137 
138     /**
139      * Set the {@link ModuleDefinition} name as a {@link GranularRetriableTestWrapper} attribute.
140      *
141      * @param moduleId the name of the moduleDefinition.
142      */
setModuleId(String moduleId)143     public void setModuleId(String moduleId) {
144         mModuleId = moduleId;
145     }
146 
147     /**
148      * Set the {@link ModuleDefinition} RunStrategy as a {@link GranularRetriableTestWrapper}
149      * attribute.
150      *
151      * @param skipTestCases whether the testcases should be skipped.
152      */
setMarkTestsSkipped(boolean skipTestCases)153     public void setMarkTestsSkipped(boolean skipTestCases) {
154         mMainGranularRunListener.setMarkTestsSkipped(skipTestCases);
155     }
156 
157     /**
158      * Set the {@link ModuleDefinition}'s runMetricCollector as a {@link
159      * GranularRetriableTestWrapper} attribute.
160      *
161      * @param runMetricCollectors A list of MetricCollector for the module.
162      */
setMetricCollectors(List<IMetricCollector> runMetricCollectors)163     public void setMetricCollectors(List<IMetricCollector> runMetricCollectors) {
164         mRunMetricCollectors = runMetricCollectors;
165     }
166 
167     /**
168      * Set the {@link ModuleDefinition}'s ModuleConfig as a {@link GranularRetriableTestWrapper}
169      * attribute.
170      *
171      * @param moduleConfiguration Provide the module metrics.
172      */
setModuleConfig(IConfiguration moduleConfiguration)173     public void setModuleConfig(IConfiguration moduleConfiguration) {
174         mModuleConfiguration = moduleConfiguration;
175     }
176 
177     /**
178      * Set the {@link IInvocationContext} as a {@link GranularRetriableTestWrapper} attribute.
179      *
180      * @param moduleInvocationContext The wrapper uses the InvocationContext to initialize the
181      *     MetricCollector when necessary.
182      */
setInvocationContext(IInvocationContext moduleInvocationContext)183     public void setInvocationContext(IInvocationContext moduleInvocationContext) {
184         mModuleInvocationContext = moduleInvocationContext;
185     }
186 
187     /**
188      * Set the Module's {@link ILogSaver} as a {@link GranularRetriableTestWrapper} attribute.
189      *
190      * @param logSaver The listeners for each test run should save the logs.
191      */
setLogSaver(ILogSaver logSaver)192     public void setLogSaver(ILogSaver logSaver) {
193         mLogSaver = logSaver;
194     }
195 
196     /**
197      * Initialize granular run listener with {@link RemoteTestTimeOutEnforcer} if timeout is set.
198      * And set the test-mapping sources in granular run listener.
199      *
200      * @param listener The listener for each test run should be wrapped.
201      * @param moduleContext the invocation context of the module
202      */
initializeGranularRunListener( ITestInvocationListener listener, IInvocationContext moduleContext)203     private void initializeGranularRunListener(
204             ITestInvocationListener listener, IInvocationContext moduleContext) {
205         mMainGranularRunListener = new ModuleListener(listener, moduleContext);
206         if (mModule != null) {
207             ConfigurationDescriptor configDesc =
208                     mModule.getModuleInvocationContext().getConfigurationDescriptor();
209             if (configDesc.getMetaData(
210                     RemoteTestTimeOutEnforcer.REMOTE_TEST_TIMEOUT_OPTION) != null) {
211                 Duration duration = Duration.parse(
212                         configDesc.getMetaData(
213                                 RemoteTestTimeOutEnforcer.REMOTE_TEST_TIMEOUT_OPTION).get(0));
214                 mRemoteTestTimeOutEnforcer = new RemoteTestTimeOutEnforcer(
215                         mMainGranularRunListener, mModule, mTest, duration);
216             }
217             List<String> testMappingSources =
218                     configDesc.getMetaData(Integer.toString(mTest.hashCode()));
219             if (testMappingSources != null) {
220                 mMainGranularRunListener.setTestMappingSources(testMappingSources);
221             }
222         }
223     }
224 
225     /**
226      * Initialize a new {@link ModuleListener} for each test run.
227      *
228      * @return a {@link ITestInvocationListener} listener which contains the new {@link
229      *     ModuleListener}, the main {@link ITestInvocationListener} and main {@link
230      *     TestFailureListener}, and wrapped by RunMetricsCollector and Module MetricCollector (if
231      *     not initialized).
232      */
initializeListeners()233     private ITestInvocationListener initializeListeners() throws DeviceNotAvailableException {
234         List<ITestInvocationListener> currentTestListener = new ArrayList<>();
235         // Add all the module level listeners, including TestFailureListener
236         if (mModuleLevelListeners != null) {
237             currentTestListener.addAll(mModuleLevelListeners);
238         }
239         currentTestListener.add(mMainGranularRunListener);
240 
241         if (mRemoteTestTimeOutEnforcer != null) {
242             currentTestListener.add(mRemoteTestTimeOutEnforcer);
243         }
244 
245         mRetryAttemptForwarder = new RetryLogSaverResultForwarder(mLogSaver, currentTestListener);
246         ITestInvocationListener runListener = mRetryAttemptForwarder;
247 
248         // The module collectors itself are added: this list will be very limited.
249         // We clone them since the configuration object is shared across shards.
250         for (IMetricCollector collector :
251                 CollectorHelper.cloneCollectors(mModuleConfiguration.getMetricCollectors())) {
252             if (collector.isDisabled()) {
253                 CLog.d("%s has been disabled. Skipping.", collector);
254             } else {
255                 try (CloseableTraceScope ignored =
256                         new CloseableTraceScope(
257                                 "init_attempt_" + collector.getClass().getSimpleName())) {
258                     if (collector instanceof IConfigurationReceiver) {
259                         ((IConfigurationReceiver) collector).setConfiguration(mModuleConfiguration);
260                     }
261                     runListener = collector.init(mModuleInvocationContext, runListener);
262                 }
263             }
264         }
265 
266         return runListener;
267     }
268 
269     /**
270      * Schedule a series of {@link IRemoteTest#run(TestInformation, ITestInvocationListener)}.
271      *
272      * @param listener The ResultForwarder listener which contains a new moduleListener for each
273      *     run.
274      */
275     @Override
run(TestInformation testInfo, ITestInvocationListener listener)276     public void run(TestInformation testInfo, ITestInvocationListener listener)
277             throws DeviceNotAvailableException {
278         mMainGranularRunListener.setCollectTestsOnly(mCollectTestsOnly);
279         ITestInvocationListener allListeners = initializeListeners();
280         // First do the regular run, not retried.
281         DeviceNotAvailableException dnae = intraModuleRun(testInfo, allListeners, 0);
282 
283         if (mMaxRunLimit <= 1) {
284             // TODO: If module is the last one and there is no retry quota, it won't need to do
285             //  device recovery.
286             if (dnae == null || !mModule.shouldRecoverVirtualDevice()) {
287                 if (dnae != null) {
288                     throw dnae;
289                 }
290                 return;
291             }
292         }
293 
294         if (mRetryDecision == null) {
295             CLog.e("RetryDecision is null. Something is misconfigured this shouldn't happen");
296             return;
297         }
298 
299         // Bail out early if there is no need to retry at all.
300         if (!mRetryDecision.shouldRetry(
301                 mTest, mModule, 0, mMainGranularRunListener.getTestRunForAttempts(0), dnae)) {
302             return;
303         }
304 
305         // Avoid rechecking the shouldRetry below the first time as it could retrigger reboot.
306         boolean firstCheck = true;
307 
308         // Deal with retried attempted
309         long startTime = System.currentTimeMillis();
310         try {
311             CLog.d("Starting intra-module retry.");
312             for (int attemptNumber = 1; attemptNumber < mMaxRunLimit; attemptNumber++) {
313                 if (!firstCheck) {
314                     boolean retry =
315                             mRetryDecision.shouldRetry(
316                                     mTest,
317                                     mModule,
318                                     attemptNumber - 1,
319                                     mMainGranularRunListener.getTestRunForAttempts(
320                                             attemptNumber - 1),
321                                     dnae);
322                     if (!retry) {
323                         return;
324                     }
325                 }
326                 firstCheck = false;
327                 mCountRetryUsed++;
328                 CLog.d("Intra-module retry attempt number %s", attemptNumber);
329                 // Run the tests again
330                 dnae = intraModuleRun(testInfo, allListeners, attemptNumber);
331             }
332             // Feed the last attempt if we reached here.
333             mRetryDecision.addLastAttempt(
334                     mMainGranularRunListener.getTestRunForAttempts(mMaxRunLimit - 1));
335         } finally {
336             mRetryStats = mRetryDecision.getRetryStatistics();
337             // Track how long we spend in retry
338             mRetryStats.mRetryTime = System.currentTimeMillis() - startTime;
339         }
340     }
341 
342     /**
343      * The workflow for each individual {@link IRemoteTest} run.
344      *
345      * @return DeviceNotAvailableException while DNAE happened, null otherwise.
346      */
intraModuleRun( TestInformation testInfo, ITestInvocationListener runListener, int attempt)347     private final DeviceNotAvailableException intraModuleRun(
348             TestInformation testInfo, ITestInvocationListener runListener, int attempt) {
349         DeviceNotAvailableException exception = null;
350         mMainGranularRunListener.setAttemptIsolation(CurrentInvocation.runCurrentIsolation());
351         StartEndCollector startEndCollector = new StartEndCollector(runListener);
352         runListener = startEndCollector;
353         try (CloseableTraceScope ignored =
354                 new CloseableTraceScope(
355                         "attempt " + attempt + " " + mTest.getClass().getCanonicalName())) {
356             List<IMetricCollector> clonedCollectors = cloneCollectors(mRunMetricCollectors);
357             if (mTest instanceof IMetricCollectorReceiver) {
358                 ((IMetricCollectorReceiver) mTest).setMetricCollectors(clonedCollectors);
359                 // If test can receive collectors then let it handle how to set them up
360                 mTest.run(testInfo, runListener);
361             } else {
362                 if (mModuleConfiguration.getCommandOptions().reportTestCaseCount()) {
363                     CountTestCasesCollector counter = new CountTestCasesCollector(mTest);
364                     clonedCollectors.add(counter);
365                 }
366                 // Module only init the collectors here to avoid triggering the collectors when
367                 // replaying the cached events at the end. This ensures metrics are capture at
368                 // the proper time in the invocation.
369                 for (IMetricCollector collector : clonedCollectors) {
370                     if (collector.isDisabled()) {
371                         CLog.d("%s has been disabled. Skipping.", collector);
372                     } else {
373                         try (CloseableTraceScope ignoreCollector =
374                                 new CloseableTraceScope(
375                                         "init_run_" + collector.getClass().getSimpleName())) {
376                             if (collector instanceof IConfigurationReceiver) {
377                                 ((IConfigurationReceiver) collector)
378                                         .setConfiguration(mModuleConfiguration);
379                             }
380                             runListener = collector.init(mModuleInvocationContext, runListener);
381                         }
382                     }
383                 }
384                 mTest.run(testInfo, runListener);
385             }
386         } catch (RuntimeException | AssertionError re) {
387             CLog.e("Module '%s' - test '%s' threw exception:", mModuleId, mTest.getClass());
388             CLog.e(re);
389             CLog.e("Proceeding to the next test.");
390             if (!startEndCollector.mRunStartReported) {
391                 CLog.e("Event mismatch ! the test runner didn't report any testRunStart.");
392                 runListener.testRunStarted(mModule.getId(), 0);
393             }
394             runListener.testRunFailed(createFromException(re));
395             if (!startEndCollector.mRunEndedReported) {
396                 CLog.e("Event mismatch ! the test runner didn't report any testRunEnded.");
397                 runListener.testRunEnded(0L, new HashMap<String, Metric>());
398             }
399         } catch (DeviceUnresponsiveException due) {
400             // being able to catch a DeviceUnresponsiveException here implies that recovery was
401             // successful, and test execution should proceed to next module.
402             CLog.w(
403                     "Ignored DeviceUnresponsiveException because recovery was "
404                             + "successful, proceeding with next module. Stack trace:");
405             CLog.w(due);
406             CLog.w("Proceeding to the next test.");
407             // If it already was marked as failure do not remark it.
408             if (!mMainGranularRunListener.hasLastAttemptFailed()) {
409                 runListener.testRunFailed(createFromException(due));
410             }
411         } catch (DeviceNotAvailableException dnae) {
412             // TODO: See if it's possible to report IReportNotExecuted
413             CLog.e("Run in progress was not completed due to:");
414             CLog.e(dnae);
415             // If it already was marked as failure do not remark it.
416             if (!mMainGranularRunListener.hasLastAttemptFailed()) {
417                 runListener.testRunFailed(createFromException(dnae));
418             }
419             exception = dnae;
420         } finally {
421             mRetryAttemptForwarder.incrementAttempt();
422             // After one run, do not consider follow up isolated without action.
423             CurrentInvocation.setRunIsolation(IsolationGrade.NOT_ISOLATED);
424         }
425         return exception;
426     }
427 
428     /** Get the merged TestRunResults from each {@link IRemoteTest} run. */
getFinalTestRunResults()429     public final List<TestRunResult> getFinalTestRunResults() {
430         MergeStrategy strategy = MergeStrategy.getMergeStrategy(mRetryDecision.getRetryStrategy());
431         mMainGranularRunListener.setMergeStrategy(strategy);
432         return mMainGranularRunListener.getMergedTestRunResults();
433     }
434 
435     @VisibleForTesting
getTestRunResultCollected()436     Map<String, List<TestRunResult>> getTestRunResultCollected() {
437         Map<String, List<TestRunResult>> runResultMap = new LinkedHashMap<>();
438         for (String runName : mMainGranularRunListener.getTestRunNames()) {
439             runResultMap.put(runName, mMainGranularRunListener.getTestRunAttempts(runName));
440         }
441         return runResultMap;
442     }
443 
444     @VisibleForTesting
cloneCollectors(List<IMetricCollector> originalCollectors)445     List<IMetricCollector> cloneCollectors(List<IMetricCollector> originalCollectors) {
446         return CollectorHelper.cloneCollectors(originalCollectors);
447     }
448 
449     /**
450      * Calculate the number of testcases in the {@link IRemoteTest}. This value distincts the same
451      * testcases that are rescheduled multiple times.
452      */
getExpectedTestsCount()453     public final int getExpectedTestsCount() {
454         return mMainGranularRunListener.getExpectedTests();
455     }
456 
getPassedTests()457     public final Set<TestDescription> getPassedTests() {
458         Set<TestDescription> nonFailedTests = new LinkedHashSet<>();
459         for (TestRunResult runResult : mMainGranularRunListener.getMergedTestRunResults()) {
460             nonFailedTests.addAll(
461                     runResult.getTestsInState(
462                             Arrays.asList(
463                                     TestStatus.PASSED,
464                                     TestStatus.IGNORED,
465                                     TestStatus.ASSUMPTION_FAILURE)));
466         }
467         return nonFailedTests;
468     }
469 
470     /** Returns the listener containing all the results. */
getResultListener()471     public ModuleListener getResultListener() {
472         return mMainGranularRunListener;
473     }
474 
getRetryCount()475     public int getRetryCount() {
476         return mCountRetryUsed;
477     }
478 
479     @Override
setCollectTestsOnly(boolean shouldCollectTest)480     public void setCollectTestsOnly(boolean shouldCollectTest) {
481         mCollectTestsOnly = shouldCollectTest;
482     }
483 
createFromException(Throwable exception)484     private FailureDescription createFromException(Throwable exception) {
485         String message =
486                 (exception.getMessage() == null)
487                         ? String.format(
488                                 "No error message reported for: %s",
489                                 StreamUtil.getStackTrace(exception))
490                         : exception.getMessage();
491         FailureDescription failure =
492                 CurrentInvocation.createFailure(message, null).setCause(exception);
493         if (exception instanceof IHarnessException) {
494             ErrorIdentifier id = ((IHarnessException) exception).getErrorId();
495             failure.setErrorIdentifier(id);
496             if (id != null) {
497                 failure.setFailureStatus(id.status());
498             }
499             failure.setOrigin(((IHarnessException) exception).getOrigin());
500         }
501         return failure;
502     }
503 
504     /** Class helper to catch missing run start and end. */
505     public class StartEndCollector extends ResultAndLogForwarder {
506 
507         public boolean mRunStartReported = false;
508         public boolean mRunEndedReported = false;
509 
StartEndCollector(ITestInvocationListener listener)510         StartEndCollector(ITestInvocationListener listener) {
511             super(listener);
512         }
513 
514         @Override
testRunStarted(String runName, int testCount)515         public void testRunStarted(String runName, int testCount) {
516             super.testRunStarted(runName, testCount);
517             mRunStartReported = true;
518         }
519 
520         @Override
testRunStarted(String runName, int testCount, int attemptNumber)521         public void testRunStarted(String runName, int testCount, int attemptNumber) {
522             super.testRunStarted(runName, testCount, attemptNumber);
523             mRunStartReported = true;
524         }
525 
526         @Override
testRunStarted( String runName, int testCount, int attemptNumber, long startTime)527         public void testRunStarted(
528                 String runName, int testCount, int attemptNumber, long startTime) {
529             super.testRunStarted(runName, testCount, attemptNumber, startTime);
530             mRunStartReported = true;
531         }
532 
533         @Override
testRunEnded(long elapsedTime, HashMap<String, Metric> runMetrics)534         public void testRunEnded(long elapsedTime, HashMap<String, Metric> runMetrics) {
535             super.testRunEnded(elapsedTime, runMetrics);
536             mRunEndedReported = true;
537         }
538 
539         @Override
testRunEnded(long elapsedTimeMillis, Map<String, String> runMetrics)540         public void testRunEnded(long elapsedTimeMillis, Map<String, String> runMetrics) {
541             super.testRunEnded(elapsedTimeMillis, runMetrics);
542             mRunEndedReported = true;
543         }
544     }
545 }
546