1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.invoker.shard;
17 
18 import com.android.annotations.VisibleForTesting;
19 import com.android.tradefed.build.BuildRetrievalError;
20 import com.android.tradefed.config.Configuration;
21 import com.android.tradefed.config.ConfigurationDescriptor;
22 import com.android.tradefed.config.ConfigurationException;
23 import com.android.tradefed.config.DynamicRemoteFileResolver;
24 import com.android.tradefed.config.GlobalConfiguration;
25 import com.android.tradefed.config.IConfiguration;
26 import com.android.tradefed.config.IGlobalConfiguration;
27 import com.android.tradefed.invoker.IInvocationContext;
28 import com.android.tradefed.invoker.IRescheduler;
29 import com.android.tradefed.invoker.ShardListener;
30 import com.android.tradefed.invoker.ShardMainResultForwarder;
31 import com.android.tradefed.invoker.TestInformation;
32 import com.android.tradefed.invoker.shard.token.ITokenRequest;
33 import com.android.tradefed.log.ITestLogger;
34 import com.android.tradefed.log.LogUtil.CLog;
35 import com.android.tradefed.result.IShardableListener;
36 import com.android.tradefed.result.ITestInvocationListener;
37 import com.android.tradefed.result.ITestLoggerReceiver;
38 import com.android.tradefed.retry.IRetryDecision;
39 import com.android.tradefed.service.TradefedFeatureServer;
40 import com.android.tradefed.suite.checker.ISystemStatusChecker;
41 import com.android.tradefed.testtype.IBuildReceiver;
42 import com.android.tradefed.testtype.IDeviceTest;
43 import com.android.tradefed.testtype.IInvocationContextReceiver;
44 import com.android.tradefed.testtype.IRemoteTest;
45 import com.android.tradefed.testtype.IShardableTest;
46 import com.android.tradefed.testtype.suite.ITestSuite;
47 import com.android.tradefed.util.keystore.IKeyStoreClient;
48 import com.android.tradefed.util.keystore.KeyStoreException;
49 
50 import java.util.ArrayList;
51 import java.util.Collection;
52 import java.util.Collections;
53 import java.util.Iterator;
54 import java.util.LinkedHashMap;
55 import java.util.List;
56 import java.util.Map;
57 import java.util.Map.Entry;
58 import java.util.concurrent.CountDownLatch;
59 
60 /** Helper class that handles creating the shards and scheduling them for an invocation. */
61 public class ShardHelper implements IShardHelper {
62 
63     public static final String LAST_SHARD_DETECTOR = "last_shard_detector";
64     public static final String SHARED_TEST_INFORMATION = "shared_test_information";
65 
66     /**
67      * List of the list configuration obj that should be clone to each shard in order to avoid state
68      * issues.
69      */
70     private static final List<String> CONFIG_OBJ_TO_CLONE = new ArrayList<>();
71 
72     static {
73         CONFIG_OBJ_TO_CLONE.add(Configuration.SYSTEM_STATUS_CHECKER_TYPE_NAME);
74         CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_METRICS_COLLECTOR_TYPE_NAME);
75         // Copy all the objects under the <device> tag from
76         // {@link Configuration#getMultiDeviceSupportedTag()} except DEVICE_REQUIREMENTS_TYPE_NAME
77         // which should be shared since all shards should have the same requirements.
78         CONFIG_OBJ_TO_CLONE.add(Configuration.BUILD_PROVIDER_TYPE_NAME);
79         CONFIG_OBJ_TO_CLONE.add(Configuration.TARGET_PREPARER_TYPE_NAME);
80         CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_RECOVERY_TYPE_NAME);
81         CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_OPTIONS_TYPE_NAME);
82 
83         CONFIG_OBJ_TO_CLONE.add(Configuration.MULTI_PREPARER_TYPE_NAME);
84         CONFIG_OBJ_TO_CLONE.add(Configuration.CMD_OPTIONS_TYPE_NAME);
85         CONFIG_OBJ_TO_CLONE.add(Configuration.LOGGER_TYPE_NAME);
86         // Deep clone of log_saver to ensure each shard manages its own logs
87         CONFIG_OBJ_TO_CLONE.add(Configuration.LOG_SAVER_TYPE_NAME);
88         // Deep clone RetryDecision to ensure each shard retry independently
89         CONFIG_OBJ_TO_CLONE.add(Configuration.RETRY_DECISION_TYPE_NAME);
90         // Deep clone ConfigurationDescriptor
91         CONFIG_OBJ_TO_CLONE.add(Configuration.CONFIGURATION_DESCRIPTION_TYPE_NAME);
92     }
93 
94     /**
95      * Attempt to shard the configuration into sub-configurations, to be re-scheduled to run on
96      * multiple resources in parallel.
97      *
98      * <p>A successful shard action renders the current config empty, and invocation should not
99      * proceed.
100      *
101      * @see IShardableTest
102      * @see IRescheduler
103      * @param config the current {@link IConfiguration}.
104      * @param testInfo the {@link TestInformation} holding the tests information.
105      * @param rescheduler the {@link IRescheduler}
106      * @return true if test was sharded. Otherwise return <code>false</code>
107      */
108     @Override
shardConfig( IConfiguration config, TestInformation testInfo, IRescheduler rescheduler, ITestLogger logger)109     public boolean shardConfig(
110             IConfiguration config,
111             TestInformation testInfo,
112             IRescheduler rescheduler,
113             ITestLogger logger) {
114         IInvocationContext context = testInfo.getContext();
115         List<IRemoteTest> shardableTests = new ArrayList<IRemoteTest>();
116         boolean isSharded = false;
117         Integer shardCount = config.getCommandOptions().getShardCount();
118         for (IRemoteTest test : config.getTests()) {
119             isSharded |= shardTest(shardableTests, test, shardCount, testInfo, logger);
120         }
121         if (!isSharded) {
122             return false;
123         }
124         // shard this invocation!
125         // create the TestInvocationListener that will collect results from all the shards,
126         // and forward them to the original set of listeners (minus any ISharddableListeners)
127         // once all shards complete
128         Map<Integer, List<IRemoteTest>> multiDevicesShards = buildMultiDevicesShard(shardableTests);
129         int expectedShard = shardableTests.size();
130         if (shardCount != null) {
131             expectedShard = Math.min(shardCount, shardableTests.size());
132         }
133         if (!multiDevicesShards.isEmpty()) {
134             // Account for one shard for the multi-devices
135             expectedShard += multiDevicesShards.size();
136         }
137         // Add a tracker so we know in invocation if the last shard is done running.
138         LastShardDetector lastShard = new LastShardDetector();
139         ShardMainResultForwarder resultCollector =
140                 new ShardMainResultForwarder(
141                         buildMainShardListeners(config, lastShard), expectedShard);
142 
143         config.getLogSaver().invocationStarted(context);
144         resultCollector.invocationStarted(context);
145         synchronized (shardableTests) {
146             scheduledMultiDevicesShard(
147                     multiDevicesShards,
148                     config,
149                     testInfo,
150                     rescheduler,
151                     resultCollector,
152                     expectedShard);
153             // When shardCount is available only create 1 poller per shard
154             // TODO: consider aggregating both case by picking a predefined shardCount if not
155             // available (like 4) for autosharding.
156             if (shardCount != null) {
157                 // We shuffle the tests for best results: avoid having the same module sub-tests
158                 // contiguously in the list.
159                 Collections.shuffle(shardableTests);
160                 // Rectify the expected number of poller to match
161                 CountDownLatch tracker =
162                         new CountDownLatch(expectedShard - multiDevicesShards.size());
163                 Collection<ITokenRequest> tokenPool = null;
164                 if (config.getCommandOptions().shouldUseTokenSharding()) {
165                     tokenPool = extractTokenTests(shardableTests);
166                 }
167                 for (int i = 0; i < expectedShard - multiDevicesShards.size(); i++) {
168                     IConfiguration shardConfig = cloneConfigObject(config);
169                     try {
170                         shardConfig.setConfigurationObject(LAST_SHARD_DETECTOR, lastShard);
171                     } catch (ConfigurationException e) {
172                         throw new RuntimeException(e);
173                     }
174                     TestsPoolPoller poller =
175                             new TestsPoolPoller(
176                                     createTestsPool(shardableTests, tokenPool), tracker);
177                     shardConfig.setTest(poller);
178                     rescheduleConfig(
179                             shardConfig, config, testInfo, rescheduler, resultCollector, i);
180                 }
181             } else {
182                 CountDownLatch tracker = new CountDownLatch(shardableTests.size());
183                 Collection<ITokenRequest> tokenPool = null;
184                 if (config.getCommandOptions().shouldUseTokenSharding()) {
185                     tokenPool = extractTokenTests(shardableTests);
186                 }
187                 int i = 0;
188                 for (IRemoteTest testShard : shardableTests) {
189                     CLog.d("Rescheduling sharded config...");
190                     IConfiguration shardConfig = cloneConfigObject(config);
191                     try {
192                         shardConfig.setConfigurationObject(LAST_SHARD_DETECTOR, lastShard);
193                     } catch (ConfigurationException e) {
194                         throw new RuntimeException(e);
195                     }
196                     if (config.getCommandOptions().shouldUseDynamicSharding()) {
197                         TestsPoolPoller poller =
198                                 new TestsPoolPoller(
199                                         createTestsPool(shardableTests, tokenPool), tracker);
200                         shardConfig.setTest(poller);
201                     } else {
202                         shardConfig.setTest(testShard);
203                     }
204                     rescheduleConfig(
205                             shardConfig, config, testInfo, rescheduler, resultCollector, i);
206                     i++;
207                 }
208             }
209         }
210         // If we are sharding inside sandbox, don't clean, let the parent do it.
211         if (!config.getConfigurationDescription().shouldUseSandbox()) {
212             // clean up original builds
213             for (String deviceName : context.getDeviceConfigNames()) {
214                 config.getDeviceConfigByName(deviceName)
215                         .getBuildProvider()
216                         .cleanUp(context.getBuildInfo(deviceName));
217             }
218         }
219         return true;
220     }
221 
createTestsPool( Collection<IRemoteTest> tests, Collection<ITokenRequest> tokenTests)222     private ITestsPool createTestsPool(
223             Collection<IRemoteTest> tests, Collection<ITokenRequest> tokenTests) {
224         return new LocalPool(tests, tokenTests);
225     }
226 
rescheduleConfig( IConfiguration shardConfig, IConfiguration config, TestInformation testInfo, IRescheduler rescheduler, ShardMainResultForwarder resultCollector, int index)227     private void rescheduleConfig(
228             IConfiguration shardConfig,
229             IConfiguration config,
230             TestInformation testInfo,
231             IRescheduler rescheduler,
232             ShardMainResultForwarder resultCollector,
233             int index) {
234         validateOptions(testInfo, shardConfig);
235         ShardBuildCloner.cloneBuildInfos(config, shardConfig, testInfo);
236 
237         shardConfig.setTestInvocationListeners(
238                 buildShardListeners(resultCollector, config, config.getTestInvocationListeners()));
239 
240         // Set the host_log suffix to avoid similar names
241         String suffix = String.format("_shard_index_%s", index);
242         if (shardConfig.getCommandOptions().getHostLogSuffix() != null) {
243             suffix = shardConfig.getCommandOptions().getHostLogSuffix() + suffix;
244         }
245         shardConfig.getCommandOptions().setHostLogSuffix(suffix);
246 
247         // Use the same {@link ITargetPreparer}, {@link IDeviceRecovery} etc as original config
248         // Make sure we don't run as sandboxed in shards, only parent invocation needs to
249         // run as sandboxed
250         shardConfig.getConfigurationDescription().setSandboxed(false);
251         shardConfig.getConfigurationDescription().setShardIndex(index);
252         rescheduler.scheduleConfig(shardConfig);
253     }
254 
255     /** Returns the current global configuration. */
256     @VisibleForTesting
getGlobalConfiguration()257     protected IGlobalConfiguration getGlobalConfiguration() {
258         return GlobalConfiguration.getInstance();
259     }
260 
261     /** Runs the {@link IConfiguration#validateOptions()} on the config. */
262     @VisibleForTesting
validateOptions(TestInformation testInfo, IConfiguration config)263     protected void validateOptions(TestInformation testInfo, IConfiguration config) {
264         try {
265             config.validateOptions();
266             DynamicRemoteFileResolver resolver = new DynamicRemoteFileResolver();
267             resolver.setDevice(testInfo.getDevice());
268             resolver.addExtraArgs(config.getCommandOptions().getDynamicDownloadArgs());
269             config.resolveDynamicOptions(resolver);
270         } catch (ConfigurationException | BuildRetrievalError e) {
271             throw new RuntimeException(e);
272         }
273     }
274 
275     /**
276      * Helper to clone {@link ISystemStatusChecker}s from the original config to the clonedConfig.
277      */
cloneConfigObject(IConfiguration origConfig)278     private IConfiguration cloneConfigObject(IConfiguration origConfig) {
279         IKeyStoreClient client = null;
280         try {
281             client = getGlobalConfiguration().getKeyStoreFactory().createKeyStoreClient();
282         } catch (KeyStoreException e) {
283             throw new RuntimeException(
284                     String.format(
285                             "failed to load keystore client when sharding: %s", e.getMessage()),
286                     e);
287         }
288 
289         try {
290             IConfiguration deepCopy = origConfig.partialDeepClone(CONFIG_OBJ_TO_CLONE, client);
291             // Sharding was done, no need for children to look into it.
292             deepCopy.getCommandOptions().setShardCount(null);
293             deepCopy.getConfigurationDescription()
294                     .addMetadata(ConfigurationDescriptor.LOCAL_SHARDED_KEY, "true");
295             // Remove parent shard server reference from the copy.
296             deepCopy.getConfigurationDescription().removeMetadata(
297                     TradefedFeatureServer.SERVER_REFERENCE);
298             return deepCopy;
299         } catch (ConfigurationException e) {
300             throw new RuntimeException(
301                     String.format("failed to deep copy a configuration: %s", e.getMessage()), e);
302         }
303     }
304 
305     /**
306      * Attempt to shard given {@link IRemoteTest}.
307      *
308      * @param shardableTests the list of {@link IRemoteTest}s to add to
309      * @param test the {@link IRemoteTest} to shard
310      * @param shardCount attempted number of shard, can be null.
311      * @param testInfo the {@link TestInformation} of the current invocation.
312      * @return <code>true</code> if test was sharded
313      */
shardTest( List<IRemoteTest> shardableTests, IRemoteTest test, Integer shardCount, TestInformation testInfo, ITestLogger logger)314     private static boolean shardTest(
315             List<IRemoteTest> shardableTests,
316             IRemoteTest test,
317             Integer shardCount,
318             TestInformation testInfo,
319             ITestLogger logger) {
320         boolean isSharded = false;
321         if (test instanceof IShardableTest) {
322             // inject device and build since they might be required to shard.
323             if (test instanceof IBuildReceiver) {
324                 ((IBuildReceiver) test).setBuild(testInfo.getBuildInfo());
325             }
326             if (test instanceof IDeviceTest) {
327                 ((IDeviceTest) test).setDevice(testInfo.getDevice());
328             }
329             if (test instanceof IInvocationContextReceiver) {
330                 ((IInvocationContextReceiver) test).setInvocationContext(testInfo.getContext());
331             }
332             if (test instanceof ITestLoggerReceiver) {
333                 ((ITestLoggerReceiver) test).setTestLogger(logger);
334             }
335 
336             IShardableTest shardableTest = (IShardableTest) test;
337             // Give the shardCount hint to tests if they need it.
338             Collection<IRemoteTest> shards = shardableTest.split(shardCount, testInfo);
339             if (shards != null) {
340                 shardableTests.addAll(shards);
341                 isSharded = true;
342             }
343         }
344         if (!isSharded) {
345             shardableTests.add(test);
346         }
347         return isSharded;
348     }
349 
350     /**
351      * Builds the {@link ITestInvocationListener} listeners that will collect the results from all
352      * shards. Currently excludes {@link IShardableListener}s.
353      */
buildMainShardListeners( IConfiguration config, LastShardDetector lastShardDetector)354     private static List<ITestInvocationListener> buildMainShardListeners(
355             IConfiguration config, LastShardDetector lastShardDetector) {
356         List<ITestInvocationListener> newListeners = new ArrayList<ITestInvocationListener>();
357         for (ITestInvocationListener l : config.getTestInvocationListeners()) {
358             if (!(l instanceof IShardableListener)) {
359                 newListeners.add(l);
360             } else if (!((IShardableListener) l).supportShardListener()) {
361                 newListeners.add(l);
362             }
363         }
364         newListeners.add(lastShardDetector);
365         return newListeners;
366     }
367 
368     /**
369      * Builds the list of {@link ITestInvocationListener}s for each shard. Currently includes any
370      * {@link IShardableListener}, plus a single listener that will forward results to the main
371      * shard collector.
372      */
buildShardListeners( ShardMainResultForwarder resultCollector, IConfiguration config, List<ITestInvocationListener> origListeners)373     private static List<ITestInvocationListener> buildShardListeners(
374             ShardMainResultForwarder resultCollector,
375             IConfiguration config,
376             List<ITestInvocationListener> origListeners) {
377         List<ITestInvocationListener> shardListeners = new ArrayList<ITestInvocationListener>();
378         for (ITestInvocationListener l : origListeners) {
379             if (l instanceof IShardableListener
380                     && ((IShardableListener) l).supportShardListener()) {
381                 shardListeners.add(((IShardableListener) l).clone());
382             }
383         }
384         ShardListener origConfigListener = new ShardListener(resultCollector);
385         origConfigListener.setSupportGranularResults(isAutoRetryEnabled(config));
386         shardListeners.add(origConfigListener);
387         return shardListeners;
388     }
389 
isAutoRetryEnabled(IConfiguration config)390     private static boolean isAutoRetryEnabled(IConfiguration config) {
391         IRetryDecision decision = config.getRetryDecision();
392         if (decision.isAutoRetryEnabled() && decision.getMaxRetryCount() > 0) {
393             return true;
394         }
395         return false;
396     }
397 
extractTokenTests(Collection<IRemoteTest> shardableTests)398     private Collection<ITokenRequest> extractTokenTests(Collection<IRemoteTest> shardableTests) {
399         List<ITokenRequest> tokenPool = new ArrayList<>();
400         Iterator<IRemoteTest> itr = new ArrayList<>(shardableTests).iterator();
401 
402         while (itr.hasNext()) {
403             IRemoteTest test = itr.next();
404             if (test instanceof ITokenRequest) {
405                 tokenPool.add((ITokenRequest) test);
406                 shardableTests.remove(test);
407             }
408         }
409         return tokenPool;
410     }
411 
buildMultiDevicesShard( List<IRemoteTest> shardableTests)412     private Map<Integer, List<IRemoteTest>> buildMultiDevicesShard(
413             List<IRemoteTest> shardableTests) {
414         Map<Integer, List<IRemoteTest>> neededDevicePerTest =
415                 new LinkedHashMap<Integer, List<IRemoteTest>>();
416         for (IRemoteTest test : new ArrayList<>(shardableTests)) {
417             if (test instanceof ITestSuite
418                     && ((ITestSuite) test).getDirectModule().neededDevices() > 1) {
419                 shardableTests.remove(test);
420                 int neededDevices = ((ITestSuite) test).getDirectModule().neededDevices();
421                 if (!neededDevicePerTest.containsKey(neededDevices)) {
422                     neededDevicePerTest.put(neededDevices, new ArrayList<IRemoteTest>());
423                 }
424                 List<IRemoteTest> multiDevicesTests = neededDevicePerTest.get(neededDevices);
425                 multiDevicesTests.add(test);
426             }
427         }
428         return neededDevicePerTest;
429     }
430 
431     /**
432      * Schedule a replicated config for each device-needed count so it will self allocate the
433      * appropriate number of devices.
434      */
scheduledMultiDevicesShard( Map<Integer, List<IRemoteTest>> multiDevicesShards, IConfiguration config, TestInformation testInfo, IRescheduler rescheduler, ShardMainResultForwarder resultCollector, int expectedShard)435     private void scheduledMultiDevicesShard(
436             Map<Integer, List<IRemoteTest>> multiDevicesShards,
437             IConfiguration config,
438             TestInformation testInfo,
439             IRescheduler rescheduler,
440             ShardMainResultForwarder resultCollector,
441             int expectedShard) {
442         if (multiDevicesShards.isEmpty()) {
443             return;
444         }
445         int index = expectedShard - multiDevicesShards.size();
446         for (Entry<Integer, List<IRemoteTest>> multiDevicesTest : multiDevicesShards.entrySet()) {
447             IConfiguration shardConfig = cloneConfigObject(config);
448             shardConfig.setTests(multiDevicesTest.getValue());
449             shardConfig.getCommandOptions().setMultiDeviceCount(multiDevicesTest.getKey());
450             shardConfig.getCommandOptions().setReplicateSetup(true);
451             rescheduleConfig(shardConfig, config, testInfo, rescheduler, resultCollector, index);
452             index++;
453         }
454     }
455 }
456