/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.tradefed.invoker.shard;

import com.android.annotations.VisibleForTesting;
import com.android.tradefed.build.BuildRetrievalError;
import com.android.tradefed.config.Configuration;
import com.android.tradefed.config.ConfigurationDescriptor;
import com.android.tradefed.config.ConfigurationException;
import com.android.tradefed.config.DynamicRemoteFileResolver;
import com.android.tradefed.config.GlobalConfiguration;
import com.android.tradefed.config.IConfiguration;
import com.android.tradefed.config.IGlobalConfiguration;
import com.android.tradefed.invoker.IInvocationContext;
import com.android.tradefed.invoker.IRescheduler;
import com.android.tradefed.invoker.ShardListener;
import com.android.tradefed.invoker.ShardMainResultForwarder;
import com.android.tradefed.invoker.TestInformation;
import com.android.tradefed.invoker.shard.token.ITokenRequest;
import com.android.tradefed.log.ITestLogger;
import com.android.tradefed.log.LogUtil.CLog;
import com.android.tradefed.result.IShardableListener;
import com.android.tradefed.result.ITestInvocationListener;
import com.android.tradefed.result.ITestLoggerReceiver;
import com.android.tradefed.retry.IRetryDecision;
import com.android.tradefed.service.TradefedFeatureServer;
import com.android.tradefed.suite.checker.ISystemStatusChecker;
import com.android.tradefed.testtype.IBuildReceiver;
import com.android.tradefed.testtype.IDeviceTest;
import com.android.tradefed.testtype.IInvocationContextReceiver;
import com.android.tradefed.testtype.IRemoteTest;
import com.android.tradefed.testtype.IShardableTest;
import com.android.tradefed.testtype.suite.ITestSuite;
import com.android.tradefed.util.keystore.IKeyStoreClient;
import com.android.tradefed.util.keystore.KeyStoreException;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CountDownLatch;

/** Helper class that handles creating the shards and scheduling them for an invocation. */
public class ShardHelper implements IShardHelper {

    public static final String LAST_SHARD_DETECTOR = "last_shard_detector";
    public static final String SHARED_TEST_INFORMATION = "shared_test_information";

    /**
     * Names of the configuration object types that are cloned into each shard (rather than
     * shared with the parent configuration) in order to avoid state issues.
     */
    private static final List<String> CONFIG_OBJ_TO_CLONE = new ArrayList<>();

    static {
        CONFIG_OBJ_TO_CLONE.add(Configuration.SYSTEM_STATUS_CHECKER_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_METRICS_COLLECTOR_TYPE_NAME);
        // Copy all the objects under the <device> tag from
        // {@link Configuration#getMultiDeviceSupportedTag()} except DEVICE_REQUIREMENTS_TYPE_NAME
        // which should be shared since all shards should have the same requirements.
        CONFIG_OBJ_TO_CLONE.add(Configuration.BUILD_PROVIDER_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.TARGET_PREPARER_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_RECOVERY_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.DEVICE_OPTIONS_TYPE_NAME);

        CONFIG_OBJ_TO_CLONE.add(Configuration.MULTI_PREPARER_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.CMD_OPTIONS_TYPE_NAME);
        CONFIG_OBJ_TO_CLONE.add(Configuration.LOGGER_TYPE_NAME);
        // Deep clone of log_saver to ensure each shard manages its own logs
        CONFIG_OBJ_TO_CLONE.add(Configuration.LOG_SAVER_TYPE_NAME);
        // Deep clone RetryDecision to ensure each shard retry independently
        CONFIG_OBJ_TO_CLONE.add(Configuration.RETRY_DECISION_TYPE_NAME);
        // Deep clone ConfigurationDescriptor
        CONFIG_OBJ_TO_CLONE.add(Configuration.CONFIGURATION_DESCRIPTION_TYPE_NAME);
    }

    /**
     * Attempt to shard the configuration into sub-configurations, to be re-scheduled to run on
     * multiple resources in parallel.
     *
     * <p>A successful shard action renders the current config empty, and invocation should not
     * proceed.
     *
     * @see IShardableTest
     * @see IRescheduler
     * @param config the current {@link IConfiguration}.
     * @param testInfo the {@link TestInformation} holding the tests information.
     * @param rescheduler the {@link IRescheduler}
     * @param logger the {@link ITestLogger} handed to tests implementing {@link
     *     ITestLoggerReceiver} before they are asked to split.
     * @return true if test was sharded. Otherwise return <code>false</code>
     */
    @Override
    public boolean shardConfig(
            IConfiguration config,
            TestInformation testInfo,
            IRescheduler rescheduler,
            ITestLogger logger) {
        IInvocationContext context = testInfo.getContext();
        List<IRemoteTest> shardableTests = new ArrayList<>();
        boolean isSharded = false;
        Integer shardCount = config.getCommandOptions().getShardCount();
        for (IRemoteTest test : config.getTests()) {
            isSharded |= shardTest(shardableTests, test, shardCount, testInfo, logger);
        }
        if (!isSharded) {
            return false;
        }
        // shard this invocation!
        // Multi-devices tests are pulled out of shardableTests and get one dedicated shard per
        // distinct device count.
        Map<Integer, List<IRemoteTest>> multiDevicesShards = buildMultiDevicesShard(shardableTests);
        int expectedShard = shardableTests.size();
        if (shardCount != null) {
            expectedShard = Math.min(shardCount, shardableTests.size());
        }
        // Number of shards that are not dedicated to multi-devices tests.
        final int regularShardCount = expectedShard;
        if (!multiDevicesShards.isEmpty()) {
            // Account for one shard for the multi-devices
            expectedShard += multiDevicesShards.size();
        }
        // Add a tracker so we know in invocation if the last shard is done running.
        LastShardDetector lastShard = new LastShardDetector();
        // create the TestInvocationListener that will collect results from all the shards,
        // and forward them to the original set of listeners (minus any IShardableListeners)
        // once all shards complete
        ShardMainResultForwarder resultCollector =
                new ShardMainResultForwarder(
                        buildMainShardListeners(config, lastShard), expectedShard);

        config.getLogSaver().invocationStarted(context);
        resultCollector.invocationStarted(context);
        synchronized (shardableTests) {
            scheduledMultiDevicesShard(
                    multiDevicesShards,
                    config,
                    testInfo,
                    rescheduler,
                    resultCollector,
                    expectedShard);
            // When shardCount is available only create 1 poller per shard
            // TODO: consider aggregating both case by picking a predefined shardCount if not
            // available (like 4) for autosharding.
            if (shardCount != null) {
                // We shuffle the tests for best results: avoid having the same module sub-tests
                // contiguously in the list.
                Collections.shuffle(shardableTests);
                // Rectify the expected number of poller to match
                CountDownLatch tracker = new CountDownLatch(regularShardCount);
                Collection<ITokenRequest> tokenPool = null;
                if (config.getCommandOptions().shouldUseTokenSharding()) {
                    tokenPool = extractTokenTests(shardableTests);
                }
                for (int i = 0; i < regularShardCount; i++) {
                    IConfiguration shardConfig = createShardConfig(config, lastShard);
                    TestsPoolPoller poller =
                            new TestsPoolPoller(
                                    createTestsPool(shardableTests, tokenPool), tracker);
                    shardConfig.setTest(poller);
                    rescheduleConfig(
                            shardConfig, config, testInfo, rescheduler, resultCollector, i);
                }
            } else {
                // NOTE(review): the latch is sized before token tests are removed from
                // shardableTests below, so with token sharding enabled it may count more
                // pollers than are actually scheduled - confirm this is intended.
                CountDownLatch tracker = new CountDownLatch(shardableTests.size());
                Collection<ITokenRequest> tokenPool = null;
                if (config.getCommandOptions().shouldUseTokenSharding()) {
                    tokenPool = extractTokenTests(shardableTests);
                }
                int i = 0;
                for (IRemoteTest testShard : shardableTests) {
                    CLog.d("Rescheduling sharded config...");
                    IConfiguration shardConfig = createShardConfig(config, lastShard);
                    if (config.getCommandOptions().shouldUseDynamicSharding()) {
                        TestsPoolPoller poller =
                                new TestsPoolPoller(
                                        createTestsPool(shardableTests, tokenPool), tracker);
                        shardConfig.setTest(poller);
                    } else {
                        shardConfig.setTest(testShard);
                    }
                    rescheduleConfig(
                            shardConfig, config, testInfo, rescheduler, resultCollector, i);
                    i++;
                }
            }
        }
        // If we are sharding inside sandbox, don't clean, let the parent do it.
        if (!config.getConfigurationDescription().shouldUseSandbox()) {
            // clean up original builds
            for (String deviceName : context.getDeviceConfigNames()) {
                config.getDeviceConfigByName(deviceName)
                        .getBuildProvider()
                        .cleanUp(context.getBuildInfo(deviceName));
            }
        }
        return true;
    }

    /**
     * Clones the original configuration for a shard and attaches the shared {@link
     * LastShardDetector} to the clone under {@link #LAST_SHARD_DETECTOR}.
     *
     * @param config the original {@link IConfiguration} being sharded.
     * @param lastShard the detector shared by the shards.
     * @return the cloned configuration for one shard.
     * @throws RuntimeException if the detector cannot be set on the cloned configuration.
     */
    private IConfiguration createShardConfig(IConfiguration config, LastShardDetector lastShard) {
        IConfiguration shardConfig = cloneConfigObject(config);
        try {
            shardConfig.setConfigurationObject(LAST_SHARD_DETECTOR, lastShard);
        } catch (ConfigurationException e) {
            throw new RuntimeException(e);
        }
        return shardConfig;
    }

    /** Creates the pool of tests that a poller will draw from. */
    private ITestsPool createTestsPool(
            Collection<IRemoteTest> tests, Collection<ITokenRequest> tokenTests) {
        return new LocalPool(tests, tokenTests);
    }

    /**
     * Finalizes a shard configuration (options validation, build infos, listeners, host log
     * suffix, shard index) and hands it to the rescheduler.
     *
     * @param shardConfig the cloned configuration of the shard to schedule.
     * @param config the original configuration being sharded.
     * @param testInfo the {@link TestInformation} of the parent invocation.
     * @param rescheduler the {@link IRescheduler} used to schedule the shard.
     * @param resultCollector the forwarder that shard results are reported to.
     * @param index the index of the shard.
     */
    private void rescheduleConfig(
            IConfiguration shardConfig,
            IConfiguration config,
            TestInformation testInfo,
            IRescheduler rescheduler,
            ShardMainResultForwarder resultCollector,
            int index) {
        validateOptions(testInfo, shardConfig);
        ShardBuildCloner.cloneBuildInfos(config, shardConfig, testInfo);

        shardConfig.setTestInvocationListeners(
                buildShardListeners(resultCollector, config, config.getTestInvocationListeners()));

        // Set the host_log suffix to avoid similar names
        String suffix = String.format("_shard_index_%s", index);
        if (shardConfig.getCommandOptions().getHostLogSuffix() != null) {
            suffix = shardConfig.getCommandOptions().getHostLogSuffix() + suffix;
        }
        shardConfig.getCommandOptions().setHostLogSuffix(suffix);

        // Make sure we don't run as sandboxed in shards, only parent invocation needs to
        // run as sandboxed
        shardConfig.getConfigurationDescription().setSandboxed(false);
        shardConfig.getConfigurationDescription().setShardIndex(index);
        rescheduler.scheduleConfig(shardConfig);
    }

    /** Returns the current global configuration. */
    @VisibleForTesting
    protected IGlobalConfiguration getGlobalConfiguration() {
        return GlobalConfiguration.getInstance();
    }

    /**
     * Runs the {@link IConfiguration#validateOptions()} on the config, then resolves its dynamic
     * options.
     *
     * @throws RuntimeException wrapping any {@link ConfigurationException} or {@link
     *     BuildRetrievalError} raised while validating or resolving.
     */
    @VisibleForTesting
    protected void validateOptions(TestInformation testInfo, IConfiguration config) {
        try {
            config.validateOptions();
            DynamicRemoteFileResolver resolver = new DynamicRemoteFileResolver();
            resolver.setDevice(testInfo.getDevice());
            resolver.addExtraArgs(config.getCommandOptions().getDynamicDownloadArgs());
            config.resolveDynamicOptions(resolver);
        } catch (ConfigurationException | BuildRetrievalError e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Helper to create the configuration of a shard from the original config. The object types
     * listed in {@code CONFIG_OBJ_TO_CLONE} (for example the {@link ISystemStatusChecker}s) are
     * passed to {@link IConfiguration#partialDeepClone} so each shard gets its own copies.
     *
     * <p>The copy has its shard count cleared, is tagged as locally sharded and no longer carries
     * the parent feature server reference.
     *
     * @param origConfig the original {@link IConfiguration}.
     * @return the cloned configuration.
     * @throws RuntimeException if the keystore client cannot be created or the clone fails.
     */
    private IConfiguration cloneConfigObject(IConfiguration origConfig) {
        IKeyStoreClient client = null;
        try {
            client = getGlobalConfiguration().getKeyStoreFactory().createKeyStoreClient();
        } catch (KeyStoreException e) {
            throw new RuntimeException(
                    String.format(
                            "failed to load keystore client when sharding: %s", e.getMessage()),
                    e);
        }

        try {
            IConfiguration deepCopy = origConfig.partialDeepClone(CONFIG_OBJ_TO_CLONE, client);
            // Sharding was done, no need for children to look into it.
            deepCopy.getCommandOptions().setShardCount(null);
            deepCopy.getConfigurationDescription()
                    .addMetadata(ConfigurationDescriptor.LOCAL_SHARDED_KEY, "true");
            // Remove parent shard server reference from the copy.
            deepCopy.getConfigurationDescription()
                    .removeMetadata(TradefedFeatureServer.SERVER_REFERENCE);
            return deepCopy;
        } catch (ConfigurationException e) {
            throw new RuntimeException(
                    String.format("failed to deep copy a configuration: %s", e.getMessage()), e);
        }
    }

    /**
     * Attempt to shard given {@link IRemoteTest}.
     *
     * <p>If the test cannot be split, it is added as-is to {@code shardableTests}.
     *
     * @param shardableTests the list of {@link IRemoteTest}s to add to
     * @param test the {@link IRemoteTest} to shard
     * @param shardCount attempted number of shard, can be null.
     * @param testInfo the {@link TestInformation} of the current invocation.
     * @param logger the {@link ITestLogger} injected into tests implementing {@link
     *     ITestLoggerReceiver}.
     * @return <code>true</code> if test was sharded
     */
    private static boolean shardTest(
            List<IRemoteTest> shardableTests,
            IRemoteTest test,
            Integer shardCount,
            TestInformation testInfo,
            ITestLogger logger) {
        boolean isSharded = false;
        if (test instanceof IShardableTest) {
            // inject device and build since they might be required to shard.
            if (test instanceof IBuildReceiver) {
                ((IBuildReceiver) test).setBuild(testInfo.getBuildInfo());
            }
            if (test instanceof IDeviceTest) {
                ((IDeviceTest) test).setDevice(testInfo.getDevice());
            }
            if (test instanceof IInvocationContextReceiver) {
                ((IInvocationContextReceiver) test).setInvocationContext(testInfo.getContext());
            }
            if (test instanceof ITestLoggerReceiver) {
                ((ITestLoggerReceiver) test).setTestLogger(logger);
            }

            IShardableTest shardableTest = (IShardableTest) test;
            // Give the shardCount hint to tests if they need it.
            Collection<IRemoteTest> shards = shardableTest.split(shardCount, testInfo);
            if (shards != null) {
                shardableTests.addAll(shards);
                isSharded = true;
            }
        }
        if (!isSharded) {
            shardableTests.add(test);
        }
        return isSharded;
    }

    /**
     * Builds the {@link ITestInvocationListener} listeners that will collect the results from all
     * shards. Excludes the {@link IShardableListener}s that support shard listeners, and appends
     * the given {@link LastShardDetector}.
     */
    private static List<ITestInvocationListener> buildMainShardListeners(
            IConfiguration config, LastShardDetector lastShardDetector) {
        List<ITestInvocationListener> newListeners = new ArrayList<>();
        for (ITestInvocationListener l : config.getTestInvocationListeners()) {
            boolean handledPerShard =
                    l instanceof IShardableListener
                            && ((IShardableListener) l).supportShardListener();
            if (!handledPerShard) {
                newListeners.add(l);
            }
        }
        newListeners.add(lastShardDetector);
        return newListeners;
    }

    /**
     * Builds the list of {@link ITestInvocationListener}s for each shard. Includes a clone of
     * every {@link IShardableListener} that supports shard listeners, plus a single listener that
     * will forward results to the main shard collector.
     */
    private static List<ITestInvocationListener> buildShardListeners(
            ShardMainResultForwarder resultCollector,
            IConfiguration config,
            List<ITestInvocationListener> origListeners) {
        List<ITestInvocationListener> shardListeners = new ArrayList<>();
        for (ITestInvocationListener l : origListeners) {
            if (l instanceof IShardableListener
                    && ((IShardableListener) l).supportShardListener()) {
                shardListeners.add(((IShardableListener) l).clone());
            }
        }
        ShardListener origConfigListener = new ShardListener(resultCollector);
        origConfigListener.setSupportGranularResults(isAutoRetryEnabled(config));
        shardListeners.add(origConfigListener);
        return shardListeners;
    }

    /**
     * Returns true if the retry decision of the config has auto-retry enabled with a max retry
     * count greater than zero.
     */
    private static boolean isAutoRetryEnabled(IConfiguration config) {
        IRetryDecision decision = config.getRetryDecision();
        return decision.isAutoRetryEnabled() && decision.getMaxRetryCount() > 0;
    }

    /**
     * Moves every test implementing {@link ITokenRequest} out of the given collection.
     *
     * @param shardableTests the tests to filter; token tests are removed from it in place.
     * @return the extracted token tests, in their original order.
     */
    private Collection<ITokenRequest> extractTokenTests(Collection<IRemoteTest> shardableTests) {
        List<ITokenRequest> tokenPool = new ArrayList<>();
        // Remove through the iterator rather than copying the collection and calling
        // remove(Object), which rescans the collection for every token test.
        Iterator<IRemoteTest> itr = shardableTests.iterator();
        while (itr.hasNext()) {
            IRemoteTest test = itr.next();
            if (test instanceof ITokenRequest) {
                tokenPool.add((ITokenRequest) test);
                itr.remove();
            }
        }
        return tokenPool;
    }

    /**
     * Moves the {@link ITestSuite}s that need more than one device out of the given list and
     * groups them by the number of devices they need.
     *
     * @param shardableTests the tests to filter; multi-devices tests are removed from it in place.
     * @return the multi-devices tests keyed by needed device count, in encounter order.
     */
    private Map<Integer, List<IRemoteTest>> buildMultiDevicesShard(
            List<IRemoteTest> shardableTests) {
        Map<Integer, List<IRemoteTest>> neededDevicePerTest = new LinkedHashMap<>();
        Iterator<IRemoteTest> itr = shardableTests.iterator();
        while (itr.hasNext()) {
            IRemoteTest test = itr.next();
            if (!(test instanceof ITestSuite)) {
                continue;
            }
            ITestSuite suite = (ITestSuite) test;
            if (suite.getDirectModule() == null) {
                // No direct module to query: keep the suite with the single-device tests
                // instead of failing on a null dereference.
                continue;
            }
            int neededDevices = suite.getDirectModule().neededDevices();
            if (neededDevices <= 1) {
                continue;
            }
            itr.remove();
            List<IRemoteTest> multiDevicesTests = neededDevicePerTest.get(neededDevices);
            if (multiDevicesTests == null) {
                multiDevicesTests = new ArrayList<>();
                neededDevicePerTest.put(neededDevices, multiDevicesTests);
            }
            multiDevicesTests.add(test);
        }
        return neededDevicePerTest;
    }

    /**
     * Schedule a replicated config for each device-needed count so it will self allocate the
     * appropriate number of devices.
     *
     * <p>These shards take the last indexes, starting at {@code expectedShard} minus the number
     * of multi-devices shards.
     */
    private void scheduledMultiDevicesShard(
            Map<Integer, List<IRemoteTest>> multiDevicesShards,
            IConfiguration config,
            TestInformation testInfo,
            IRescheduler rescheduler,
            ShardMainResultForwarder resultCollector,
            int expectedShard) {
        if (multiDevicesShards.isEmpty()) {
            return;
        }
        int index = expectedShard - multiDevicesShards.size();
        for (Entry<Integer, List<IRemoteTest>> multiDevicesTest : multiDevicesShards.entrySet()) {
            IConfiguration shardConfig = cloneConfigObject(config);
            shardConfig.setTests(multiDevicesTest.getValue());
            shardConfig.getCommandOptions().setMultiDeviceCount(multiDevicesTest.getKey());
            shardConfig.getCommandOptions().setReplicateSetup(true);
            rescheduleConfig(shardConfig, config, testInfo, rescheduler, resultCollector, index);
            index++;
        }
    }
}