1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.tradefed.invoker.shard; 17 18 import com.android.annotations.VisibleForTesting; 19 import com.android.tradefed.config.IConfiguration; 20 import com.android.tradefed.error.HarnessRuntimeException; 21 import com.android.tradefed.invoker.IRescheduler; 22 import com.android.tradefed.invoker.TestInformation; 23 import com.android.tradefed.log.ITestLogger; 24 import com.android.tradefed.log.LogUtil.CLog; 25 import com.android.tradefed.result.ITestLoggerReceiver; 26 import com.android.tradefed.result.error.InfraErrorIdentifier; 27 import com.android.tradefed.testtype.IBuildReceiver; 28 import com.android.tradefed.testtype.IDeviceTest; 29 import com.android.tradefed.testtype.IInvocationContextReceiver; 30 import com.android.tradefed.testtype.IRemoteTest; 31 import com.android.tradefed.testtype.IRuntimeHintProvider; 32 import com.android.tradefed.testtype.IShardableTest; 33 import com.android.tradefed.testtype.suite.ITestSuite; 34 import com.android.tradefed.testtype.suite.ModuleMerger; 35 import com.android.tradefed.util.TimeUtil; 36 37 import java.util.ArrayList; 38 import java.util.Collection; 39 import java.util.Collections; 40 import java.util.Comparator; 41 import java.util.List; 42 import java.util.regex.Matcher; 43 import java.util.regex.Pattern; 44 45 /** Sharding strategy to create strict shards that do not report together, */ 46 public class StrictShardHelper extends ShardHelper { 47 48 /** {@inheritDoc} */ 49 @Override shardConfig( IConfiguration config, TestInformation testInfo, IRescheduler rescheduler, ITestLogger logger)50 public boolean shardConfig( 51 IConfiguration config, 52 TestInformation testInfo, 53 IRescheduler rescheduler, 54 ITestLogger logger) { 55 // need to look up attempt id somewhere and make sure we only attempt this on attempt 0 56 if (config.getCommandOptions().shouldRemoteDynamicShard()) { 57 // We are using dynamic sharding 58 DynamicShardHelper helper = new DynamicShardHelper(); 59 // TODO(murj) handle the case where dynamic sharding fails 60 return helper.shardConfig(config, testInfo, rescheduler, logger); 61 } 62 63 Integer shardCount = config.getCommandOptions().getShardCount(); 64 Integer shardIndex = config.getCommandOptions().getShardIndex(); 65 boolean optimizeMainline = config.getCommandOptions().getOptimizeMainlineTest(); 66 67 if (shardIndex == null) { 68 return super.shardConfig(config, testInfo, rescheduler, logger); 69 } 70 if (shardCount == null) { 71 throw new RuntimeException("shard-count is null while shard-index is " + shardIndex); 72 } 73 // No sharding needed if shard-count=1 74 if (shardCount == 1) { 75 return false; 76 } 77 78 // Split tests in place, without actually sharding. 79 List<IRemoteTest> listAllTests = getAllTests(config, shardCount, testInfo, logger); 80 // We cannot shuffle to get better average results 81 normalizeDistribution(listAllTests, shardCount); 82 List<IRemoteTest> splitList; 83 if (shardCount == 1) { 84 // not sharded 85 splitList = listAllTests; 86 } else { 87 splitList = 88 splitTests( 89 listAllTests, 90 shardCount, 91 config.getCommandOptions().shouldUseEvenModuleSharding()) 92 .get(shardIndex); 93 } 94 aggregateSuiteModules(splitList); 95 if (optimizeMainline) { 96 CLog.i("Reordering the test modules list for index: %s", shardIndex); 97 reorderTestModules(splitList); 98 } 99 config.setTests(splitList); 100 return false; 101 } 102 103 /** 104 * Helper to re order the list full list of {@link IRemoteTest} for mainline. 105 * 106 * @param tests the {@link IRemoteTest} containing all the tests that need to run. 107 */ reorderTestModules(List<IRemoteTest> tests)108 private void reorderTestModules(List<IRemoteTest> tests) { 109 Collections.sort(tests, new Comparator<IRemoteTest>() { 110 @Override 111 public int compare(IRemoteTest o1, IRemoteTest o2) { 112 String moduleId1 = ((ITestSuite)o1).getDirectModule().getId(); 113 String moduleId2 = ((ITestSuite)o2).getDirectModule().getId(); 114 return getMainlineId(moduleId1).compareTo(getMainlineId(moduleId2)); 115 } 116 }); 117 } 118 119 /** 120 * Returns the parameterized mainline modules' name defined in the square brackets. 121 * 122 * @param id The module's name. 123 * @throws RuntimeException if the module name doesn't match the pattern for mainline modules. 124 */ getMainlineId(String id)125 private String getMainlineId(String id) { 126 // Pattern used to identify the parameterized mainline modules defined in the square 127 // brackets. 128 Pattern parameterizedMainlineRegex = Pattern.compile("\\[(.*(\\.apk|.apex|.apks))\\]$"); 129 Matcher m = parameterizedMainlineRegex.matcher(id); 130 if (m.find()) { 131 return m.group(1); 132 } 133 throw new HarnessRuntimeException( 134 String.format( 135 "Module: %s doesn't match the pattern for mainline modules. The " 136 + "pattern should end with apk/apex/apks.", 137 id), 138 InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR); 139 } 140 141 /** 142 * Helper to return the full list of {@link IRemoteTest} based on {@link IShardableTest} split. 143 * 144 * @param config the {@link IConfiguration} describing the invocation. 145 * @param shardCount the shard count hint to be provided to some tests. 146 * @param testInfo the {@link TestInformation} of the parent invocation. 147 * @return the list of all {@link IRemoteTest}. 148 */ getAllTests( IConfiguration config, Integer shardCount, TestInformation testInfo, ITestLogger logger)149 private List<IRemoteTest> getAllTests( 150 IConfiguration config, 151 Integer shardCount, 152 TestInformation testInfo, 153 ITestLogger logger) { 154 List<IRemoteTest> allTests = new ArrayList<>(); 155 for (IRemoteTest test : config.getTests()) { 156 if (test instanceof IShardableTest) { 157 // Inject current information to help with sharding 158 if (test instanceof IBuildReceiver) { 159 ((IBuildReceiver) test).setBuild(testInfo.getBuildInfo()); 160 } 161 if (test instanceof IDeviceTest) { 162 ((IDeviceTest) test).setDevice(testInfo.getDevice()); 163 } 164 if (test instanceof IInvocationContextReceiver) { 165 ((IInvocationContextReceiver) test).setInvocationContext(testInfo.getContext()); 166 } 167 if (test instanceof ITestLoggerReceiver) { 168 ((ITestLoggerReceiver) test).setTestLogger(logger); 169 } 170 171 // Handling of the ITestSuite is a special case, we do not allow pool of tests 172 // since each shard needs to be independent. 173 if (test instanceof ITestSuite) { 174 ((ITestSuite) test).setShouldMakeDynamicModule(false); 175 } 176 177 Collection<IRemoteTest> subTests = 178 ((IShardableTest) test).split(shardCount, testInfo); 179 if (subTests == null) { 180 // test did not shard so we add it as is. 181 allTests.add(test); 182 } else { 183 allTests.addAll(subTests); 184 } 185 } else { 186 // if test is not shardable we add it as is. 187 allTests.add(test); 188 } 189 } 190 return allTests; 191 } 192 193 /** 194 * Split the list of tests to run however the implementation see fit. Sharding needs to be 195 * consistent. It is acceptable to return an empty list if no tests can be run in the shard. 196 * 197 * <p>Implement this in order to provide a test suite specific sharding. The default 198 * implementation attempts to balance the number of IRemoteTest per shards as much as possible 199 * as a first step, then use a minor criteria or run-hint to adjust the lists a bit more. 200 * 201 * @param fullList the initial full list of {@link IRemoteTest} containing all the tests that 202 * need to run. 203 * @param shardCount the total number of shard that need to run. 204 * @param useEvenModuleSharding whether to use a strategy that evenly distributes number of 205 * modules across shards 206 * @return a list of list {@link IRemoteTest}s that have been assigned to each shard. The list 207 * size will be the shardCount. 208 */ 209 @VisibleForTesting splitTests( List<IRemoteTest> fullList, int shardCount, boolean useEvenModuleSharding)210 protected List<List<IRemoteTest>> splitTests( 211 List<IRemoteTest> fullList, int shardCount, boolean useEvenModuleSharding) { 212 List<List<IRemoteTest>> shards; 213 if (useEvenModuleSharding) { 214 CLog.d("Using the sharding strategy to distribute number of modules more evenly."); 215 shards = shardList(fullList, shardCount); 216 } else { 217 shards = new ArrayList<>(); 218 // We are using Match.ceil to avoid the last shard having too much extra. 219 int numPerShard = (int) Math.ceil(fullList.size() / (float) shardCount); 220 221 boolean needsCorrection = false; 222 float correctionRatio = 0f; 223 if (fullList.size() > shardCount) { 224 // In some cases because of the Math.ceil, some combination might run out of tests 225 // before the last shard, in that case we populate a correction to rebalance the 226 // tests. 227 needsCorrection = (numPerShard * (shardCount - 1)) > fullList.size(); 228 correctionRatio = numPerShard - (fullList.size() / (float) shardCount); 229 } 230 // Recalculate the number of tests per shard with the correction taken into account. 231 numPerShard = (int) Math.floor(numPerShard - correctionRatio); 232 // Based of the parameters, distribute the tests across shards. 233 shards = balancedDistrib(fullList, shardCount, numPerShard, needsCorrection); 234 } 235 // Do last minute rebalancing 236 topBottom(shards, shardCount); 237 return shards; 238 } 239 balancedDistrib( List<IRemoteTest> fullList, int shardCount, int numPerShard, boolean needsCorrection)240 private List<List<IRemoteTest>> balancedDistrib( 241 List<IRemoteTest> fullList, int shardCount, int numPerShard, boolean needsCorrection) { 242 List<List<IRemoteTest>> shards = new ArrayList<>(); 243 List<IRemoteTest> correctionList = new ArrayList<>(); 244 int correctionSize = 0; 245 246 // Generate all the shards 247 for (int i = 0; i < shardCount; i++) { 248 List<IRemoteTest> shardList; 249 if (i >= fullList.size()) { 250 // Return empty list when we don't have enough tests for all the shards. 251 shardList = new ArrayList<IRemoteTest>(); 252 shards.add(shardList); 253 continue; 254 } 255 256 if (i == shardCount - 1) { 257 // last shard take everything remaining except the correction: 258 if (needsCorrection) { 259 // We omit the size of the correction needed. 260 correctionSize = fullList.size() - (numPerShard + (i * numPerShard)); 261 correctionList = 262 fullList.subList(fullList.size() - correctionSize, fullList.size()); 263 } 264 shardList = fullList.subList(i * numPerShard, fullList.size() - correctionSize); 265 shards.add(new ArrayList<>(shardList)); 266 continue; 267 } 268 shardList = fullList.subList(i * numPerShard, numPerShard + (i * numPerShard)); 269 shards.add(new ArrayList<>(shardList)); 270 } 271 272 // If we have correction omitted tests, disperse them on each shard, at this point the 273 // number of tests in correction is ensured to be bellow the number of shards. 274 for (int i = 0; i < shardCount; i++) { 275 if (i < correctionList.size()) { 276 shards.get(i).add(correctionList.get(i)); 277 } else { 278 break; 279 } 280 } 281 return shards; 282 } 283 284 @VisibleForTesting shardList(List<T> fullList, int shardCount)285 static <T> List<List<T>> shardList(List<T> fullList, int shardCount) { 286 int totalSize = fullList.size(); 287 int smallShardSize = totalSize / shardCount; 288 int bigShardSize = smallShardSize + 1; 289 int bigShardCount = totalSize % shardCount; 290 291 // Correctness: 292 // sum(shard sizes) 293 // == smallShardSize * smallShardCount + bigShardSize * bigShardCount 294 // == smallShardSize * (shardCount - bigShardCount) + bigShardSize * bigShardCount 295 // == smallShardSize * (shardCount - bigShardCount) + (smallShardSize + 1) * bigShardCount 296 // == smallShardSize * (shardCount - bigShardCount + bigShardCount) + bigShardCount 297 // == smallShardSize * shardCount + bigShardCount 298 // == floor(totalSize / shardCount) * shardCount + remainder(totalSize / shardCount) 299 // == totalSize 300 301 List<List<T>> shards = new ArrayList<>(); 302 int i = 0; 303 for (; i < bigShardCount * bigShardSize; i += bigShardSize) { 304 shards.add(fullList.subList(i, i + bigShardSize)); 305 } 306 for (; i < totalSize; i += smallShardSize) { 307 shards.add(fullList.subList(i, i + smallShardSize)); 308 } 309 while (shards.size() < shardCount) { 310 shards.add(new ArrayList<>()); 311 } 312 return shards; 313 } 314 315 /** 316 * Move around predictably the tests in order to have a better uniformization of the tests in 317 * each shard. 318 */ normalizeDistribution(List<IRemoteTest> listAllTests, int shardCount)319 private void normalizeDistribution(List<IRemoteTest> listAllTests, int shardCount) { 320 final int numRound = shardCount; 321 final int distance = shardCount - 1; 322 for (int i = 0; i < numRound; i++) { 323 for (int j = 0; j < listAllTests.size(); j = j + distance) { 324 // Push the test at the end 325 IRemoteTest push = listAllTests.remove(j); 326 listAllTests.add(push); 327 } 328 } 329 } 330 331 /** 332 * Special handling for suite from {@link ITestSuite}. We aggregate the tests in the same shard 333 * in order to optimize target_preparation step. 334 * 335 * @param tests the {@link List} of {@link IRemoteTest} for that shard. 336 */ aggregateSuiteModules(List<IRemoteTest> tests)337 private void aggregateSuiteModules(List<IRemoteTest> tests) { 338 List<IRemoteTest> dupList = new ArrayList<>(tests); 339 for (int i = 0; i < dupList.size(); i++) { 340 if (dupList.get(i) instanceof ITestSuite) { 341 // We iterate the other tests to see if we can find another from the same module. 342 for (int j = i + 1; j < dupList.size(); j++) { 343 // If the test was not already merged 344 if (tests.contains(dupList.get(j))) { 345 if (dupList.get(j) instanceof ITestSuite) { 346 if (ModuleMerger.arePartOfSameSuite( 347 (ITestSuite) dupList.get(i), (ITestSuite) dupList.get(j))) { 348 ModuleMerger.mergeSplittedITestSuite( 349 (ITestSuite) dupList.get(i), (ITestSuite) dupList.get(j)); 350 tests.remove(dupList.get(j)); 351 } 352 } 353 } 354 } 355 } 356 } 357 } 358 topBottom(List<List<IRemoteTest>> allShards, int shardCount)359 private void topBottom(List<List<IRemoteTest>> allShards, int shardCount) { 360 // We only attempt this when the number of shard is pretty high 361 if (shardCount < 4) { 362 return; 363 } 364 // Generate approximate RuntimeHint for each shard 365 int index = 0; 366 List<SortShardObj> shardTimes = new ArrayList<>(); 367 for (List<IRemoteTest> shard : allShards) { 368 long aggTime = 0L; 369 CLog.d("++++++++++++++++++ SHARD %s +++++++++++++++", index); 370 for (IRemoteTest test : shard) { 371 if (test instanceof IRuntimeHintProvider) { 372 aggTime += ((IRuntimeHintProvider) test).getRuntimeHint(); 373 } 374 } 375 CLog.d("Shard %s approximate time: %s", index, TimeUtil.formatElapsedTime(aggTime)); 376 shardTimes.add(new SortShardObj(index, aggTime)); 377 index++; 378 CLog.d("+++++++++++++++++++++++++++++++++++++++++++"); 379 } 380 381 Collections.sort(shardTimes); 382 if ((shardTimes.get(0).mAggTime - shardTimes.get(shardTimes.size() - 1).mAggTime) 383 < 60 * 60 * 1000L) { 384 return; 385 } 386 387 // take 30% top shard (10 shard = top 3 shards) 388 for (int i = 0; i < (shardCount * 0.3); i++) { 389 CLog.d( 390 "Top shard %s is index %s with %s", 391 i, 392 shardTimes.get(i).mIndex, 393 TimeUtil.formatElapsedTime(shardTimes.get(i).mAggTime)); 394 int give = shardTimes.get(i).mIndex; 395 int receive = shardTimes.get(shardTimes.size() - 1 - i).mIndex; 396 CLog.d("Giving from shard %s to shard %s", give, receive); 397 for (int j = 0; j < (allShards.get(give).size() * (0.2f / (i + 1))); j++) { 398 IRemoteTest givetest = allShards.get(give).remove(0); 399 allShards.get(receive).add(givetest); 400 } 401 } 402 } 403 404 /** Object holder for shard, their index and their aggregated execution time. */ 405 private class SortShardObj implements Comparable<SortShardObj> { 406 public final int mIndex; 407 public final Long mAggTime; 408 SortShardObj(int index, long aggTime)409 public SortShardObj(int index, long aggTime) { 410 mIndex = index; 411 mAggTime = aggTime; 412 } 413 414 @Override compareTo(SortShardObj obj)415 public int compareTo(SortShardObj obj) { 416 return obj.mAggTime.compareTo(mAggTime); 417 } 418 } 419 } 420