1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "scheduler.h"
18 
19 #include <string>
20 
21 #include "base/scoped_arena_allocator.h"
22 #include "base/scoped_arena_containers.h"
23 #include "data_type-inl.h"
24 #include "optimizing/load_store_analysis.h"
25 #include "prepare_for_register_allocation.h"
26 
27 #ifdef ART_ENABLE_CODEGEN_arm64
28 #include "scheduler_arm64.h"
29 #endif
30 
31 #ifdef ART_ENABLE_CODEGEN_arm
32 #include "scheduler_arm.h"
33 #endif
34 
35 namespace art HIDDEN {
36 
AddDependency(SchedulingNode * node,SchedulingNode * dependency,bool is_data_dependency)37 void SchedulingGraph::AddDependency(SchedulingNode* node,
38                                     SchedulingNode* dependency,
39                                     bool is_data_dependency) {
40   if (node == nullptr || dependency == nullptr) {
41     // A `nullptr` node indicates an instruction out of scheduling range (eg. in
42     // an other block), so we do not need to add a dependency edge to the graph.
43     return;
44   }
45 
46   if (is_data_dependency) {
47     node->AddDataPredecessor(dependency);
48   } else {
49     node->AddOtherPredecessor(dependency);
50   }
51 }
52 
HasReorderingDependency(const HInstruction * instr1,const HInstruction * instr2)53 bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
54                                                            const HInstruction* instr2) {
55   SideEffects instr1_side_effects = instr1->GetSideEffects();
56   SideEffects instr2_side_effects = instr2->GetSideEffects();
57 
58   // Read after write.
59   if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
60     return true;
61   }
62 
63   // Write after read.
64   if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
65     return true;
66   }
67 
68   // Memory write after write.
69   if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
70     return true;
71   }
72 
73   return false;
74 }
75 
ArrayAccessHeapLocation(HInstruction * instruction) const76 size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
77     HInstruction* instruction) const {
78   DCHECK(heap_location_collector_ != nullptr);
79   size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
80   // This array access should be analyzed and added to HeapLocationCollector before.
81   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
82   return heap_loc;
83 }
84 
ArrayAccessMayAlias(HInstruction * instr1,HInstruction * instr2) const85 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
86     HInstruction* instr1, HInstruction* instr2) const {
87   DCHECK(heap_location_collector_ != nullptr);
88   size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
89   size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);
90 
91   // For example: arr[0] and arr[0]
92   if (instr1_heap_loc == instr2_heap_loc) {
93     return true;
94   }
95 
96   // For example: arr[0] and arr[i]
97   if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
98     return true;
99   }
100 
101   return false;
102 }
103 
IsArrayAccess(const HInstruction * instruction)104 static bool IsArrayAccess(const HInstruction* instruction) {
105   return instruction->IsArrayGet() || instruction->IsArraySet();
106 }
107 
IsInstanceFieldAccess(const HInstruction * instruction)108 static bool IsInstanceFieldAccess(const HInstruction* instruction) {
109   return instruction->IsInstanceFieldGet() || instruction->IsInstanceFieldSet();
110 }
111 
IsStaticFieldAccess(const HInstruction * instruction)112 static bool IsStaticFieldAccess(const HInstruction* instruction) {
113   return instruction->IsStaticFieldGet() || instruction->IsStaticFieldSet();
114 }
115 
IsFieldAccess(const HInstruction * instruction)116 static bool IsFieldAccess(const HInstruction* instruction) {
117   return IsInstanceFieldAccess(instruction) || IsStaticFieldAccess(instruction);
118 }
119 
GetFieldInfo(const HInstruction * instruction)120 static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
121   return &instruction->GetFieldInfo();
122 }
123 
FieldAccessHeapLocation(const HInstruction * instr) const124 size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
125     const HInstruction* instr) const {
126   DCHECK(instr != nullptr);
127   DCHECK(GetFieldInfo(instr) != nullptr);
128   DCHECK(heap_location_collector_ != nullptr);
129 
130   HInstruction* ref = instr->InputAt(0);
131   size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr));
132   // This field access should be analyzed and added to HeapLocationCollector before.
133   DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
134 
135   return heap_loc;
136 }
137 
FieldAccessMayAlias(const HInstruction * instr1,const HInstruction * instr2) const138 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
139     const HInstruction* instr1, const HInstruction* instr2) const {
140   DCHECK(heap_location_collector_ != nullptr);
141 
142   // Static and instance field accesses should not alias.
143   if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
144       (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
145     return false;
146   }
147 
148   // If both fields accesses are resolved.
149   size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
150   size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);
151 
152   if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
153     return true;
154   }
155 
156   if (!heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
157                                           instr2_field_access_heap_loc)) {
158     return false;
159   }
160 
161   return true;
162 }
163 
HasMemoryDependency(HInstruction * instr1,HInstruction * instr2) const164 bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
165     HInstruction* instr1, HInstruction* instr2) const {
166   if (!HasReorderingDependency(instr1, instr2)) {
167     return false;
168   }
169 
170   if (heap_location_collector_ == nullptr ||
171       heap_location_collector_->GetNumberOfHeapLocations() == 0) {
172     // Without HeapLocation information from load store analysis,
173     // we cannot do further disambiguation analysis on these two instructions.
174     // Just simply say that those two instructions have memory dependency.
175     return true;
176   }
177 
178   // Note: Unresolved field access instructions are currently marked as not schedulable.
179   // If we change that, we should still keep in mind that these instructions can throw and
180   // read or write volatile fields and, if static, cause class initialization and write to
181   // arbitrary heap locations, and therefore cannot be reordered with any other field or
182   // array access to preserve the observable behavior. The only exception is access to
183   // singleton members that could actually be reodered across these instructions but we
184   // currently do not analyze singletons here anyway.
185 
186   if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
187     return ArrayAccessMayAlias(instr1, instr2);
188   }
189   if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
190     return FieldAccessMayAlias(instr1, instr2);
191   }
192 
193   // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
194   if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
195     return true;
196   }
197   if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
198     return true;
199   }
200   if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
201     return true;
202   }
203 
204   // Heap accesses of different kinds should not alias.
205   if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
206     return false;
207   }
208   if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
209     return false;
210   }
211   if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
212     return false;
213   }
214   if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
215     return false;
216   }
217 
218   // We conservatively treat all other cases having dependency,
219   // for example, Invoke and ArrayGet.
220   return true;
221 }
222 
HasExceptionDependency(const HInstruction * instr1,const HInstruction * instr2)223 bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
224                                                           const HInstruction* instr2) {
225   if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
226     return true;
227   }
228   if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
229     return true;
230   }
231   if (instr2->CanThrow() && instr1->CanThrow()) {
232     return true;
233   }
234 
235   // Above checks should cover all cases where we cannot reorder two
236   // instructions which may throw exception.
237   return false;
238 }
239 
240 // Check if the specified instruction is a better candidate which more likely will
241 // have other instructions depending on it.
IsBetterCandidateWithMoreLikelyDependencies(HInstruction * new_candidate,HInstruction * old_candidate)242 static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
243                                                         HInstruction* old_candidate) {
244   if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
245     // Weaker side effects.
246     return false;
247   }
248   if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
249     // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
250     return new_candidate->CanThrow() && !old_candidate->CanThrow();
251   } else {
252     // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
253     return new_candidate->CanThrow() || !old_candidate->CanThrow();
254   }
255 }
256 
AddCrossIterationDependencies(SchedulingNode * node)257 void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
258   for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
259     // Having a phi-function from a loop header as an input means the current node of the
260     // scheduling graph has a cross-iteration dependency because such phi-functions bring values
261     // from the previous iteration to the current iteration.
262     if (!instruction->IsLoopHeaderPhi()) {
263       continue;
264     }
265     for (HInstruction* phi_input : instruction->GetInputs()) {
266       // As a scheduling graph of the current basic block is built by
267       // processing instructions bottom-up, nullptr returned by GetNode means
268       // an instruction defining a value for the phi is either before the
269       // instruction represented by node or it is in a different basic block.
270       SchedulingNode* def_node = GetNode(phi_input);
271 
272       // We don't create a dependency if there are uses besides the use in phi.
273       // In such cases a register to hold phi_input is usually allocated and
274       // a MOV instruction is generated. In cases with multiple uses and no MOV
275       // instruction, reordering creating a MOV instruction can improve
276       // performance more than an attempt to avoid a MOV instruction.
277       if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
278         // We have an implicit data dependency between node and def_node.
279         // AddAddDataDependency cannot be used because it is for explicit data dependencies.
280         // So AddOtherDependency is used.
281         AddOtherDependency(def_node, node);
282       }
283     }
284   }
285 }
286 
AddDependencies(SchedulingNode * instruction_node,bool is_scheduling_barrier)287 void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
288                                       bool is_scheduling_barrier) {
289   HInstruction* instruction = instruction_node->GetInstruction();
290 
291   // Define-use dependencies.
292   for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
293     AddDataDependency(GetNode(use.GetUser()), instruction_node);
294   }
295 
296   // Scheduling barrier dependencies.
297   DCHECK_IMPLIES(is_scheduling_barrier, contains_scheduling_barrier_);
298   if (contains_scheduling_barrier_) {
299     // A barrier depends on instructions after it. And instructions before the
300     // barrier depend on it.
301     for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
302       SchedulingNode* other_node = GetNode(other);
303       CHECK(other_node != nullptr)
304           << other->DebugName()
305           << " is in block " << other->GetBlock()->GetBlockId()
306           << ", and expected in block " << instruction->GetBlock()->GetBlockId();
307       bool other_is_barrier = other_node->IsSchedulingBarrier();
308       if (is_scheduling_barrier || other_is_barrier) {
309         AddOtherDependency(other_node, instruction_node);
310       }
311       if (other_is_barrier) {
312         // This other scheduling barrier guarantees ordering of instructions after
313         // it, so avoid creating additional useless dependencies in the graph.
314         // For example if we have
315         //     instr_1
316         //     barrier_2
317         //     instr_3
318         //     barrier_4
319         //     instr_5
320         // we only create the following non-data dependencies
321         //     1 -> 2
322         //     2 -> 3
323         //     2 -> 4
324         //     3 -> 4
325         //     4 -> 5
326         // and do not create
327         //     1 -> 4
328         //     2 -> 5
329         // Note that in this example we could also avoid creating the dependency
330         // `2 -> 4`.  But if we remove `instr_3` that dependency is required to
331         // order the barriers. So we generate it to avoid a special case.
332         break;
333       }
334     }
335   }
336 
337   // Side effect dependencies.
338   if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
339     HInstruction* dep_chain_candidate = nullptr;
340     for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
341       SchedulingNode* other_node = GetNode(other);
342       if (other_node->IsSchedulingBarrier()) {
343         // We have reached a scheduling barrier so we can stop further
344         // processing.
345         //
346         // As a "other" dependency is not set up if a data dependency exists, we need to check that
347         // one of them must exist.
348         DCHECK(other_node->HasOtherDependency(instruction_node)
349                || other_node->HasDataDependency(instruction_node));
350         break;
351       }
352       if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
353         if (dep_chain_candidate != nullptr &&
354             side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
355           // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
356         } else {
357           AddOtherDependency(other_node, instruction_node);
358         }
359         // Check if `other` is a better candidate which more likely will have other instructions
360         // depending on it.
361         if (dep_chain_candidate == nullptr ||
362             IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
363           dep_chain_candidate = other;
364         }
365       }
366     }
367   }
368 
369   // Environment dependencies.
370   // We do not need to process those if the instruction is a scheduling barrier,
371   // since the barrier already has non-data dependencies on all following
372   // instructions.
373   if (!is_scheduling_barrier) {
374     for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
375       // Note that here we could stop processing if the environment holder is
376       // across a scheduling barrier. But checking this would likely require
377       // more work than simply iterating through environment uses.
378       AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
379     }
380   }
381 
382   AddCrossIterationDependencies(instruction_node);
383 }
384 
InstructionTypeId(const HInstruction * instruction)385 static const std::string InstructionTypeId(const HInstruction* instruction) {
386   return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
387 }
388 
389 // Ideally we would reuse the graph visualizer code, but it is not available
390 // from here and it is not worth moving all that code only for our use.
DumpAsDotNode(std::ostream & output,const SchedulingNode * node)391 static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
392   const HInstruction* instruction = node->GetInstruction();
393   // Use the instruction typed id as the node identifier.
394   std::string instruction_id = InstructionTypeId(instruction);
395   output << instruction_id << "[shape=record, label=\""
396       << instruction_id << ' ' << instruction->DebugName() << " [";
397   // List the instruction's inputs in its description. When visualizing the
398   // graph this helps differentiating data inputs from other dependencies.
399   const char* seperator = "";
400   for (const HInstruction* input : instruction->GetInputs()) {
401     output << seperator << InstructionTypeId(input);
402     seperator = ",";
403   }
404   output << "]";
405   // Other properties of the node.
406   output << "\\ninternal_latency: " << node->GetInternalLatency();
407   output << "\\ncritical_path: " << node->GetCriticalPath();
408   if (node->IsSchedulingBarrier()) {
409     output << "\\n(barrier)";
410   }
411   output << "\"];\n";
412   // We want program order to go from top to bottom in the graph output, so we
413   // reverse the edges and specify `dir=back`.
414   for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
415     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
416     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
417         << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
418   }
419   for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
420     const HInstruction* predecessor_instruction = predecessor->GetInstruction();
421     output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
422         << "[dir=back,color=blue]\n";
423   }
424 }
425 
DumpAsDotGraph(const std::string & description,const ScopedArenaVector<SchedulingNode * > & initial_candidates)426 void SchedulingGraph::DumpAsDotGraph(const std::string& description,
427                                      const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
428   // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
429   // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
430   std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
431   // Description of this graph, as a comment.
432   output << "// " << description << "\n";
433   // Start the dot graph. Use an increasing index for easier differentiation.
434   output << "digraph G {\n";
435   for (const auto& entry : nodes_map_) {
436     SchedulingNode* node = entry.second.get();
437     DumpAsDotNode(output, node);
438   }
439   // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
440   for (SchedulingNode* node : initial_candidates) {
441     const HInstruction* instruction = node->GetInstruction();
442     output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
443       << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
444   }
445   // End of the dot graph.
446   output << "}\n";
447   output.close();
448 }
449 
SelectMaterializedCondition(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph) const450 SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
451     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
452   // Schedule condition inputs that can be materialized immediately before their use.
453   // In following example, after we've scheduled HSelect, we want LessThan to be scheduled
454   // immediately, because it is a materialized condition, and will be emitted right before HSelect
455   // in codegen phase.
456   //
457   // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
458   // i21 HAdd [...]                ===>      |          |         |
459   // i22 HAdd [...]                          +----------+---------+
460   // i23 HSelect [i21, i22, i20]                     HSelect
461 
462   if (prev_select_ == nullptr) {
463     return nullptr;
464   }
465 
466   const HInstruction* instruction = prev_select_->GetInstruction();
467   const HCondition* condition = nullptr;
468   DCHECK(instruction != nullptr);
469 
470   if (instruction->IsIf()) {
471     condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull();
472   } else if (instruction->IsSelect()) {
473     condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull();
474   }
475 
476   SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
477 
478   if ((condition_node != nullptr) &&
479       condition->HasOnlyOneNonEnvironmentUse() &&
480       ContainsElement(*nodes, condition_node)) {
481     DCHECK(!condition_node->HasUnscheduledSuccessors());
482     // Remove the condition from the list of candidates and schedule it.
483     RemoveElement(*nodes, condition_node);
484     return condition_node;
485   }
486 
487   return nullptr;
488 }
489 
PopHighestPriorityNode(ScopedArenaVector<SchedulingNode * > * nodes,const SchedulingGraph & graph)490 SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
491     ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
492   DCHECK(!nodes->empty());
493   SchedulingNode* select_node = nullptr;
494 
495   // Optimize for materialized condition and its emit before use scenario.
496   select_node = SelectMaterializedCondition(nodes, graph);
497 
498   if (select_node == nullptr) {
499     // Get highest priority node based on critical path information.
500     select_node = (*nodes)[0];
501     size_t select = 0;
502     for (size_t i = 1, e = nodes->size(); i < e; i++) {
503       SchedulingNode* check = (*nodes)[i];
504       SchedulingNode* candidate = (*nodes)[select];
505       select_node = GetHigherPrioritySchedulingNode(candidate, check);
506       if (select_node == check) {
507         select = i;
508       }
509     }
510     DeleteNodeAtIndex(nodes, select);
511   }
512 
513   prev_select_ = select_node;
514   return select_node;
515 }
516 
GetHigherPrioritySchedulingNode(SchedulingNode * candidate,SchedulingNode * check) const517 SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
518     SchedulingNode* candidate, SchedulingNode* check) const {
519   uint32_t candidate_path = candidate->GetCriticalPath();
520   uint32_t check_path = check->GetCriticalPath();
521   // First look at the critical_path.
522   if (check_path != candidate_path) {
523     return check_path < candidate_path ? check : candidate;
524   }
525   // If both critical paths are equal, schedule instructions with a higher latency
526   // first in program order.
527   return check->GetLatency() < candidate->GetLatency() ? check : candidate;
528 }
529 
Schedule(HGraph * graph)530 void HScheduler::Schedule(HGraph* graph) {
531   // We run lsa here instead of in a separate pass to better control whether we
532   // should run the analysis or not.
533   const HeapLocationCollector* heap_location_collector = nullptr;
534   ScopedArenaAllocator allocator(graph->GetArenaStack());
535   LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator);
536   if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
537     lsa.Run();
538     heap_location_collector = &lsa.GetHeapLocationCollector();
539   }
540 
541   for (HBasicBlock* block : graph->GetReversePostOrder()) {
542     if (IsSchedulable(block)) {
543       Schedule(block, heap_location_collector);
544     }
545   }
546 }
547 
Schedule(HBasicBlock * block,const HeapLocationCollector * heap_location_collector)548 void HScheduler::Schedule(HBasicBlock* block,
549                           const HeapLocationCollector* heap_location_collector) {
550   ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
551 
552   // Build the scheduling graph.
553   auto [scheduling_graph, scheduling_nodes] =
554       BuildSchedulingGraph(block, &allocator, heap_location_collector);
555 
556   if (scheduling_graph.Size() <= 1) {
557     return;
558   }
559 
560   cursor_ = block->GetLastInstruction();
561 
562   // The list of candidates for scheduling. A node becomes a candidate when all
563   // its predecessors have been scheduled.
564   ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));
565 
566   // Find the initial candidates for scheduling.
567   for (SchedulingNode* node : scheduling_nodes) {
568     if (!node->HasUnscheduledSuccessors()) {
569       node->MaybeUpdateCriticalPath(node->GetLatency());
570       candidates.push_back(node);
571     }
572   }
573 
574   ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
575   if (kDumpDotSchedulingGraphs) {
576     // Remember the list of initial candidates for debug output purposes.
577     initial_candidates.assign(candidates.begin(), candidates.end());
578   }
579 
580   // Schedule all nodes.
581   selector_->Reset();
582   while (!candidates.empty()) {
583     SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
584     Schedule(node, &candidates);
585   }
586 
587   if (kDumpDotSchedulingGraphs) {
588     // Dump the graph in `dot` format.
589     HGraph* graph = block->GetGraph();
590     std::stringstream description;
591     description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
592         << " B" << block->GetBlockId();
593     scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
594   }
595 }
596 
Schedule(SchedulingNode * scheduling_node,ScopedArenaVector<SchedulingNode * > * candidates)597 void HScheduler::Schedule(SchedulingNode* scheduling_node,
598                           /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
599   // Check whether any of the node's predecessors will be valid candidates after
600   // this node is scheduled.
601   uint32_t path_to_node = scheduling_node->GetCriticalPath();
602   for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
603     predecessor->MaybeUpdateCriticalPath(
604         path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
605     predecessor->DecrementNumberOfUnscheduledSuccessors();
606     if (!predecessor->HasUnscheduledSuccessors()) {
607       candidates->push_back(predecessor);
608     }
609   }
610   for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
611     // Do not update the critical path.
612     // The 'other' (so 'non-data') dependencies (usually) do not represent a
613     // 'material' dependency of nodes on others. They exist for program
614     // correctness. So we do not use them to compute the critical path.
615     predecessor->DecrementNumberOfUnscheduledSuccessors();
616     if (!predecessor->HasUnscheduledSuccessors()) {
617       candidates->push_back(predecessor);
618     }
619   }
620 
621   Schedule(scheduling_node->GetInstruction());
622 }
623 
624 // Move an instruction after cursor instruction inside one basic block.
MoveAfterInBlock(HInstruction * instruction,HInstruction * cursor)625 static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
626   DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
627   DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
628   DCHECK(!instruction->IsControlFlow());
629   DCHECK(!cursor->IsControlFlow());
630   instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
631 }
632 
Schedule(HInstruction * instruction)633 void HScheduler::Schedule(HInstruction* instruction) {
634   if (instruction == cursor_) {
635     cursor_ = cursor_->GetPrevious();
636   } else {
637     MoveAfterInBlock(instruction, cursor_);
638   }
639 }
640 
IsSchedulable(const HInstruction * instruction) const641 bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
642   // We want to avoid exhaustively listing all instructions, so we first check
643   // for instruction categories that we know are safe.
644   if (instruction->IsControlFlow() ||
645       instruction->IsConstant()) {
646     return true;
647   }
648   // Currently all unary and binary operations are safe to schedule, so avoid
649   // checking for each of them individually.
650   // Since nothing prevents a new scheduling-unsafe HInstruction to subclass
651   // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
652   // the exhaustive lists here.
653   if (instruction->IsUnaryOperation()) {
654     DCHECK(instruction->IsAbs() ||
655            instruction->IsBooleanNot() ||
656            instruction->IsNot() ||
657            instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
658     return true;
659   }
660   if (instruction->IsBinaryOperation()) {
661     DCHECK(instruction->IsAdd() ||
662            instruction->IsAnd() ||
663            instruction->IsCompare() ||
664            instruction->IsCondition() ||
665            instruction->IsDiv() ||
666            instruction->IsMin() ||
667            instruction->IsMax() ||
668            instruction->IsMul() ||
669            instruction->IsOr() ||
670            instruction->IsRem() ||
671            instruction->IsRor() ||
672            instruction->IsShl() ||
673            instruction->IsShr() ||
674            instruction->IsSub() ||
675            instruction->IsUShr() ||
676            instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
677     return true;
678   }
679   // The scheduler should not see any of these.
680   DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
681   // List of instructions explicitly excluded:
682   //    HClearException
683   //    HClinitCheck
684   //    HDeoptimize
685   //    HLoadClass
686   //    HLoadException
687   //    HMemoryBarrier
688   //    HMonitorOperation
689   //    HNop
690   //    HThrow
691   //    HTryBoundary
692   //    All unresolved field access instructions
693   //    All volatile field access instructions, e.g. HInstanceFieldGet
694   // TODO: Some of the instructions above may be safe to schedule (maybe as
695   // scheduling barriers).
696   return instruction->IsArrayGet() ||
697          instruction->IsArraySet() ||
698          instruction->IsArrayLength() ||
699          instruction->IsBoundType() ||
700          instruction->IsBoundsCheck() ||
701          instruction->IsCheckCast() ||
702          instruction->IsClassTableGet() ||
703          instruction->IsCurrentMethod() ||
704          instruction->IsDivZeroCheck() ||
705          (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
706          (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
707          instruction->IsInstanceOf() ||
708          instruction->IsInvokeInterface() ||
709          instruction->IsInvokeStaticOrDirect() ||
710          instruction->IsInvokeUnresolved() ||
711          instruction->IsInvokeVirtual() ||
712          instruction->IsLoadString() ||
713          instruction->IsNewArray() ||
714          instruction->IsNewInstance() ||
715          instruction->IsNullCheck() ||
716          instruction->IsPackedSwitch() ||
717          instruction->IsParameterValue() ||
718          instruction->IsPhi() ||
719          instruction->IsReturn() ||
720          instruction->IsReturnVoid() ||
721          instruction->IsSelect() ||
722          (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
723          (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
724          instruction->IsSuspendCheck() ||
725          instruction->IsTypeConversion();
726 }
727 
IsSchedulable(const HBasicBlock * block) const728 bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
729   // We may be only interested in loop blocks.
730   if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
731     return false;
732   }
733   if (block->GetTryCatchInformation() != nullptr) {
734     // Do not schedule blocks that are part of try-catch.
735     // Because scheduler cannot see if catch block has assumptions on the instruction order in
736     // the try block. In following example, if we enable scheduler for the try block,
737     // MulitiplyAccumulate may be scheduled before DivZeroCheck,
738     // which can result in an incorrect value in the catch block.
739     //   try {
740     //     a = a/b;    // DivZeroCheck
741     //                 // Div
742     //     c = c*d+e;  // MulitiplyAccumulate
743     //   } catch {System.out.print(c); }
744     return false;
745   }
746   // Check whether all instructions in this block are schedulable.
747   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
748     if (!IsSchedulable(it.Current())) {
749       return false;
750     }
751   }
752   return true;
753 }
754 
IsSchedulingBarrier(const HInstruction * instr) const755 bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
756   return instr->IsControlFlow() ||
757       // Don't break calling convention.
758       instr->IsParameterValue() ||
759       // Code generation of goto relies on SuspendCheck's position.
760       instr->IsSuspendCheck();
761 }
762 
Run(bool only_optimize_loop_blocks,bool schedule_randomly)763 bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
764                                  bool schedule_randomly) {
765 #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
766   // Phase-local allocator that allocates scheduler internal data structures like
767   // scheduling nodes, internel nodes map, dependencies, etc.
768   CriticalPathSchedulingNodeSelector critical_path_selector;
769   // Do not create the `RandomSchedulingNodeSelector` if not requested.
770   // The construction is expensive, including a call to `srand()`.
771   std::optional<RandomSchedulingNodeSelector> random_selector;
772   SchedulingNodeSelector* selector = &critical_path_selector;
773   if (schedule_randomly) {
774     random_selector.emplace();
775     selector = &random_selector.value();
776   }
777 #else
778   // Avoid compilation error when compiling for unsupported instruction set.
779   UNUSED(only_optimize_loop_blocks);
780   UNUSED(schedule_randomly);
781   UNUSED(codegen_);
782 #endif
783 
784   switch (instruction_set_) {
785 #ifdef ART_ENABLE_CODEGEN_arm64
786     case InstructionSet::kArm64: {
787       arm64::HSchedulerARM64 scheduler(selector);
788       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
789       scheduler.Schedule(graph_);
790       break;
791     }
792 #endif
793 #if defined(ART_ENABLE_CODEGEN_arm)
794     case InstructionSet::kThumb2:
795     case InstructionSet::kArm: {
796       arm::HSchedulerARM scheduler(selector, codegen_);
797       scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
798       scheduler.Schedule(graph_);
799       break;
800     }
801 #endif
802     default:
803       break;
804   }
805   return true;
806 }
807 
808 }  // namespace art
809