1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18 #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19
20 #include "base/arena_bit_vector.h"
21 #include "base/arena_containers.h"
22 #include "base/bit_vector-inl.h"
23 #include "base/macros.h"
24 #include "nodes.h"
25
26 namespace art HIDDEN {
27
28 class InductionVarRange;
29
30 static const bool kSuperblockClonerLogging = false;
31 static const bool kSuperblockClonerVerify = false;
32
33 // Represents an edge between two HBasicBlocks.
34 //
35 // Note: objects of this class are small - pass them by value.
36 class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
37 public:
HEdge(HBasicBlock * from,HBasicBlock * to)38 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
39 DCHECK_NE(to_, kInvalidBlockId);
40 DCHECK_NE(from_, kInvalidBlockId);
41 }
HEdge(uint32_t from,uint32_t to)42 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
43 DCHECK_NE(to_, kInvalidBlockId);
44 DCHECK_NE(from_, kInvalidBlockId);
45 }
HEdge()46 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
47
GetFrom()48 uint32_t GetFrom() const { return from_; }
GetTo()49 uint32_t GetTo() const { return to_; }
50
51 bool operator==(const HEdge& other) const {
52 return this->from_ == other.from_ && this->to_ == other.to_;
53 }
54
55 bool operator!=(const HEdge& other) const { return !operator==(other); }
56 void Dump(std::ostream& stream) const;
57
58 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
59 // has to_ block as a successor.
IsValid()60 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
61
62 private:
63 // Predecessor block id.
64 uint32_t from_;
65 // Successor block id.
66 uint32_t to_;
67 };
68
69 // Returns whether a HEdge edge corresponds to an existing edge in the graph.
IsEdgeValid(HEdge edge,HGraph * graph)70 inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
71 if (!edge.IsValid()) {
72 return false;
73 }
74 uint32_t from = edge.GetFrom();
75 uint32_t to = edge.GetTo();
76 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
77 return false;
78 }
79
80 HBasicBlock* block_from = graph->GetBlocks()[from];
81 HBasicBlock* block_to = graph->GetBlocks()[to];
82 if (block_from == nullptr || block_to == nullptr) {
83 return false;
84 }
85
86 return block_from->HasSuccessor(block_to, 0);
87 }
88
89 // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
90 // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
91 // automatically. The clone transformation is defined by specifying a set of basic blocks to copy
92 // and a set of rules how to treat edges, remap their successors. By using this approach such
93 // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be
94 // implemented.
95 //
96 // The idea of the transformation is based on "Superblock cloning" technique described in the book
97 // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
98 // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
99 // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
100 // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
101 //
102 // There are two states of the IR graph: original graph (before the transformation) and
103 // copy graph (after).
104 //
105 // Before the transformation:
106 // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
107 // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
108 // where pred, succ - basic blocks):
109 // - internal - pred, succ are members of ‘orig_bb_set’.
110 // - outside - pred, succ are not members of ‘orig_bb_set’.
111 // - incoming - pred is not a member of ‘orig_bb_set’, succ is.
112 // - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
113 //
114 // Transformation:
115 //
116 // 1. Initial cloning:
117 // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
118 // form ‘copy_bb_set’.
119 // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
120 // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
121 // set.
122 // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
123 // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
124 // set.
125 // 2. Successors remapping.
126 // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
127 // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
128 // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
129 // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
130 // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
131 // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
132 // (X, Y_1)).
133 // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
134 // 4. Fix/resolve data flow.
135 // 5. Do cleanups (DCE, critical edges splitting, etc).
136 //
137 class SuperblockCloner : public ValueObject {
138 public:
139 // TODO: Investigate optimal types for the containers.
140 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
141 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
142 using HBasicBlockSet = ArenaBitVector;
143 using HEdgeSet = ArenaHashSet<HEdge>;
144
145 SuperblockCloner(HGraph* graph,
146 const HBasicBlockSet* orig_bb_set,
147 HBasicBlockMap* bb_map,
148 HInstructionMap* hir_map,
149 InductionVarRange* induction_range);
150
151 // Sets edge successor remapping info specified by corresponding edge sets.
152 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
153 const HEdgeSet* remap_copy_internal,
154 const HEdgeSet* remap_incoming);
155
156 // Returns whether the specified subgraph is copyable.
157 // TODO: Start from small range of graph patterns then extend it.
158 bool IsSubgraphClonable() const;
159
160 // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
161 // when iterative DF algorithm is not required and dominators/instructions inputs can be
162 // trivially adjusted.
163 //
164 // TODO: formally describe the criteria.
165 //
166 // Loop peeling, unrolling and versioning satisfy the criteria.
167 bool IsFastCase() const;
168
169 // Runs the copy algorithm according to the description.
170 void Run();
171
172 // Cleans up the graph after transformation: splits critical edges, recalculates control flow
173 // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
174 void CleanUp();
175
176 // Returns a clone of a basic block (orig_block).
177 //
178 // - The copy block will have no successors/predecessors; they should be set up manually.
179 // - For each instruction in the orig_block a copy is created and inserted into the copy block;
180 // this correspondence is recorded in the map (old instruction, new instruction).
181 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
182 // same, as in the original block, PHIs do not reflect a correct correspondence between the
183 // value and predecessors (as the copy block has no predecessors by now), etc.
184 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
185
186 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
187 // and hir_map_.
188 void CloneBasicBlocks();
189
GetInstrCopy(HInstruction * orig_instr)190 HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
191 auto copy_input_iter = hir_map_->find(orig_instr);
192 DCHECK(copy_input_iter != hir_map_->end());
193 return copy_input_iter->second;
194 }
195
GetBlockCopy(HBasicBlock * orig_block)196 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
197 HBasicBlock* block = bb_map_->Get(orig_block);
198 DCHECK(block != nullptr);
199 return block;
200 }
201
GetInstrOrig(HInstruction * copy_instr)202 HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
203 for (auto it : *hir_map_) {
204 if (it.second == copy_instr) {
205 return it.first;
206 }
207 }
208 return nullptr;
209 }
210
IsInOrigBBSet(uint32_t block_id)211 bool IsInOrigBBSet(uint32_t block_id) const {
212 return orig_bb_set_.IsBitSet(block_id);
213 }
214
IsInOrigBBSet(const HBasicBlock * block)215 bool IsInOrigBBSet(const HBasicBlock* block) const {
216 return IsInOrigBBSet(block->GetBlockId());
217 }
218
219 // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
220 // dominators) needs to be adjusted.
GetRegionToBeAdjusted()221 HLoopInformation* GetRegionToBeAdjusted() const {
222 return outer_loop_;
223 }
224
225 private:
226 // Fills the 'exits' vector with the subgraph exits.
227 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
228
229 // Finds and records information about the area in the graph for which control flow (back edges,
230 // loops, dominators) needs to be adjusted.
231 void FindAndSetLocalAreaForAdjustments();
232
233 // Remaps edges' successors according to the info specified in the edges sets.
234 //
235 // Only edge successors/predecessors and phis' input records (to have a correspondence between
236 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
237 // phis' nor instructions' inputs values are resolved.
238 void RemapEdgesSuccessors();
239
240 // Adjusts control flow (back edges, loops, dominators) for the local area defined by
241 // FindAndSetLocalAreaForAdjustments.
242 void AdjustControlFlowInfo();
243
244 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
245 // the SSA form.
246 void ResolveDataFlow();
247
248 //
249 // Helpers for live-outs processing and Subgraph-closed SSA.
250 //
251 // - live-outs - values which are defined inside the subgraph and have uses outside.
252 // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
253 // have no outside uses except for the phi-nodes in the subgraph exits.
254 //
255 // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
256 // makes the subgraph-closed SSA form construction much easier.
257 //
258 // TODO: Support subgraphs with live-outs and multiple exits.
259 //
260
261 // For each live-out value 'val' in the region puts a record <val, val> into the map.
262 // Returns whether all of the instructions in the subgraph are clonable.
263 bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
264
265 // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
266 //
267 // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
268 // the record in the map to <val, phi> and replaces all outside uses with this phi.
269 void ConstructSubgraphClosedSSA();
270
271 // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
272 // (<val, phi>) phi after the cloning is done.
273 void FixSubgraphClosedSSAAfterCloning();
274
275 //
276 // Helpers for CloneBasicBlock.
277 //
278
279 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
280 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
281 void ReplaceInputsWithCopies(HInstruction* copy_instr);
282
283 // Recursively clones the environment for the copy instruction. If the input of the original
284 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
285 // leaves it the same as original.
286 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
287
288 //
289 // Helpers for RemapEdgesSuccessors.
290 //
291
292 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
293 // copy_succ.
294 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
295
296 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
297 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
298
299 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
300 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
301
302 // Checks whether the edges remapping info corresponds to the subgraph versioning case:
303 // - none of the incoming edges are to be remapped (they are being duplicated).
304 // - none of the internal edges are to be remapped.
305 bool IsRemapInfoForVersioning() const;
306
307 // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds
308 // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi.
309 //
310 // Note: such node X will now have two successors, its unconditional branch instruction
311 // will be invalid and should be adjusted to some conditional branch by the client code.
312 void CopyIncomingEdgesForVersioning();
313
314 //
315 // Local versions of control flow calculation/adjustment routines.
316 //
317
318 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
319 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
320 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
321 void CleanUpControlFlow();
322
323 //
324 // Helpers for ResolveDataFlow
325 //
326
327 // Resolves the inputs of the phi.
328 void ResolvePhi(HPhi* phi);
329
330 // Update induction range after when fixing SSA.
331 void UpdateInductionRangeInfoOf(
332 HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
333
334 //
335 // Debug and logging methods.
336 //
337 void CheckInstructionInputsRemapping(HInstruction* orig_instr);
338 bool CheckRemappingInfoIsValid();
339 void VerifyGraph();
340 void DumpInputSets();
341
GetBlockById(uint32_t block_id)342 HBasicBlock* GetBlockById(uint32_t block_id) const {
343 DCHECK(block_id < graph_->GetBlocks().size());
344 HBasicBlock* block = graph_->GetBlocks()[block_id];
345 DCHECK(block != nullptr);
346 return block;
347 }
348
349 HGraph* const graph_;
350 ArenaAllocator* const arena_;
351
352 // Set of basic block in the original graph to be copied.
353 HBasicBlockSet orig_bb_set_;
354
355 // Sets of edges which require successors remapping.
356 const HEdgeSet* remap_orig_internal_;
357 const HEdgeSet* remap_copy_internal_;
358 const HEdgeSet* remap_incoming_;
359
360 // Correspondence map for blocks: (original block, copy block).
361 HBasicBlockMap* bb_map_;
362 // Correspondence map for instructions: (original HInstruction, copy HInstruction).
363 HInstructionMap* hir_map_;
364 // As a result of cloning, the induction range analysis information can be invalidated
365 // and must be updated. If not null, the cloner updates it for changed instructions.
366 InductionVarRange* induction_range_;
367 // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
368 HLoopInformation* outer_loop_;
369 HBasicBlockSet outer_loop_bb_set_;
370
371 HInstructionMap live_outs_;
372
373 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
374 ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
375
376 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
377 };
378
379 // Helper class to perform loop peeling/unrolling/versioning.
380 //
381 // This helper should be used when correspondence map between original and copied
382 // basic blocks/instructions are demanded.
383 class LoopClonerHelper : public ValueObject {
384 public:
LoopClonerHelper(HLoopInformation * info,SuperblockCloner::HBasicBlockMap * bb_map,SuperblockCloner::HInstructionMap * hir_map,InductionVarRange * induction_range)385 LoopClonerHelper(HLoopInformation* info,
386 SuperblockCloner::HBasicBlockMap* bb_map,
387 SuperblockCloner::HInstructionMap* hir_map,
388 InductionVarRange* induction_range) :
389 loop_info_(info),
390 cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
391 // For now do transformations only for natural loops.
392 DCHECK(!info->IsIrreducible());
393 }
394
395 // Returns whether the loop can be peeled/unrolled (static function).
396 static bool IsLoopClonable(HLoopInformation* loop_info);
397
398 // Returns whether the loop can be peeled/unrolled.
IsLoopClonable()399 bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
400
401 // Perform loop peeling.
402 //
403 // Control flow of an example (ignoring critical edges splitting).
404 //
405 // Before After
406 //
407 // |B| |B|
408 // | |
409 // v v
410 // |1| |1|
411 // | |
412 // v v
413 // |2|<-\ |2A|
414 // / \ / / \
415 // v v/ / v
416 // |4| |3| / |3A|
417 // | / /
418 // v | v
419 // |E| \ |2|<-\
420 // \ / \ /
421 // v v /
422 // |4| |3|
423 // |
424 // v
425 // |E|
DoPeeling()426 HBasicBlock* DoPeeling() {
427 return DoLoopTransformationImpl(TransformationKind::kPeeling);
428 }
429
430 // Perform loop unrolling.
431 //
432 // Control flow of an example (ignoring critical edges splitting).
433 //
434 // Before After
435 //
436 // |B| |B|
437 // | |
438 // v v
439 // |1| |1|
440 // | |
441 // v v
442 // |2|<-\ |2A|<-\
443 // / \ / / \ \
444 // v v/ / v \
445 // |4| |3| / |3A| |
446 // | / / /
447 // v | v /
448 // |E| \ |2| /
449 // \ / \ /
450 // v v/
451 // |4| |3|
452 // |
453 // v
454 // |E|
DoUnrolling()455 HBasicBlock* DoUnrolling() {
456 return DoLoopTransformationImpl(TransformationKind::kUnrolling);
457 }
458
459 // Perform loop versioning.
460 //
461 // Control flow of an example (ignoring critical edges splitting).
462 //
463 // Before After
464 //
465 // |B| |B|
466 // | |
467 // v v
468 // |1| |1|_________
469 // | | |
470 // v v v
471 // |2|<-\ |2|<-\ |2A|<-\
472 // / \ / / \ / / \ /
473 // v v/ | v/ | v/
474 // | |3| | |3| | |3A|
475 // | | __________|
476 // | ||
477 // v vv
478 // |4| |4|
479 // | |
480 // v v
481 // |E| |E|
DoVersioning()482 HBasicBlock* DoVersioning() {
483 return DoLoopTransformationImpl(TransformationKind::kVersioning);
484 }
485
GetRegionToBeAdjusted()486 HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
487
488 protected:
489 enum class TransformationKind {
490 kPeeling,
491 kUnrolling,
492 kVersioning,
493 };
494
495 // Applies a specific loop transformation to the loop.
496 HBasicBlock* DoLoopTransformationImpl(TransformationKind transformation);
497
498 private:
499 HLoopInformation* loop_info_;
500 SuperblockCloner cloner_;
501
502 DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper);
503 };
504
505 // Helper class to perform loop peeling/unrolling/versioning.
506 //
507 // This helper should be used when there is no need to get correspondence information between
508 // original and copied basic blocks/instructions.
509 class LoopClonerSimpleHelper : public ValueObject {
510 public:
511 LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
IsLoopClonable()512 bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
DoPeeling()513 HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
DoUnrolling()514 HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
DoVersioning()515 HBasicBlock* DoVersioning() { return helper_.DoVersioning(); }
GetRegionToBeAdjusted()516 HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
517
GetBasicBlockMap()518 const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
GetInstructionMap()519 const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
520
521 private:
522 SuperblockCloner::HBasicBlockMap bb_map_;
523 SuperblockCloner::HInstructionMap hir_map_;
524 LoopClonerHelper helper_;
525
526 DISALLOW_COPY_AND_ASSIGN(LoopClonerSimpleHelper);
527 };
528
529 // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
530 void CollectRemappingInfoForPeelUnroll(bool to_unroll,
531 HLoopInformation* loop_info,
532 SuperblockCloner::HEdgeSet* remap_orig_internal,
533 SuperblockCloner::HEdgeSet* remap_copy_internal,
534 SuperblockCloner::HEdgeSet* remap_incoming);
535
536 // Returns whether blocks from 'work_set' are reachable from the rest of the graph.
537 //
538 // Returns whether such a set 'outer_entries' of basic blocks exists that:
539 // - each block from 'outer_entries' is not from 'work_set'.
540 // - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
541 //
542 // After the function returns work_set contains only blocks from the original 'work_set'
543 // which are unreachable from the rest of the graph.
544 bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
545
546 // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
547 // graph.
548 HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
549 } // namespace art
550
551 namespace std {
552
553 template <>
554 struct hash<art::HEdge> {
555 size_t operator()(art::HEdge const& x) const noexcept {
556 // Use Cantor pairing function as the hash function.
557 size_t a = x.GetFrom();
558 size_t b = x.GetTo();
559 return (a + b) * (a + b + 1) / 2 + b;
560 }
561 };
562 ostream& operator<<(ostream& os, const art::HEdge& e);
563
564 } // namespace std
565
566 #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
567