blob: ece0914ddbc11470f4d26d34ba2ad4100a18678c [file] [log] [blame]
Artem Serov7f4aff62017-06-21 17:02:18 +01001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19
20#include "base/arena_bit_vector.h"
21#include "base/arena_containers.h"
22#include "base/bit_vector-inl.h"
23#include "nodes.h"
24
25namespace art {
26
Nicolas Geoffray256c94b2019-04-29 10:55:09 +010027class InductionVarRange;
28
Artem Serov7f4aff62017-06-21 17:02:18 +010029static const bool kSuperblockClonerLogging = false;
Artem Serov7f4aff62017-06-21 17:02:18 +010030
31// Represents an edge between two HBasicBlocks.
32//
33// Note: objects of this class are small - pass them by value.
34class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
35 public:
36 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
37 DCHECK_NE(to_, kInvalidBlockId);
38 DCHECK_NE(from_, kInvalidBlockId);
39 }
40 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
41 DCHECK_NE(to_, kInvalidBlockId);
42 DCHECK_NE(from_, kInvalidBlockId);
43 }
44 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
45
46 uint32_t GetFrom() const { return from_; }
47 uint32_t GetTo() const { return to_; }
48
49 bool operator==(const HEdge& other) const {
50 return this->from_ == other.from_ && this->to_ == other.to_;
51 }
52
53 bool operator!=(const HEdge& other) const { return !operator==(other); }
54 void Dump(std::ostream& stream) const;
55
56 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
57 // has to_ block as a successor.
58 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
59
60 private:
61 // Predecessor block id.
62 uint32_t from_;
63 // Successor block id.
64 uint32_t to_;
65};
66
67// Returns whether a HEdge edge corresponds to an existing edge in the graph.
68inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
69 if (!edge.IsValid()) {
70 return false;
71 }
72 uint32_t from = edge.GetFrom();
73 uint32_t to = edge.GetTo();
74 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
75 return false;
76 }
77
78 HBasicBlock* block_from = graph->GetBlocks()[from];
79 HBasicBlock* block_to = graph->GetBlocks()[to];
80 if (block_from == nullptr || block_to == nullptr) {
81 return false;
82 }
83
84 return block_from->HasSuccessor(block_to, 0);
85}
86
87// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
88// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
89// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
90// and a set of rules how to treat edges, remap their successors. By using this approach such
91// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
92//
93// The idea of the transformation is based on "Superblock cloning" technique described in the book
94// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
95// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
96// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
97// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
98//
99// There are two states of the IR graph: original graph (before the transformation) and
100// copy graph (after).
101//
102// Before the transformation:
103// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
104// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
105// where pred, succ - basic blocks):
106// - internal - pred, succ are members of ‘orig_bb_set’.
107// - outside - pred, succ are not members of ‘orig_bb_set’.
108// - incoming - pred is not a member of ‘orig_bb_set’, succ is.
109// - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
110//
111// Transformation:
112//
113// 1. Initial cloning:
114// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
115// form ‘copy_bb_set’.
116// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
117// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
118// set.
119// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
120// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
121// set.
122// 2. Successors remapping.
123// 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
124// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
125// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
126// should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
127// 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
128// whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
129// (X, Y_1)).
130// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
131// 4. Fix/resolve data flow.
132// 5. Do cleanups (DCE, critical edges splitting, etc).
133//
134class SuperblockCloner : public ValueObject {
135 public:
136 // TODO: Investigate optimal types for the containers.
137 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
138 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
139 using HBasicBlockSet = ArenaBitVector;
140 using HEdgeSet = ArenaHashSet<HEdge>;
141
142 SuperblockCloner(HGraph* graph,
143 const HBasicBlockSet* orig_bb_set,
144 HBasicBlockMap* bb_map,
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100145 HInstructionMap* hir_map,
146 InductionVarRange* induction_range);
Artem Serov7f4aff62017-06-21 17:02:18 +0100147
148 // Sets edge successor remapping info specified by corresponding edge sets.
149 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
150 const HEdgeSet* remap_copy_internal,
151 const HEdgeSet* remap_incoming);
152
153 // Returns whether the specified subgraph is copyable.
154 // TODO: Start from small range of graph patterns then extend it.
155 bool IsSubgraphClonable() const;
156
Artem Serov02eebcf2017-12-13 19:48:31 +0000157 // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
158 // when iterative DF algorithm is not required and dominators/instructions inputs can be
159 // trivially adjusted.
160 //
161 // TODO: formally describe the criteria.
162 //
163 // Loop peeling and unrolling satisfy the criteria.
164 bool IsFastCase() const;
165
Artem Serov7f4aff62017-06-21 17:02:18 +0100166 // Runs the copy algorithm according to the description.
167 void Run();
168
169 // Cleans up the graph after transformation: splits critical edges, recalculates control flow
170 // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
171 void CleanUp();
172
173 // Returns a clone of a basic block (orig_block).
174 //
175 // - The copy block will have no successors/predecessors; they should be set up manually.
176 // - For each instruction in the orig_block a copy is created and inserted into the copy block;
177 // this correspondence is recorded in the map (old instruction, new instruction).
178 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
179 // same, as in the original block, PHIs do not reflect a correct correspondence between the
180 // value and predecessors (as the copy block has no predecessors by now), etc.
181 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
182
183 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
184 // and hir_map_.
185 void CloneBasicBlocks();
186
187 HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
188 auto copy_input_iter = hir_map_->find(orig_instr);
189 DCHECK(copy_input_iter != hir_map_->end());
190 return copy_input_iter->second;
191 }
192
193 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
194 HBasicBlock* block = bb_map_->Get(orig_block);
195 DCHECK(block != nullptr);
196 return block;
197 }
198
199 HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
200 for (auto it : *hir_map_) {
201 if (it.second == copy_instr) {
202 return it.first;
203 }
204 }
205 return nullptr;
206 }
207
208 bool IsInOrigBBSet(uint32_t block_id) const {
209 return orig_bb_set_.IsBitSet(block_id);
210 }
211
212 bool IsInOrigBBSet(const HBasicBlock* block) const {
213 return IsInOrigBBSet(block->GetBlockId());
214 }
215
Artem Serov02eebcf2017-12-13 19:48:31 +0000216 // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
217 // dominators) needs to be adjusted.
218 HLoopInformation* GetRegionToBeAdjusted() const {
219 return outer_loop_;
220 }
221
Artem Serov7f4aff62017-06-21 17:02:18 +0100222 private:
223 // Fills the 'exits' vector with the subgraph exits.
Artem Serovca210e32017-12-15 13:43:20 +0000224 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
Artem Serov7f4aff62017-06-21 17:02:18 +0100225
Artem Serov02eebcf2017-12-13 19:48:31 +0000226 // Finds and records information about the area in the graph for which control flow (back edges,
Artem Serov7f4aff62017-06-21 17:02:18 +0100227 // loops, dominators) needs to be adjusted.
228 void FindAndSetLocalAreaForAdjustments();
229
230 // Remaps edges' successors according to the info specified in the edges sets.
231 //
232 // Only edge successors/predecessors and phis' input records (to have a correspondence between
233 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
234 // phis' nor instructions' inputs values are resolved.
235 void RemapEdgesSuccessors();
236
Artem Serov02eebcf2017-12-13 19:48:31 +0000237 // Adjusts control flow (back edges, loops, dominators) for the local area defined by
Artem Serov7f4aff62017-06-21 17:02:18 +0100238 // FindAndSetLocalAreaForAdjustments.
239 void AdjustControlFlowInfo();
240
241 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
242 // the SSA form.
243 void ResolveDataFlow();
244
245 //
Artem Serovca210e32017-12-15 13:43:20 +0000246 // Helpers for live-outs processing and Subgraph-closed SSA.
247 //
248 // - live-outs - values which are defined inside the subgraph and have uses outside.
249 // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
250 // have no outside uses except for the phi-nodes in the subgraph exits.
251 //
252 // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this
253 // makes the subgraph-closed SSA form construction much easier.
254 //
255 // TODO: Support subgraphs with live-outs and multiple exits.
256 //
257
258 // For each live-out value 'val' in the region puts a record <val, val> into the map.
259 // Returns whether all of the instructions in the subgraph are clonable.
260 bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
261
262 // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
263 //
264 // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
265 // the record in the map to <val, phi> and replaces all outside uses with this phi.
266 void ConstructSubgraphClosedSSA();
267
268 // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
269 // (<val, phi>) phi after the cloning is done.
270 void FixSubgraphClosedSSAAfterCloning();
271
272 //
Artem Serov7f4aff62017-06-21 17:02:18 +0100273 // Helpers for CloneBasicBlock.
274 //
275
276 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
277 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
278 void ReplaceInputsWithCopies(HInstruction* copy_instr);
279
280 // Recursively clones the environment for the copy instruction. If the input of the original
281 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
282 // leaves it the same as original.
283 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
284
285 //
286 // Helpers for RemapEdgesSuccessors.
287 //
288
289 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
290 // copy_succ.
291 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
292
293 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
294 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
295
296 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
297 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
298
299 //
300 // Local versions of control flow calculation/adjustment routines.
301 //
302
303 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
304 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
305 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
306 void CleanUpControlFlow();
307
308 //
309 // Helpers for ResolveDataFlow
310 //
311
312 // Resolves the inputs of the phi.
313 void ResolvePhi(HPhi* phi);
314
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100315 // Update induction range after when fixing SSA.
316 void UpdateInductionRangeInfoOf(
317 HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
318
Artem Serov7f4aff62017-06-21 17:02:18 +0100319 //
320 // Debug and logging methods.
321 //
322 void CheckInstructionInputsRemapping(HInstruction* orig_instr);
Artem Serov02eebcf2017-12-13 19:48:31 +0000323 bool CheckRemappingInfoIsValid();
324 void VerifyGraph();
325 void DumpInputSets();
Artem Serov7f4aff62017-06-21 17:02:18 +0100326
327 HBasicBlock* GetBlockById(uint32_t block_id) const {
328 DCHECK(block_id < graph_->GetBlocks().size());
329 HBasicBlock* block = graph_->GetBlocks()[block_id];
330 DCHECK(block != nullptr);
331 return block;
332 }
333
334 HGraph* const graph_;
335 ArenaAllocator* const arena_;
336
337 // Set of basic block in the original graph to be copied.
338 HBasicBlockSet orig_bb_set_;
339
340 // Sets of edges which require successors remapping.
341 const HEdgeSet* remap_orig_internal_;
342 const HEdgeSet* remap_copy_internal_;
343 const HEdgeSet* remap_incoming_;
344
345 // Correspondence map for blocks: (original block, copy block).
346 HBasicBlockMap* bb_map_;
347 // Correspondence map for instructions: (original HInstruction, copy HInstruction).
348 HInstructionMap* hir_map_;
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100349 // As a result of cloning, the induction range analysis information can be invalidated
350 // and must be updated. If not null, the cloner updates it for changed instructions.
351 InductionVarRange* induction_range_;
Artem Serov02eebcf2017-12-13 19:48:31 +0000352 // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
Artem Serov7f4aff62017-06-21 17:02:18 +0100353 HLoopInformation* outer_loop_;
354 HBasicBlockSet outer_loop_bb_set_;
355
Artem Serovca210e32017-12-15 13:43:20 +0000356 HInstructionMap live_outs_;
357
Artem Serov7f4aff62017-06-21 17:02:18 +0100358 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
Artem Serov02eebcf2017-12-13 19:48:31 +0000359 ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
Artem Serov7f4aff62017-06-21 17:02:18 +0100360
361 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
362};
363
Artem Serov02eebcf2017-12-13 19:48:31 +0000364// Helper class to perform loop peeling/unrolling.
365//
366// This helper should be used when correspondence map between original and copied
367// basic blocks/instructions are demanded.
368class PeelUnrollHelper : public ValueObject {
369 public:
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100370 PeelUnrollHelper(HLoopInformation* info,
371 SuperblockCloner::HBasicBlockMap* bb_map,
372 SuperblockCloner::HInstructionMap* hir_map,
373 InductionVarRange* induction_range) :
Artem Serov02eebcf2017-12-13 19:48:31 +0000374 loop_info_(info),
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100375 cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
Artem Serov02eebcf2017-12-13 19:48:31 +0000376 // For now do peeling/unrolling only for natural loops.
377 DCHECK(!info->IsIrreducible());
378 }
379
380 // Returns whether the loop can be peeled/unrolled (static function).
381 static bool IsLoopClonable(HLoopInformation* loop_info);
382
383 // Returns whether the loop can be peeled/unrolled.
384 bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
385
Andreas Gampe3db70682018-12-26 15:12:03 -0800386 HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); }
387 HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); }
Artem Serov02eebcf2017-12-13 19:48:31 +0000388 HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
389
390 protected:
391 // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
392 //
393 // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
394 HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
395
396 private:
397 HLoopInformation* loop_info_;
398 SuperblockCloner cloner_;
399
400 DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
401};
402
403// Helper class to perform loop peeling/unrolling.
404//
405// This helper should be used when there is no need to get correspondence information between
406// original and copied basic blocks/instructions.
407class PeelUnrollSimpleHelper : public ValueObject {
408 public:
Nicolas Geoffray256c94b2019-04-29 10:55:09 +0100409 PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
Artem Serov02eebcf2017-12-13 19:48:31 +0000410 bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
411 HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
412 HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
413 HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
414
Artem Serov72411e62017-10-19 16:18:07 +0100415 const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
416 const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
417
Artem Serov02eebcf2017-12-13 19:48:31 +0000418 private:
419 SuperblockCloner::HBasicBlockMap bb_map_;
420 SuperblockCloner::HInstructionMap hir_map_;
421 PeelUnrollHelper helper_;
422
423 DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
424};
425
426// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
427void CollectRemappingInfoForPeelUnroll(bool to_unroll,
428 HLoopInformation* loop_info,
429 SuperblockCloner::HEdgeSet* remap_orig_internal,
430 SuperblockCloner::HEdgeSet* remap_copy_internal,
431 SuperblockCloner::HEdgeSet* remap_incoming);
432
433// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
434//
435// Returns whether such a set 'outer_entries' of basic blocks exists that:
436// - each block from 'outer_entries' is not from 'work_set'.
437// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
438//
439// After the function returns work_set contains only blocks from the original 'work_set'
440// which are unreachable from the rest of the graph.
441bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
442
443// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
444// graph.
445HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
Artem Serov7f4aff62017-06-21 17:02:18 +0100446} // namespace art
447
448namespace std {
449
450template <>
451struct hash<art::HEdge> {
452 size_t operator()(art::HEdge const& x) const noexcept {
453 // Use Cantor pairing function as the hash function.
Artem Serov02eebcf2017-12-13 19:48:31 +0000454 size_t a = x.GetFrom();
455 size_t b = x.GetTo();
Artem Serov7f4aff62017-06-21 17:02:18 +0100456 return (a + b) * (a + b + 1) / 2 + b;
457 }
458};
Artem Serov02eebcf2017-12-13 19:48:31 +0000459ostream& operator<<(ostream& os, const art::HEdge& e);
Artem Serov7f4aff62017-06-21 17:02:18 +0100460
461} // namespace std
462
463#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_