blob: ee74f1001f4f5ac28648e37ea1a73aca8a83163c [file] [log] [blame]
Artem Serov7f4aff62017-06-21 17:02:18 +01001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "superblock_cloner.h"
18
19#include "common_dominator.h"
20#include "graph_checker.h"
21
22#include <iostream>
23
24namespace art {
25
26using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
27using HInstructionMap = SuperblockCloner::HInstructionMap;
28using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
29using HEdgeSet = SuperblockCloner::HEdgeSet;
30
Artem Serov02eebcf2017-12-13 19:48:31 +000031// When doing peeling we can choose whether to keep original loop (made of original basic blocks)
32// and form a peeled iteration of the copy blocks (preserve the header) or transfer original loop
33// blocks to the peeled iteration and create new loop from the copy blocks. Similar for unrolling.
34static const bool kPeelUnrollPreserveHeader = true;
35
Artem Serov7f4aff62017-06-21 17:02:18 +010036void HEdge::Dump(std::ostream& stream) const {
37 stream << "(" << from_ << "->" << to_ << ")";
38}
39
40//
41// Static helper methods.
42//
43
44// Returns whether instruction has any uses (regular or environmental) outside the region,
45// defined by basic block set.
46static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
47 auto& uses = instr->GetUses();
48 for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
49 HInstruction* user = use_node->GetUser();
50 if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
51 return true;
52 }
53 }
54
55 auto& env_uses = instr->GetEnvUses();
56 for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
57 HInstruction* user = use_node->GetUser()->GetHolder();
58 if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
59 return true;
60 }
61 }
62
63 return false;
64}
65
66// Returns whether the phi's inputs are the same HInstruction.
67static bool ArePhiInputsTheSame(const HPhi* phi) {
68 HInstruction* first_input = phi->InputAt(0);
69 for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
70 if (phi->InputAt(i) != first_input) {
71 return false;
72 }
73 }
74
75 return true;
76}
77
Artem Serov02eebcf2017-12-13 19:48:31 +000078// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method).
79static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
80 if (set1->Size() != set2->Size()) {
81 return false;
Artem Serov7f4aff62017-06-21 17:02:18 +010082 }
83
Artem Serov02eebcf2017-12-13 19:48:31 +000084 for (auto e : *set1) {
85 if (set2->Find(e) == set2->end()) {
86 return false;
87 }
Artem Serov7f4aff62017-06-21 17:02:18 +010088 }
Artem Serov02eebcf2017-12-13 19:48:31 +000089 return true;
Artem Serov7f4aff62017-06-21 17:02:18 +010090}
91
92// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
93static void OrderLoopsHeadersPredecessors(HGraph* graph) {
94 for (HBasicBlock* block : graph->GetPostOrder()) {
95 if (block->IsLoopHeader()) {
96 graph->OrderLoopHeaderPredecessors(block);
97 }
98 }
99}
100
Artem Serov02eebcf2017-12-13 19:48:31 +0000101// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while
102// traversing the function removes basic blocks from the bb_set (instead of traditional DFS
103// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start
104// block.
105static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) {
106 DCHECK(bb_set->IsBitSet(block->GetBlockId()));
107 bb_set->ClearBit(block->GetBlockId());
108
109 for (HBasicBlock* succ : block->GetSuccessors()) {
110 if (bb_set->IsBitSet(succ->GetBlockId())) {
111 TraverseSubgraphForConnectivity(succ, bb_set);
112 }
113 }
114}
115
Artem Serov7f4aff62017-06-21 17:02:18 +0100116//
117// Helpers for CloneBasicBlock.
118//
119
120void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
121 DCHECK(!copy_instr->IsPhi());
122 for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
123 // Copy instruction holds the same input as the original instruction holds.
124 HInstruction* orig_input = copy_instr->InputAt(i);
125 if (!IsInOrigBBSet(orig_input->GetBlock())) {
126 // Defined outside the subgraph.
127 continue;
128 }
129 HInstruction* copy_input = GetInstrCopy(orig_input);
130 // copy_instr will be registered as a user of copy_inputs after returning from this function:
131 // 'copy_block->AddInstruction(copy_instr)'.
132 copy_instr->SetRawInputAt(i, copy_input);
133 }
134}
135
// Clones the whole environment chain of 'orig_env' onto 'copy_instr', remapping environment
// inputs that are defined inside the cloned region to their copies.
void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
                                                         const HEnvironment* orig_env) {
  // Clone outermost (parent) environments first so parents exist before children.
  if (orig_env->GetParent() != nullptr) {
    DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
  }
  HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);

  for (size_t i = 0; i < orig_env->Size(); i++) {
    HInstruction* env_input = orig_env->GetInstructionAt(i);
    // Inputs produced inside the cloned region are remapped to their copies.
    if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
      env_input = GetInstrCopy(env_input);
      DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
    }
    copy_env->SetRawEnvAt(i, env_input);
    if (env_input != nullptr) {
      env_input->AddEnvUseAt(copy_env, i);
    }
  }
  // InsertRawEnvironment assumes that instruction already has an environment that's why we use
  // SetRawEnvironment in the 'else' case.
  // As this function calls itself recursively with the same copy_instr - this copy_instr may
  // have partially copied chain of HEnvironments.
  if (copy_instr->HasEnvironment()) {
    copy_instr->InsertRawEnvironment(copy_env);
  } else {
    copy_instr->SetRawEnvironment(copy_env);
  }
}
164
165//
166// Helpers for RemapEdgesSuccessors.
167//
168
// Redirects the edge (orig_block -> orig_succ) to (orig_block -> copy of orig_succ), moving
// the corresponding phi inputs from the original successor's phis to the copy's phis.
void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
                                                       HBasicBlock* orig_succ) {
  DCHECK(IsInOrigBBSet(orig_succ));
  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);

  size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
  size_t phi_input_count = 0;
  // This flag reflects whether the original successor has at least one phi and this phi
  // has been already processed in the loop. Used for validation purposes in DCHECK to check that
  // in the end all of the phis in the copy successor have the same number of inputs - the number
  // of copy successor's predecessors.
  bool first_phi_met = false;
  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
    HPhi* orig_phi = it.Current()->AsPhi();
    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
    HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
    // Remove corresponding input for original phi.
    orig_phi->RemoveInputAt(this_index);
    // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
    // to orig_block, so add the input at the end of the list.
    copy_phi->AddInput(orig_phi_input);
    if (!first_phi_met) {
      phi_input_count = copy_phi->InputCount();
      first_phi_met = true;
    } else {
      DCHECK_EQ(phi_input_count, copy_phi->InputCount());
    }
  }
  // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds
  // to the previously added phi inputs position.
  orig_block->ReplaceSuccessor(orig_succ, copy_succ);
  DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
}
202
203void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
204 HBasicBlock* orig_succ) {
205 DCHECK(IsInOrigBBSet(orig_succ));
206 HBasicBlock* copy_block = GetBlockCopy(orig_block);
207 HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
208 copy_block->AddSuccessor(copy_succ);
209
210 size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
211 for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
212 HPhi* orig_phi = it.Current()->AsPhi();
213 HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
214 HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
215 copy_phi->AddInput(orig_phi_input);
216 }
217}
218
// Remaps the copy-internal edge: instead of (copy of orig_block -> copy of orig_succ), creates
// the edge (copy of orig_block -> orig_succ).
void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
                                             HBasicBlock* orig_succ) {
  DCHECK(IsInOrigBBSet(orig_succ));
  HBasicBlock* copy_block = GetBlockCopy(orig_block);
  copy_block->AddSuccessor(orig_succ);
  DCHECK(copy_block->HasSuccessor(orig_succ));

  // copy_block is now one more predecessor of orig_succ, so each phi in orig_succ needs one
  // more input; duplicate the value that flowed along the original (orig_block -> orig_succ)
  // edge. Data flow is fixed up later in ResolveDataFlow.
  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
    HPhi* orig_phi = it.Current()->AsPhi();
    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
    orig_phi->AddInput(orig_phi_input);
  }
}
233
234//
235// Local versions of CF calculation/adjustment routines.
236//
237
238// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
239// the performance of the base version by checking the local set.
240// TODO: this version works when updating the back edges info for natural loop-based local_set.
241// Check which exactly types of subgraphs can be analysed or rename it to
242// FindBackEdgesInTheNaturalLoop.
243void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) {
244 ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
245 // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
246 DCHECK_EQ(visited.GetHighestBitSet(), -1);
247
248 // Nodes that we're currently visiting, indexed by block id.
249 ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder);
250 // Number of successors visited from a given node, indexed by block id.
251 ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(),
252 0u,
253 arena_->Adapter(kArenaAllocGraphBuilder));
254 // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
255 ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
256 constexpr size_t kDefaultWorklistSize = 8;
257 worklist.reserve(kDefaultWorklistSize);
258
259 visited.SetBit(entry_block->GetBlockId());
260 visiting.SetBit(entry_block->GetBlockId());
261 worklist.push_back(entry_block);
262
263 while (!worklist.empty()) {
264 HBasicBlock* current = worklist.back();
265 uint32_t current_id = current->GetBlockId();
266 if (successors_visited[current_id] == current->GetSuccessors().size()) {
267 visiting.ClearBit(current_id);
268 worklist.pop_back();
269 } else {
270 HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
271 uint32_t successor_id = successor->GetBlockId();
272 if (!local_set->IsBitSet(successor_id)) {
273 continue;
274 }
275
276 if (visiting.IsBitSet(successor_id)) {
277 DCHECK(ContainsElement(worklist, successor));
278 successor->AddBackEdgeWhileUpdating(current);
279 } else if (!visited.IsBitSet(successor_id)) {
280 visited.SetBit(successor_id);
281 visiting.SetBit(successor_id);
282 worklist.push_back(successor);
283 }
284 }
285 }
286}
287
// Recomputes back-edge information for the affected area: either the whole graph (when
// outer_loop_ is nullptr) or the saved outer loop plus the newly created copy blocks.
// On return 'outer_loop_bb_set' contains the set of blocks that were processed.
void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
  HBasicBlock* block_entry = nullptr;

  if (outer_loop_ == nullptr) {
    // The whole graph is affected: process every block and reset loop data everywhere.
    for (auto block : graph_->GetBlocks()) {
      if (block != nullptr) {
        outer_loop_bb_set->SetBit(block->GetBlockId());
        HLoopInformation* info = block->GetLoopInformation();
        if (info != nullptr) {
          info->ResetBasicBlockData();
        }
      }
    }
    block_entry = graph_->GetEntryBlock();
  } else {
    // Only the outermost affected loop needs updating; start from its saved population.
    outer_loop_bb_set->Copy(&outer_loop_bb_set_);
    block_entry = outer_loop_->GetHeader();

    // Add newly created copy blocks.
    for (auto entry : *bb_map_) {
      outer_loop_bb_set->SetBit(entry.second->GetBlockId());
    }

    // Clear loop_info for the whole outer loop.
    for (uint32_t idx : outer_loop_bb_set->Indexes()) {
      HBasicBlock* block = GetBlockById(idx);
      HLoopInformation* info = block->GetLoopInformation();
      if (info != nullptr) {
        info->ResetBasicBlockData();
      }
    }
  }

  // Re-discover back edges within the affected area.
  FindBackEdgesLocal(block_entry, outer_loop_bb_set);

  for (uint32_t idx : outer_loop_bb_set->Indexes()) {
    HBasicBlock* block = GetBlockById(idx);
    HLoopInformation* info = block->GetLoopInformation();
    // Reset LoopInformation for regular blocks and old headers which are no longer loop headers.
    if (info != nullptr &&
        (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) {
      block->SetLoopInformation(nullptr);
    }
  }
}
333
// This is a modified version of HGraph::AnalyzeLoops, restricted to the blocks in
// 'outer_loop_bb_set'. Returns kAnalysisFailThrowCatchLoop when a loop header is a catch
// block (unsupported), kAnalysisSuccess otherwise.
GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) {
  // We iterate post order to ensure we visit inner loops before outer loops.
  // `PopulateRecursive` needs this guarantee to know whether a natural loop
  // contains an irreducible loop.
  for (HBasicBlock* block : graph_->GetPostOrder()) {
    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
      continue;
    }
    if (block->IsLoopHeader()) {
      if (block->IsCatchBlock()) {
        // TODO: Dealing with exceptional back edges could be tricky because
        // they only approximate the real control flow. Bail out for now.
        return kAnalysisFailThrowCatchLoop;
      }
      block->GetLoopInformation()->Populate();
    }
  }

  // Second pass: link each populated loop into its enclosing loop, again inner-to-outer.
  for (HBasicBlock* block : graph_->GetPostOrder()) {
    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
      continue;
    }
    if (block->IsLoopHeader()) {
      HLoopInformation* cur_loop = block->GetLoopInformation();
      HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
      if (outer_loop != nullptr) {
        outer_loop->PopulateInnerLoopUpwards(cur_loop);
      }
    }
  }

  return kAnalysisSuccess;
}
368
// Restores consistent control-flow information (back edges, dominators, loops, try blocks)
// after the transformation. The phase order matters: dominance must be recomputed before
// AnalyzeLoopsLocally, which relies on a fresh post-order.
void SuperblockCloner::CleanUpControlFlow() {
  // TODO: full control flow clean up for now, optimize it.
  graph_->ClearDominanceInformation();

  ArenaBitVector outer_loop_bb_set(
      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
  RecalculateBackEdgesInfo(&outer_loop_bb_set);

  // TODO: do it locally.
  graph_->SimplifyCFG();
  graph_->ComputeDominanceInformation();

  // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just
  // before in ComputeDominanceInformation.
  GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
  DCHECK_EQ(result, kAnalysisSuccess);

  // TODO: do it locally
  OrderLoopsHeadersPredecessors(graph_);

  graph_->ComputeTryBlockInformation();
}
391
392//
393// Helpers for ResolveDataFlow
394//
395
396void SuperblockCloner::ResolvePhi(HPhi* phi) {
397 HBasicBlock* phi_block = phi->GetBlock();
398 for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
399 HInstruction* input = phi->InputAt(i);
400 HBasicBlock* input_block = input->GetBlock();
401
402 // Originally defined outside the region.
403 if (!IsInOrigBBSet(input_block)) {
404 continue;
405 }
406 HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
407 if (!IsInOrigBBSet(corresponding_block)) {
408 phi->ReplaceInput(GetInstrCopy(input), i);
409 }
410 }
411}
412
413//
414// Main algorithm methods.
415//
416
417void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
418 DCHECK(exits->empty());
419 for (uint32_t block_id : orig_bb_set_.Indexes()) {
420 HBasicBlock* block = GetBlockById(block_id);
421 for (HBasicBlock* succ : block->GetSuccessors()) {
422 if (!IsInOrigBBSet(succ)) {
423 exits->push_back(succ);
424 }
425 }
426 }
427}
428
// Determines the smallest area of the graph for which control-flow information must be
// adjusted after the transformation, and records it in outer_loop_/outer_loop_bb_set_.
void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
  DCHECK(outer_loop_ == nullptr);
  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
  SearchForSubgraphExits(&exits);

  // For a reducible graph we need to update back-edges and dominance information only for
  // the outermost loop which is affected by the transformation - it can be found by picking
  // the common most outer loop of loops to which the subgraph exits blocks belong.
  // Note: it can be a loop or the whole graph (outer_loop_ will be nullptr in this case).
  for (HBasicBlock* exit : exits) {
    HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
    if (loop_exit_loop_info == nullptr) {
      // An exit outside any loop: the whole graph is affected.
      outer_loop_ = nullptr;
      break;
    }
    if (outer_loop_ == nullptr) {
      // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer
      // common loop.
      outer_loop_ = loop_exit_loop_info;
    }
    outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
  }

  if (outer_loop_ != nullptr) {
    // Save the loop population info as it will be changed later.
    outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
  }
}
457
// Connects the cloned blocks to the graph: redirects incoming edges per remap_incoming_,
// then processes every internal edge of the subgraph according to the orig/copy remap sets.
void SuperblockCloner::RemapEdgesSuccessors() {
  // Redirect incoming edges.
  for (HEdge e : *remap_incoming_) {
    HBasicBlock* orig_block = GetBlockById(e.GetFrom());
    HBasicBlock* orig_succ = GetBlockById(e.GetTo());
    RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
  }

  // Redirect internal edges.
  for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
    HBasicBlock* orig_block = GetBlockById(orig_block_id);

    for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
      uint32_t orig_succ_id = orig_succ->GetBlockId();

      // Check for outgoing edge.
      if (!IsInOrigBBSet(orig_succ)) {
        // Edge leaving the subgraph: the copy block gets the same outside successor.
        HBasicBlock* copy_block = GetBlockCopy(orig_block);
        copy_block->AddSuccessor(orig_succ);
        continue;
      }

      auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
      auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));

      // Due to construction all successors of copied block were set to original.
      if (copy_redir != remap_copy_internal_->end()) {
        RemapCopyInternalEdge(orig_block, orig_succ);
      } else {
        AddCopyInternalEdge(orig_block, orig_succ);
      }

      // Remap the original edge last: it changes orig_succ's phi inputs/predecessors,
      // which the two calls above read.
      if (orig_redir != remap_orig_internal_->end()) {
        RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
      }
    }
  }
}
496
497void SuperblockCloner::AdjustControlFlowInfo() {
498 ArenaBitVector outer_loop_bb_set(
499 arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
500 RecalculateBackEdgesInfo(&outer_loop_bb_set);
501
502 graph_->ClearDominanceInformation();
503 // TODO: Do it locally.
504 graph_->ComputeDominanceInformation();
505}
506
// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to
// the valid values; only phis' inputs must be adjusted.
//
// Fixes phi inputs of both original and copy blocks so that values flowing along remapped
// edges refer to the correct (original vs copy) definitions.
void SuperblockCloner::ResolveDataFlow() {
  for (auto entry : *bb_map_) {
    HBasicBlock* orig_block = entry.first;

    // Both the original phi and its copy may have stale inputs after edge remapping.
    for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
      HPhi* orig_phi = it.Current()->AsPhi();
      HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
      ResolvePhi(orig_phi);
      ResolvePhi(copy_phi);
    }
    if (kIsDebugBuild) {
      // Inputs of instruction copies must be already mapped to correspondent inputs copies.
      for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
        CheckInstructionInputsRemapping(it.Current());
      }
    }
  }
}
527
528//
529// Debug and logging methods.
530//
531
Artem Serov02eebcf2017-12-13 19:48:31 +0000532// Debug function to dump graph' BasicBlocks info.
533void DumpBB(HGraph* graph) {
534 for (HBasicBlock* bb : graph->GetBlocks()) {
535 if (bb == nullptr) {
536 continue;
537 }
538 std::cout << bb->GetBlockId();
539 std::cout << " <- ";
540 for (HBasicBlock* pred : bb->GetPredecessors()) {
541 std::cout << pred->GetBlockId() << " ";
542 }
543 std::cout << " -> ";
544 for (HBasicBlock* succ : bb->GetSuccessors()) {
545 std::cout << succ->GetBlockId() << " ";
546 }
547
548 if (bb->GetDominator()) {
549 std::cout << " dom " << bb->GetDominator()->GetBlockId();
550 }
551
552 if (bb->GetLoopInformation()) {
553 std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
554 }
555
556 std::cout << std::endl;
557 }
558}
559
// Debug-only check that every (regular and environment) input of the copy of 'orig_instr'
// is either defined outside the region or correctly remapped, and that dominance holds.
void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
  DCHECK(!orig_instr->IsPhi());
  HInstruction* copy_instr = GetInstrCopy(orig_instr);
  for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
    HInstruction* orig_input = orig_instr->InputAt(i);
    DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));

    // If original input is defined outside the region then it will remain for both original
    // instruction and the copy after the transformation.
    if (!IsInOrigBBSet(orig_input->GetBlock())) {
      continue;
    }
    HInstruction* copy_input = GetInstrCopy(orig_input);
    DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
  }

  // Resolve environment.
  if (orig_instr->HasEnvironment()) {
    HEnvironment* orig_env = orig_instr->GetEnvironment();

    for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
      HInstruction* orig_input = orig_env->GetInstructionAt(i);

      // If original input is defined outside the region then it will remain for both original
      // instruction and the copy after the transformation.
      if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
        continue;
      }

      HInstruction* copy_input = GetInstrCopy(orig_input);
      DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
    }
  }
}
594
Artem Serov02eebcf2017-12-13 19:48:31 +0000595bool SuperblockCloner::CheckRemappingInfoIsValid() {
596 for (HEdge edge : *remap_orig_internal_) {
597 if (!IsEdgeValid(edge, graph_) ||
598 !IsInOrigBBSet(edge.GetFrom()) ||
599 !IsInOrigBBSet(edge.GetTo())) {
600 return false;
601 }
602 }
603
604 for (auto edge : *remap_copy_internal_) {
605 if (!IsEdgeValid(edge, graph_) ||
606 !IsInOrigBBSet(edge.GetFrom()) ||
607 !IsInOrigBBSet(edge.GetTo())) {
608 return false;
609 }
610 }
611
612 for (auto edge : *remap_incoming_) {
613 if (!IsEdgeValid(edge, graph_) ||
614 IsInOrigBBSet(edge.GetFrom()) ||
615 !IsInOrigBBSet(edge.GetTo())) {
616 return false;
617 }
618 }
619
620 return true;
621}
622
623void SuperblockCloner::VerifyGraph() {
624 for (auto it : *hir_map_) {
625 HInstruction* orig_instr = it.first;
626 HInstruction* copy_instr = it.second;
627 if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) {
628 DCHECK(it.first->GetBlock() != nullptr);
629 }
630 if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) {
631 DCHECK(it.second->GetBlock() != nullptr);
632 }
633 }
634
635 GraphChecker checker(graph_);
636 checker.Run();
637 if (!checker.IsValid()) {
638 for (const std::string& error : checker.GetErrors()) {
639 std::cout << error << std::endl;
640 }
641 LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
642 }
643}
644
645void DumpBBSet(const ArenaBitVector* set) {
646 for (uint32_t idx : set->Indexes()) {
647 std::cout << idx << "\n";
648 }
649}
650
651void SuperblockCloner::DumpInputSets() {
652 std::cout << graph_->PrettyMethod() << "\n";
653 std::cout << "orig_bb_set:\n";
654 for (uint32_t idx : orig_bb_set_.Indexes()) {
655 std::cout << idx << "\n";
656 }
657 std::cout << "remap_orig_internal:\n";
658 for (HEdge e : *remap_orig_internal_) {
659 std::cout << e << "\n";
660 }
661 std::cout << "remap_copy_internal:\n";
662 for (auto e : *remap_copy_internal_) {
663 std::cout << e << "\n";
664 }
665 std::cout << "remap_incoming:\n";
666 for (auto e : *remap_incoming_) {
667 std::cout << e << "\n";
668 }
669}
670
Artem Serov7f4aff62017-06-21 17:02:18 +0100671//
672// Public methods.
673//
674
// Constructs a cloner for the subgraph defined by 'orig_bb_set'. 'bb_map' and 'hir_map' are
// caller-provided maps that will be populated with original->copy correspondences during Run().
// The remapping edge sets must be supplied later via SetSuccessorRemappingInfo.
SuperblockCloner::SuperblockCloner(HGraph* graph,
                                   const HBasicBlockSet* orig_bb_set,
                                   HBasicBlockMap* bb_map,
                                   HInstructionMap* hir_map)
    : graph_(graph),
      arena_(graph->GetAllocator()),
      orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
      remap_orig_internal_(nullptr),
      remap_copy_internal_(nullptr),
      remap_incoming_(nullptr),
      bb_map_(bb_map),
      hir_map_(hir_map),
      outer_loop_(nullptr),
      outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
  // Keep a private copy of the set: the caller's vector may be mutated independently.
  orig_bb_set_.Copy(orig_bb_set);
}
691
// Records the three edge remapping sets that drive RemapEdgesSuccessors; the sets are
// validated (debug builds only) against the graph and the original bb set.
void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
                                                 const HEdgeSet* remap_copy_internal,
                                                 const HEdgeSet* remap_incoming) {
  remap_orig_internal_ = remap_orig_internal;
  remap_copy_internal_ = remap_copy_internal;
  remap_incoming_ = remap_incoming;
  DCHECK(CheckRemappingInfoIsValid());
}
700
701bool SuperblockCloner::IsSubgraphClonable() const {
702 // TODO: Support irreducible graphs and graphs with try-catch.
703 if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
704 return false;
705 }
706
707 // Check that there are no instructions defined in the subgraph and used outside.
708 // TODO: Improve this by accepting graph with such uses but only one exit.
709 for (uint32_t idx : orig_bb_set_.Indexes()) {
710 HBasicBlock* block = GetBlockById(idx);
711
712 for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
713 HInstruction* instr = it.Current();
714 if (!instr->IsClonable() ||
715 IsUsedOutsideRegion(instr, orig_bb_set_)) {
716 return false;
717 }
718 }
719
720 for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
721 HInstruction* instr = it.Current();
722 if (!instr->IsClonable() ||
723 IsUsedOutsideRegion(instr, orig_bb_set_)) {
724 return false;
725 }
726 }
727 }
728
729 return true;
730}
731
Artem Serov02eebcf2017-12-13 19:48:31 +0000732bool SuperblockCloner::IsFastCase() const {
733 // Check that loop unrolling/loop peeling is being conducted.
734 // Check that all the basic blocks belong to the same loop.
735 bool flag = false;
736 HLoopInformation* common_loop_info = nullptr;
737 for (uint32_t idx : orig_bb_set_.Indexes()) {
738 HBasicBlock* block = GetBlockById(idx);
739 HLoopInformation* block_loop_info = block->GetLoopInformation();
740 if (!flag) {
741 common_loop_info = block_loop_info;
742 } else {
743 if (block_loop_info != common_loop_info) {
744 return false;
745 }
746 }
747 }
748
749 // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
750 if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
751 return false;
752 }
753
754 bool peeling_or_unrolling = false;
755 HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
756 HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
757 HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
758
759
760 // Check whether remapping info corresponds to loop unrolling.
761 CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true,
762 common_loop_info,
763 &remap_orig_internal,
764 &remap_copy_internal,
765 &remap_incoming);
766
767 peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
768 EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
769 EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
770
771 remap_orig_internal.Clear();
772 remap_copy_internal.Clear();
773 remap_incoming.Clear();
774
775 // Check whether remapping info corresponds to loop peeling.
776 CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false,
777 common_loop_info,
778 &remap_orig_internal,
779 &remap_copy_internal,
780 &remap_incoming);
781
782 peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
783 EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
784 EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
785
786 return peeling_or_unrolling;
787}
788
// Entry point: clones the configured subgraph, remaps the edges and restores consistent
// control-flow and data-flow information. The remapping sets must have been provided via
// SetSuccessorRemappingInfo before calling this.
void SuperblockCloner::Run() {
  DCHECK(bb_map_ != nullptr);
  DCHECK(hir_map_ != nullptr);
  DCHECK(remap_orig_internal_ != nullptr &&
         remap_copy_internal_ != nullptr &&
         remap_incoming_ != nullptr);
  DCHECK(IsSubgraphClonable());
  DCHECK(IsFastCase());

  if (kSuperblockClonerLogging) {
    DumpInputSets();
  }

  // Find an area in the graph for which control flow information should be adjusted.
  FindAndSetLocalAreaForAdjustments();
  // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
  // adjusted.
  CloneBasicBlocks();
  // Connect the blocks together/remap successors and fix phis which are directly affected by the
  // remapping.
  RemapEdgesSuccessors();

  // Check that the subgraph is connected.
  if (kIsDebugBuild) {
    HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner);

    // Add original and copy blocks of the subgraph to the work set.
    for (auto iter : *bb_map_) {
      work_set.SetBit(iter.first->GetBlockId());   // Original block.
      work_set.SetBit(iter.second->GetBlockId());  // Copy block.
    }
    CHECK(IsSubgraphConnected(&work_set, graph_));
  }

  // Recalculate dominance and backedge information which is required by the next stage.
  AdjustControlFlowInfo();
  // Fix data flow of the graph.
  ResolveDataFlow();
}
828
829void SuperblockCloner::CleanUp() {
830 CleanUpControlFlow();
831
832 // Remove phis which have all inputs being same.
833 // When a block has a single predecessor it must not have any phis. However after the
834 // transformation it could happen that there is such block with a phi with a single input.
835 // As this is needed to be processed we also simplify phis with multiple same inputs here.
836 for (auto entry : *bb_map_) {
837 HBasicBlock* orig_block = entry.first;
838 for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
839 HPhi* phi = inst_it.Current()->AsPhi();
840 if (ArePhiInputsTheSame(phi)) {
841 phi->ReplaceWith(phi->InputAt(0));
842 orig_block->RemovePhi(phi);
843 }
844 }
845
846 HBasicBlock* copy_block = GetBlockCopy(orig_block);
847 for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
848 HPhi* phi = inst_it.Current()->AsPhi();
849 if (ArePhiInputsTheSame(phi)) {
850 phi->ReplaceWith(phi->InputAt(0));
851 copy_block->RemovePhi(phi);
852 }
853 }
854 }
Artem Serov02eebcf2017-12-13 19:48:31 +0000855
Artem Serov121f2032017-10-23 19:19:06 +0100856 if (kIsDebugBuild) {
Artem Serov02eebcf2017-12-13 19:48:31 +0000857 VerifyGraph();
858 }
Artem Serov7f4aff62017-06-21 17:02:18 +0100859}
860
// Creates a copy of 'orig_block' (all phis and instructions) and records every original->copy
// pair in hir_map_. The new block is added to the graph but has no predecessors or successors
// yet; these are established later in RemapEdgesSuccessors.
HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
  HGraph* graph = orig_block->GetGraph();
  HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
  graph->AddBlock(copy_block);

  // Clone all the phis and add them to the map.
  for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
    HInstruction* orig_instr = it.Current();
    HInstruction* copy_instr = orig_instr->Clone(arena_);
    copy_block->AddPhi(copy_instr->AsPhi());
    // Phi inputs are re-established later, when the copy block's edges are set up.
    copy_instr->AsPhi()->RemoveAllInputs();
    DCHECK(!orig_instr->HasEnvironment());
    hir_map_->Put(orig_instr, copy_instr);
  }

  // Clone all the instructions and add them to the map.
  for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* orig_instr = it.Current();
    HInstruction* copy_instr = orig_instr->Clone(arena_);
    // Inputs defined inside the region must point at the already-created copies.
    ReplaceInputsWithCopies(copy_instr);
    copy_block->AddInstruction(copy_instr);
    if (orig_instr->HasEnvironment()) {
      DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
    }
    hir_map_->Put(orig_instr, copy_instr);
  }

  return copy_block;
}
890
891void SuperblockCloner::CloneBasicBlocks() {
892 // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
893 // instructions might be replaced by copies of the original inputs (depending where those inputs
894 // are defined). So the definitions of the original inputs must be visited before their original
895 // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')"
896 // guarantees that.
897 for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
898 if (!IsInOrigBBSet(orig_block)) {
899 continue;
900 }
901 HBasicBlock* copy_block = CloneBasicBlock(orig_block);
902 bb_map_->Put(orig_block, copy_block);
903 if (kSuperblockClonerLogging) {
904 std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
905 std::endl;
906 }
907 }
908}
909
Artem Serov02eebcf2017-12-13 19:48:31 +0000910//
911// Stand-alone methods.
912//
913
914void CollectRemappingInfoForPeelUnroll(bool to_unroll,
915 HLoopInformation* loop_info,
916 HEdgeSet* remap_orig_internal,
917 HEdgeSet* remap_copy_internal,
918 HEdgeSet* remap_incoming) {
919 DCHECK(loop_info != nullptr);
920 HBasicBlock* loop_header = loop_info->GetHeader();
921 // Set up remap_orig_internal edges set - set is empty.
922 // Set up remap_copy_internal edges set.
923 for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
924 HEdge e = HEdge(back_edge_block, loop_header);
925 if (to_unroll) {
926 remap_orig_internal->Insert(e);
927 remap_copy_internal->Insert(e);
928 } else {
929 if (kPeelUnrollPreserveHeader) {
930 remap_copy_internal->Insert(e);
931 } else {
932 remap_orig_internal->Insert(e);
933 }
934 }
935 }
936
937 // Set up remap_incoming edges set.
938 if (to_unroll != kPeelUnrollPreserveHeader) {
939 remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header));
940 }
941}
942
943bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) {
944 ArenaVector<HBasicBlock*> entry_blocks(
945 graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
946
947 // Find subgraph entry blocks.
948 for (uint32_t orig_block_id : work_set->Indexes()) {
949 HBasicBlock* block = graph->GetBlocks()[orig_block_id];
950 for (HBasicBlock* pred : block->GetPredecessors()) {
951 if (!work_set->IsBitSet(pred->GetBlockId())) {
952 entry_blocks.push_back(block);
953 break;
954 }
955 }
956 }
957
958 for (HBasicBlock* entry_block : entry_blocks) {
959 if (work_set->IsBitSet(entry_block->GetBlockId())) {
960 TraverseSubgraphForConnectivity(entry_block, work_set);
961 }
962 }
963
964 // Return whether there are unvisited - unreachable - blocks.
965 return work_set->NumSetBits() == 0;
966}
967
968HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
969 if (loop1 == nullptr || loop2 == nullptr) {
970 return nullptr;
971 }
972
973 if (loop1->IsIn(*loop2)) {
974 return loop2;
975 }
976
977 HLoopInformation* current = loop1;
978 while (current != nullptr && !loop2->IsIn(*current)) {
979 current = current->GetPreHeader()->GetLoopInformation();
980 }
981
982 return current;
983}
984
985bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
986 PeelUnrollHelper helper(loop_info, nullptr, nullptr);
987 return helper.IsLoopClonable();
988}
989
990HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
991 // For now do peeling only for natural loops.
992 DCHECK(!loop_info_->IsIrreducible());
993
994 HBasicBlock* loop_header = loop_info_->GetHeader();
995 HGraph* graph = loop_header->GetGraph();
996 ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
997
998 HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
999 HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
1000 HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
1001
1002 CollectRemappingInfoForPeelUnroll(to_unroll,
1003 loop_info_,
1004 &remap_orig_internal,
1005 &remap_copy_internal,
1006 &remap_incoming);
1007
1008 cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
1009 cloner_.Run();
1010 cloner_.CleanUp();
1011
1012 return kPeelUnrollPreserveHeader ? loop_header : cloner_.GetBlockCopy(loop_header);
1013}
1014
// Convenience wrapper that owns the orig->copy correspondence maps itself:
// both maps are allocated on the loop's graph arena and handed to the
// underlying PeelUnrollHelper, so callers need no external bookkeeping.
PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info)
    : bb_map_(std::less<HBasicBlock*>(),
              info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
      hir_map_(std::less<HInstruction*>(),
               info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
      helper_(info, &bb_map_, &hir_map_) {}
1021
Artem Serov7f4aff62017-06-21 17:02:18 +01001022} // namespace art
Artem Serov02eebcf2017-12-13 19:48:31 +00001023
1024namespace std {
1025
// Streaming support for art::HEdge (delegates to HEdge::Dump), letting edges
// be printed directly in logging/debug output.
ostream& operator<<(ostream& os, const art::HEdge& e) {
  e.Dump(os);
  return os;
}
1030
1031} // namespace std