/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_

#include <functional>

#include "induction_var_range.h"
#include "nodes.h"
#include "optimization.h"

namespace art {

class CompilerDriver;

Aart Bik281c6812016-08-26 11:31:48 -070028/**
29 * Loop optimizations. Builds a loop hierarchy and applies optimizations to
Aart Bikf8f5a162017-02-06 15:35:29 -080030 * the detected nested loops, such as removal of dead induction and empty loops
31 * and inner loop vectorization.
Aart Bik281c6812016-08-26 11:31:48 -070032 */
33class HLoopOptimization : public HOptimization {
34 public:
Aart Bik92685a82017-03-06 11:13:43 -080035 HLoopOptimization(HGraph* graph,
36 CompilerDriver* compiler_driver,
37 HInductionVarAnalysis* induction_analysis);
Aart Bik281c6812016-08-26 11:31:48 -070038
39 void Run() OVERRIDE;
40
41 static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
42
43 private:
44 /**
45 * A single loop inside the loop hierarchy representation.
46 */
Aart Bik96202302016-10-04 17:33:56 -070047 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
Aart Bik281c6812016-08-26 11:31:48 -070048 explicit LoopNode(HLoopInformation* lp_info)
49 : loop_info(lp_info),
50 outer(nullptr),
51 inner(nullptr),
52 previous(nullptr),
53 next(nullptr) {}
Aart Bikf8f5a162017-02-06 15:35:29 -080054 HLoopInformation* loop_info;
Aart Bik281c6812016-08-26 11:31:48 -070055 LoopNode* outer;
56 LoopNode* inner;
57 LoopNode* previous;
58 LoopNode* next;
59 };
60
Aart Bikf8f5a162017-02-06 15:35:29 -080061 /*
62 * Vectorization restrictions (bit mask).
63 */
64 enum VectorRestrictions {
65 kNone = 0, // no restrictions
66 kNoMul = 1, // no multiplication
67 kNoDiv = 2, // no division
68 kNoShift = 4, // no shift
69 kNoShr = 8, // no arithmetic shift right
70 kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
Aart Bik6daebeb2017-04-03 14:35:41 -070071 kNoAbs = 32, // no absolute value
Aart Bikf8f5a162017-02-06 15:35:29 -080072 };
Aart Bik96202302016-10-04 17:33:56 -070073
Aart Bikf8f5a162017-02-06 15:35:29 -080074 /*
75 * Vectorization mode during synthesis
76 * (sequential peeling/cleanup loop or vector loop).
77 */
78 enum VectorMode {
79 kSequential,
80 kVector
81 };
82
83 /*
84 * Representation of a unit-stride array reference.
85 */
86 struct ArrayReference {
87 ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
88 : base(b), offset(o), type(t), lhs(l) { }
89 bool operator<(const ArrayReference& other) const {
90 return
91 (base < other.base) ||
92 (base == other.base &&
93 (offset < other.offset || (offset == other.offset &&
94 (type < other.type ||
95 (type == other.type && lhs < other.lhs)))));
96 }
97 HInstruction* base; // base address
98 HInstruction* offset; // offset + i
99 Primitive::Type type; // component type
100 bool lhs; // def/use
101 };
102
103 // Loop setup and traversal.
104 void LocalRun();
Aart Bik281c6812016-08-26 11:31:48 -0700105 void AddLoop(HLoopInformation* loop_info);
106 void RemoveLoop(LoopNode* node);
Aart Bik281c6812016-08-26 11:31:48 -0700107 void TraverseLoopsInnerToOuter(LoopNode* node);
108
Aart Bikf8f5a162017-02-06 15:35:29 -0800109 // Optimization.
Aart Bik281c6812016-08-26 11:31:48 -0700110 void SimplifyInduction(LoopNode* node);
Aart Bik482095d2016-10-10 15:39:10 -0700111 void SimplifyBlocks(LoopNode* node);
Aart Bikf8f5a162017-02-06 15:35:29 -0800112 void OptimizeInnerLoop(LoopNode* node);
113
114 // Vectorization analysis and synthesis.
115 bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
116 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
117 void GenerateNewLoop(LoopNode* node,
118 HBasicBlock* block,
119 HBasicBlock* new_preheader,
120 HInstruction* lo,
121 HInstruction* hi,
122 HInstruction* step);
123 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
124 bool VectorizeUse(LoopNode* node,
125 HInstruction* instruction,
126 bool generate_code,
127 Primitive::Type type,
128 uint64_t restrictions);
129 bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
130 bool TrySetVectorLength(uint32_t length);
131 void GenerateVecInv(HInstruction* org, Primitive::Type type);
132 void GenerateVecSub(HInstruction* org, HInstruction* off);
133 void GenerateVecMem(HInstruction* org,
134 HInstruction* opa,
135 HInstruction* opb,
136 Primitive::Type type);
137 void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
Aart Bik281c6812016-08-26 11:31:48 -0700138
Aart Bik6b69e0a2017-01-11 10:20:43 -0800139 // Helpers.
Aart Bikf8f5a162017-02-06 15:35:29 -0800140 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
141 bool TrySetSimpleLoopHeader(HBasicBlock* block);
Aart Bikcc42be02016-10-20 16:14:16 -0700142 bool IsEmptyBody(HBasicBlock* block);
Aart Bik482095d2016-10-10 15:39:10 -0700143 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -0700144 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -0800145 bool collect_loop_uses,
Aart Bik8c4a8542016-10-06 11:36:57 -0700146 /*out*/ int32_t* use_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800147 bool IsUsedOutsideLoop(HLoopInformation* loop_info,
148 HInstruction* instruction);
Aart Bik807868e2016-11-03 17:51:43 -0700149 bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
Aart Bikf8f5a162017-02-06 15:35:29 -0800150 bool TryAssignLastValue(HLoopInformation* loop_info,
151 HInstruction* instruction,
152 HBasicBlock* block,
153 bool collect_loop_uses);
Aart Bik6b69e0a2017-01-11 10:20:43 -0800154 void RemoveDeadInstructions(const HInstructionList& list);
Aart Bik281c6812016-08-26 11:31:48 -0700155
Aart Bik92685a82017-03-06 11:13:43 -0800156 // Compiler driver (to query ISA features).
157 const CompilerDriver* compiler_driver_;
158
Aart Bik96202302016-10-04 17:33:56 -0700159 // Range information based on prior induction variable analysis.
Aart Bik281c6812016-08-26 11:31:48 -0700160 InductionVarRange induction_range_;
161
162 // Phase-local heap memory allocator for the loop optimizer. Storage obtained
Aart Bik96202302016-10-04 17:33:56 -0700163 // through this allocator is immediately released when the loop optimizer is done.
Nicolas Geoffrayebe16742016-10-05 09:55:42 +0100164 ArenaAllocator* loop_allocator_;
Aart Bik281c6812016-08-26 11:31:48 -0700165
Aart Bikf8f5a162017-02-06 15:35:29 -0800166 // Global heap memory allocator. Used to build HIR.
167 ArenaAllocator* global_allocator_;
168
Aart Bik96202302016-10-04 17:33:56 -0700169 // Entries into the loop hierarchy representation. The hierarchy resides
170 // in phase-local heap memory.
Aart Bik281c6812016-08-26 11:31:48 -0700171 LoopNode* top_loop_;
172 LoopNode* last_loop_;
173
Aart Bik8c4a8542016-10-06 11:36:57 -0700174 // Temporary bookkeeping of a set of instructions.
175 // Contents reside in phase-local heap memory.
176 ArenaSet<HInstruction*>* iset_;
177
Aart Bik482095d2016-10-10 15:39:10 -0700178 // Counter that tracks how many induction cycles have been simplified. Useful
179 // to trigger incremental updates of induction variable analysis of outer loops
180 // when the induction of inner loops has changed.
Aart Bikf8f5a162017-02-06 15:35:29 -0800181 uint32_t induction_simplication_count_;
Aart Bik482095d2016-10-10 15:39:10 -0700182
Aart Bikdf7822e2016-12-06 10:05:30 -0800183 // Flag that tracks if any simplifications have occurred.
184 bool simplified_;
185
Aart Bikf8f5a162017-02-06 15:35:29 -0800186 // Number of "lanes" for selected packed type.
187 uint32_t vector_length_;
188
189 // Set of array references in the vector loop.
190 // Contents reside in phase-local heap memory.
191 ArenaSet<ArrayReference>* vector_refs_;
192
193 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
194 // loop (simd_ is false) and the actual vector loop (simd_ is true). The data
195 // structure maps original instructions into the new instructions.
196 // Contents reside in phase-local heap memory.
197 ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
198
199 // Temporary vectorization bookkeeping.
200 HBasicBlock* vector_preheader_; // preheader of the new loop
201 HBasicBlock* vector_header_; // header of the new loop
202 HBasicBlock* vector_body_; // body of the new loop
203 HInstruction* vector_runtime_test_a_;
204 HInstruction* vector_runtime_test_b_; // defines a != b runtime test
205 HPhi* vector_phi_; // the Phi representing the normalized loop index
206 VectorMode vector_mode_; // selects synthesis mode
207
Aart Bik281c6812016-08-26 11:31:48 -0700208 friend class LoopOptimizationTest;
209
210 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
211};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_