/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
18#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
19
Vladimir Markoca6fff82017-10-03 14:49:14 +010020#include "base/scoped_arena_allocator.h"
21#include "base/scoped_arena_containers.h"
Aart Bik281c6812016-08-26 11:31:48 -070022#include "induction_var_range.h"
23#include "nodes.h"
24#include "optimization.h"
25
26namespace art {
27
Aart Bik92685a82017-03-06 11:13:43 -080028class CompilerDriver;
29
/**
 * Loop optimizations. Builds a loop hierarchy and applies optimizations to
 * the detected nested loops, such as removal of dead induction and empty loops
 * and inner loop vectorization.
 */
35class HLoopOptimization : public HOptimization {
36 public:
Aart Bik92685a82017-03-06 11:13:43 -080037 HLoopOptimization(HGraph* graph,
38 CompilerDriver* compiler_driver,
Aart Bikb92cc332017-09-06 15:53:17 -070039 HInductionVarAnalysis* induction_analysis,
Aart Bik2ca10eb2017-11-15 15:17:53 -080040 OptimizingCompilerStats* stats,
41 const char* name = kLoopOptimizationPassName);
Aart Bik281c6812016-08-26 11:31:48 -070042
43 void Run() OVERRIDE;
44
45 static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
46
47 private:
48 /**
49 * A single loop inside the loop hierarchy representation.
50 */
Aart Bik96202302016-10-04 17:33:56 -070051 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
Aart Bik281c6812016-08-26 11:31:48 -070052 explicit LoopNode(HLoopInformation* lp_info)
53 : loop_info(lp_info),
54 outer(nullptr),
55 inner(nullptr),
56 previous(nullptr),
57 next(nullptr) {}
Aart Bikf8f5a162017-02-06 15:35:29 -080058 HLoopInformation* loop_info;
Aart Bik281c6812016-08-26 11:31:48 -070059 LoopNode* outer;
60 LoopNode* inner;
61 LoopNode* previous;
62 LoopNode* next;
63 };
64
Aart Bikf8f5a162017-02-06 15:35:29 -080065 /*
66 * Vectorization restrictions (bit mask).
67 */
68 enum VectorRestrictions {
Aart Bik0148de42017-09-05 09:25:01 -070069 kNone = 0, // no restrictions
70 kNoMul = 1 << 0, // no multiplication
71 kNoDiv = 1 << 1, // no division
72 kNoShift = 1 << 2, // no shift
73 kNoShr = 1 << 3, // no arithmetic shift right
74 kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
75 kNoSignedHAdd = 1 << 5, // no signed halving add
76 kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
77 kNoAbs = 1 << 7, // no absolute value
78 kNoMinMax = 1 << 8, // no min/max
79 kNoStringCharAt = 1 << 9, // no StringCharAt
80 kNoReduction = 1 << 10, // no reduction
Aart Bikdbbac8f2017-09-01 13:06:08 -070081 kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
Artem Serov6e9b1372017-10-05 16:48:30 +010082 kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening
Aart Bik29aa0822018-03-08 11:28:00 -080083 kNoSaturation = 1 << 13, // no saturation arithmetic
Aart Bikf8f5a162017-02-06 15:35:29 -080084 };
Aart Bik96202302016-10-04 17:33:56 -070085
Aart Bikf8f5a162017-02-06 15:35:29 -080086 /*
87 * Vectorization mode during synthesis
88 * (sequential peeling/cleanup loop or vector loop).
89 */
90 enum VectorMode {
91 kSequential,
92 kVector
93 };
94
95 /*
96 * Representation of a unit-stride array reference.
97 */
98 struct ArrayReference {
Aart Bik38a3f212017-10-20 17:02:21 -070099 ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false)
100 : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { }
Aart Bikf8f5a162017-02-06 15:35:29 -0800101 bool operator<(const ArrayReference& other) const {
102 return
103 (base < other.base) ||
104 (base == other.base &&
105 (offset < other.offset || (offset == other.offset &&
106 (type < other.type ||
Aart Bik38a3f212017-10-20 17:02:21 -0700107 (type == other.type &&
108 (lhs < other.lhs ||
109 (lhs == other.lhs &&
110 is_string_char_at < other.is_string_char_at)))))));
Aart Bikf8f5a162017-02-06 15:35:29 -0800111 }
Aart Bik38a3f212017-10-20 17:02:21 -0700112 HInstruction* base; // base address
113 HInstruction* offset; // offset + i
114 DataType::Type type; // component type
115 bool lhs; // def/use
116 bool is_string_char_at; // compressed string read
Aart Bikf8f5a162017-02-06 15:35:29 -0800117 };
118
Aart Bikb29f6842017-07-28 15:58:41 -0700119 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800120 // Loop setup and traversal.
Aart Bikb29f6842017-07-28 15:58:41 -0700121 //
122
Aart Bikf8f5a162017-02-06 15:35:29 -0800123 void LocalRun();
Aart Bik281c6812016-08-26 11:31:48 -0700124 void AddLoop(HLoopInformation* loop_info);
125 void RemoveLoop(LoopNode* node);
Aart Bik281c6812016-08-26 11:31:48 -0700126
Aart Bikb29f6842017-07-28 15:58:41 -0700127 // Traverses all loops inner to outer to perform simplifications and optimizations.
128 // Returns true if loops nested inside current loop (node) have changed.
129 bool TraverseLoopsInnerToOuter(LoopNode* node);
130
131 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800132 // Optimization.
Aart Bikb29f6842017-07-28 15:58:41 -0700133 //
134
Aart Bik281c6812016-08-26 11:31:48 -0700135 void SimplifyInduction(LoopNode* node);
Aart Bik482095d2016-10-10 15:39:10 -0700136 void SimplifyBlocks(LoopNode* node);
Aart Bikf8f5a162017-02-06 15:35:29 -0800137
Aart Bikb29f6842017-07-28 15:58:41 -0700138 // Performs optimizations specific to inner loop (empty loop removal,
139 // unrolling, vectorization). Returns true if anything changed.
140 bool OptimizeInnerLoop(LoopNode* node);
141
142 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800143 // Vectorization analysis and synthesis.
Aart Bikb29f6842017-07-28 15:58:41 -0700144 //
145
Aart Bik14a68b42017-06-08 14:06:58 -0700146 bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800147 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
148 void GenerateNewLoop(LoopNode* node,
149 HBasicBlock* block,
150 HBasicBlock* new_preheader,
151 HInstruction* lo,
152 HInstruction* hi,
Aart Bik14a68b42017-06-08 14:06:58 -0700153 HInstruction* step,
154 uint32_t unroll);
Aart Bikf8f5a162017-02-06 15:35:29 -0800155 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
156 bool VectorizeUse(LoopNode* node,
157 HInstruction* instruction,
158 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100159 DataType::Type type,
Aart Bikf8f5a162017-02-06 15:35:29 -0800160 uint64_t restrictions);
Aart Bik38a3f212017-10-20 17:02:21 -0700161 uint32_t GetVectorSizeInBytes();
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100162 bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions);
Aart Bikf8f5a162017-02-06 15:35:29 -0800163 bool TrySetVectorLength(uint32_t length);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100164 void GenerateVecInv(HInstruction* org, DataType::Type type);
Aart Bik14a68b42017-06-08 14:06:58 -0700165 void GenerateVecSub(HInstruction* org, HInstruction* offset);
Aart Bikf8f5a162017-02-06 15:35:29 -0800166 void GenerateVecMem(HInstruction* org,
167 HInstruction* opa,
168 HInstruction* opb,
Aart Bik14a68b42017-06-08 14:06:58 -0700169 HInstruction* offset,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100170 DataType::Type type);
Aart Bik0148de42017-09-05 09:25:01 -0700171 void GenerateVecReductionPhi(HPhi* phi);
172 void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
173 HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
Aart Bik304c8a52017-05-23 11:01:13 -0700174 void GenerateVecOp(HInstruction* org,
175 HInstruction* opa,
176 HInstruction* opb,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100177 DataType::Type type,
Aart Bik304c8a52017-05-23 11:01:13 -0700178 bool is_unsigned = false);
Aart Bik281c6812016-08-26 11:31:48 -0700179
Aart Bikf3e61ee2017-04-12 17:09:20 -0700180 // Vectorization idioms.
Aart Bik29aa0822018-03-08 11:28:00 -0800181 bool VectorizeSaturationIdiom(LoopNode* node,
182 HInstruction* instruction,
183 bool generate_code,
184 DataType::Type type,
185 uint64_t restrictions);
Aart Bikf3e61ee2017-04-12 17:09:20 -0700186 bool VectorizeHalvingAddIdiom(LoopNode* node,
187 HInstruction* instruction,
188 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100189 DataType::Type type,
Aart Bikf3e61ee2017-04-12 17:09:20 -0700190 uint64_t restrictions);
Aart Bikdbbac8f2017-09-01 13:06:08 -0700191 bool VectorizeSADIdiom(LoopNode* node,
192 HInstruction* instruction,
193 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100194 DataType::Type type,
Aart Bikdbbac8f2017-09-01 13:06:08 -0700195 uint64_t restrictions);
Aart Bikf3e61ee2017-04-12 17:09:20 -0700196
Aart Bik14a68b42017-06-08 14:06:58 -0700197 // Vectorization heuristics.
Aart Bik38a3f212017-10-20 17:02:21 -0700198 Alignment ComputeAlignment(HInstruction* offset,
199 DataType::Type type,
200 bool is_string_char_at,
201 uint32_t peeling = 0);
202 void SetAlignmentStrategy(uint32_t peeling_votes[],
203 const ArrayReference* peeling_candidate);
204 uint32_t MaxNumberPeeled();
Aart Bik14a68b42017-06-08 14:06:58 -0700205 bool IsVectorizationProfitable(int64_t trip_count);
Aart Bik14a68b42017-06-08 14:06:58 -0700206 uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
207
Aart Bikb29f6842017-07-28 15:58:41 -0700208 //
Aart Bik6b69e0a2017-01-11 10:20:43 -0800209 // Helpers.
Aart Bikb29f6842017-07-28 15:58:41 -0700210 //
211
Aart Bikf8f5a162017-02-06 15:35:29 -0800212 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
Aart Bikb29f6842017-07-28 15:58:41 -0700213 bool TrySetPhiReduction(HPhi* phi);
214
215 // Detects loop header with a single induction (returned in main_phi), possibly
216 // other phis for reductions, but no other side effects. Returns true on success.
217 bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
218
Aart Bikcc42be02016-10-20 16:14:16 -0700219 bool IsEmptyBody(HBasicBlock* block);
Aart Bik482095d2016-10-10 15:39:10 -0700220 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -0700221 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -0800222 bool collect_loop_uses,
Aart Bik38a3f212017-10-20 17:02:21 -0700223 /*out*/ uint32_t* use_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800224 bool IsUsedOutsideLoop(HLoopInformation* loop_info,
225 HInstruction* instruction);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100226 bool TryReplaceWithLastValue(HLoopInformation* loop_info,
227 HInstruction* instruction,
228 HBasicBlock* block);
Aart Bikf8f5a162017-02-06 15:35:29 -0800229 bool TryAssignLastValue(HLoopInformation* loop_info,
230 HInstruction* instruction,
231 HBasicBlock* block,
232 bool collect_loop_uses);
Aart Bik6b69e0a2017-01-11 10:20:43 -0800233 void RemoveDeadInstructions(const HInstructionList& list);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100234 bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
Aart Bik281c6812016-08-26 11:31:48 -0700235
Aart Bik92685a82017-03-06 11:13:43 -0800236 // Compiler driver (to query ISA features).
237 const CompilerDriver* compiler_driver_;
238
Aart Bik96202302016-10-04 17:33:56 -0700239 // Range information based on prior induction variable analysis.
Aart Bik281c6812016-08-26 11:31:48 -0700240 InductionVarRange induction_range_;
241
242 // Phase-local heap memory allocator for the loop optimizer. Storage obtained
Aart Bik96202302016-10-04 17:33:56 -0700243 // through this allocator is immediately released when the loop optimizer is done.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100244 ScopedArenaAllocator* loop_allocator_;
Aart Bik281c6812016-08-26 11:31:48 -0700245
Aart Bikf8f5a162017-02-06 15:35:29 -0800246 // Global heap memory allocator. Used to build HIR.
247 ArenaAllocator* global_allocator_;
248
Aart Bik96202302016-10-04 17:33:56 -0700249 // Entries into the loop hierarchy representation. The hierarchy resides
250 // in phase-local heap memory.
Aart Bik281c6812016-08-26 11:31:48 -0700251 LoopNode* top_loop_;
252 LoopNode* last_loop_;
253
Aart Bik8c4a8542016-10-06 11:36:57 -0700254 // Temporary bookkeeping of a set of instructions.
255 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100256 ScopedArenaSet<HInstruction*>* iset_;
Aart Bik8c4a8542016-10-06 11:36:57 -0700257
Aart Bikb29f6842017-07-28 15:58:41 -0700258 // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
259 // (1) reductions in the loop-body are mapped back to their phi definition,
260 // (2) phi definitions are mapped to their initial value (updated during
261 // code generation to feed the proper values into the new chain).
262 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100263 ScopedArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
Aart Bik482095d2016-10-10 15:39:10 -0700264
Aart Bikdf7822e2016-12-06 10:05:30 -0800265 // Flag that tracks if any simplifications have occurred.
266 bool simplified_;
267
Aart Bikf8f5a162017-02-06 15:35:29 -0800268 // Number of "lanes" for selected packed type.
269 uint32_t vector_length_;
270
271 // Set of array references in the vector loop.
272 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100273 ScopedArenaSet<ArrayReference>* vector_refs_;
Aart Bikf8f5a162017-02-06 15:35:29 -0800274
Aart Bik38a3f212017-10-20 17:02:21 -0700275 // Static or dynamic loop peeling for alignment.
276 uint32_t vector_static_peeling_factor_;
277 const ArrayReference* vector_dynamic_peeling_candidate_;
Aart Bik14a68b42017-06-08 14:06:58 -0700278
279 // Dynamic data dependence test of the form a != b.
280 HInstruction* vector_runtime_test_a_;
281 HInstruction* vector_runtime_test_b_;
282
Aart Bikf8f5a162017-02-06 15:35:29 -0800283 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
Aart Bik14a68b42017-06-08 14:06:58 -0700284 // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data
Aart Bikf8f5a162017-02-06 15:35:29 -0800285 // structure maps original instructions into the new instructions.
286 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100287 ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
Aart Bikf8f5a162017-02-06 15:35:29 -0800288
Aart Bik0148de42017-09-05 09:25:01 -0700289 // Permanent mapping used during vectorization synthesis.
290 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100291 ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
Aart Bik0148de42017-09-05 09:25:01 -0700292
Aart Bikf8f5a162017-02-06 15:35:29 -0800293 // Temporary vectorization bookkeeping.
Aart Bik14a68b42017-06-08 14:06:58 -0700294 VectorMode vector_mode_; // synthesis mode
Aart Bikf8f5a162017-02-06 15:35:29 -0800295 HBasicBlock* vector_preheader_; // preheader of the new loop
296 HBasicBlock* vector_header_; // header of the new loop
297 HBasicBlock* vector_body_; // body of the new loop
Aart Bik14a68b42017-06-08 14:06:58 -0700298 HInstruction* vector_index_; // normalized index of the new loop
Aart Bikf8f5a162017-02-06 15:35:29 -0800299
Aart Bik281c6812016-08-26 11:31:48 -0700300 friend class LoopOptimizationTest;
301
302 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
303};
304
305} // namespace art
306
307#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_