blob: 8f84796ff4f0c7bc424e96f561f04d906f592a0c [file] [log] [blame]
Aart Bik281c6812016-08-26 11:31:48 -07001/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "loop_optimization.h"
18
Aart Bikf8f5a162017-02-06 15:35:29 -080019#include "arch/arm/instruction_set_features_arm.h"
20#include "arch/arm64/instruction_set_features_arm64.h"
Andreas Gampe8cf9cb32017-07-19 09:28:38 -070021#include "arch/instruction_set.h"
Aart Bikf8f5a162017-02-06 15:35:29 -080022#include "arch/mips/instruction_set_features_mips.h"
23#include "arch/mips64/instruction_set_features_mips64.h"
24#include "arch/x86/instruction_set_features_x86.h"
25#include "arch/x86_64/instruction_set_features_x86_64.h"
Aart Bik92685a82017-03-06 11:13:43 -080026#include "driver/compiler_driver.h"
Aart Bik96202302016-10-04 17:33:56 -070027#include "linear_order.h"
Aart Bik281c6812016-08-26 11:31:48 -070028
29namespace art {
30
Vladimir Markod5d2f2c2017-09-26 12:37:26 +010031// TODO: Clean up the packed type detection so that we have the right type straight away
32// and do not need to go through this normalization.
33static inline void NormalizePackedType(/* inout */ DataType::Type* type,
34 /* inout */ bool* is_unsigned) {
35 switch (*type) {
36 case DataType::Type::kBool:
37 DCHECK(!*is_unsigned);
38 break;
39 case DataType::Type::kUint8:
40 case DataType::Type::kInt8:
41 if (*is_unsigned) {
42 *is_unsigned = false;
43 *type = DataType::Type::kUint8;
44 } else {
45 *type = DataType::Type::kInt8;
46 }
47 break;
48 case DataType::Type::kUint16:
49 case DataType::Type::kInt16:
50 if (*is_unsigned) {
51 *is_unsigned = false;
52 *type = DataType::Type::kUint16;
53 } else {
54 *type = DataType::Type::kInt16;
55 }
56 break;
57 case DataType::Type::kInt32:
58 case DataType::Type::kInt64:
59 // We do not have kUint32 and kUint64 at the moment.
60 break;
61 case DataType::Type::kFloat32:
62 case DataType::Type::kFloat64:
63 DCHECK(!*is_unsigned);
64 break;
65 default:
66 LOG(FATAL) << "Unexpected type " << *type;
67 UNREACHABLE();
68 }
69}
70
Aart Bikf8f5a162017-02-06 15:35:29 -080071// Enables vectorization (SIMDization) in the loop optimizer.
72static constexpr bool kEnableVectorization = true;
73
Aart Bik14a68b42017-06-08 14:06:58 -070074// All current SIMD targets want 16-byte alignment.
75static constexpr size_t kAlignedBase = 16;
76
Aart Bik521b50f2017-09-09 10:44:45 -070077// No loop unrolling factor (just one copy of the loop-body).
78static constexpr uint32_t kNoUnrollingFactor = 1;
79
// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
  // First sever this instruction's own links to its inputs and to instructions
  // whose environments reference it, so the removal below does not trip over
  // uses that are themselves part of the cycle being dismantled.
  instruction->RemoveAsUserOfAllInputs();
  instruction->RemoveEnvironmentUsers();
  // ensure_safety=false: remaining uses are within the cycle, which is about
  // to be removed entirely, so the usual "no remaining uses" check must be skipped.
  instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
  // Finally clear the environment records held by this instruction itself.
  RemoveEnvironmentUses(instruction);
  ResetEnvironmentInputRecords(instruction);
}
89
Aart Bik807868e2016-11-03 17:51:43 -070090// Detect a goto block and sets succ to the single successor.
Aart Bike3dedc52016-11-02 17:50:27 -070091static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) {
92 if (block->GetPredecessors().size() == 1 &&
93 block->GetSuccessors().size() == 1 &&
94 block->IsSingleGoto()) {
95 *succ = block->GetSingleSuccessor();
96 return true;
97 }
98 return false;
99}
100
// Detect an early exit loop, i.e. a loop with an exiting edge out of a block
// other than the first one visited.
static bool IsEarlyExit(HLoopInformation* loop_info) {
  HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
  // NOTE(review): the initial Advance() deliberately skips the first block in
  // reverse post order — presumably the loop header, whose exiting edge is the
  // regular loop exit rather than an early exit; confirm against iterator docs.
  for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
      // Any successor outside the loop marks a premature exit.
      if (!loop_info->Contains(*successor)) {
        return true;
      }
    }
  }
  return false;
}
113
Aart Bik68ca7022017-09-26 16:44:23 -0700114// Forward declaration.
115static bool IsZeroExtensionAndGet(HInstruction* instruction,
116 DataType::Type type,
Aart Bikdf011c32017-09-28 12:53:04 -0700117 /*out*/ HInstruction** operand);
Aart Bik68ca7022017-09-26 16:44:23 -0700118
// Detect a sign extension in instruction from the given type.
// Returns the promoted operand on success.
static bool IsSignExtensionAndGet(HInstruction* instruction,
                                  DataType::Type type,
                                  /*out*/ HInstruction** operand) {
  // Accept any already wider constant that would be handled properly by sign
  // extension when represented in the *width* of the given narrower data type
  // (the fact that Uint8/Uint16 normally zero extend does not matter here).
  int64_t value = 0;
  if (IsInt64AndGet(instruction, /*out*/ &value)) {
    switch (type) {
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
        // Constant must fit in 8 signed bits.
        if (IsInt<8>(value)) {
          *operand = instruction;
          return true;
        }
        return false;
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
        // Constant must fit in 16 signed bits.
        if (IsInt<16>(value)) {
          *operand = instruction;
          return true;
        }
        return false;
      default:
        return false;
    }
  }
  // An implicit widening conversion of any signed expression sign-extends.
  if (instruction->GetType() == type) {
    switch (type) {
      case DataType::Type::kInt8:
      case DataType::Type::kInt16:
        *operand = instruction;
        return true;
      default:
        return false;
    }
  }
  // An explicit widening conversion of a signed expression sign-extends.
  if (instruction->IsTypeConversion()) {
    HInstruction* conv = instruction->InputAt(0);
    DataType::Type from = conv->GetType();
    switch (instruction->GetType()) {
      case DataType::Type::kInt32:
      case DataType::Type::kInt64:
        // Conversion to int32/int64 from an equally-typed narrower signed
        // source: report the conversion input as the promoted operand.
        if (type == from && (from == DataType::Type::kInt8 ||
                             from == DataType::Type::kInt16 ||
                             from == DataType::Type::kInt32)) {
          *operand = conv;
          return true;
        }
        return false;
      case DataType::Type::kInt16:
        // A conversion to int16 is accepted only when looking for kUint16 and
        // the conversion input is itself a zero extension of that type.
        return type == DataType::Type::kUint16 &&
               from == DataType::Type::kUint16 &&
               IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
      default:
        return false;
    }
  }
  return false;
}
183
// Detect a zero extension in instruction from the given type.
// Returns the promoted operand on success.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
                                  DataType::Type type,
                                  /*out*/ HInstruction** operand) {
  // Accept any already wider constant that would be handled properly by zero
  // extension when represented in the *width* of the given narrower data type
  // (the fact that Int8/Int16 normally sign extend does not matter here).
  int64_t value = 0;
  if (IsInt64AndGet(instruction, /*out*/ &value)) {
    switch (type) {
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
        // Constant must fit in 8 unsigned bits.
        if (IsUint<8>(value)) {
          *operand = instruction;
          return true;
        }
        return false;
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
        // Constant must fit in 16 unsigned bits.
        if (IsUint<16>(value)) {
          *operand = instruction;
          return true;
        }
        return false;
      default:
        return false;
    }
  }
  // An implicit widening conversion of any unsigned expression zero-extends.
  if (instruction->GetType() == type) {
    switch (type) {
      case DataType::Type::kUint8:
      case DataType::Type::kUint16:
        *operand = instruction;
        return true;
      default:
        return false;
    }
  }
  // An explicit widening conversion of an unsigned expression zero-extends.
  if (instruction->IsTypeConversion()) {
    HInstruction* conv = instruction->InputAt(0);
    DataType::Type from = conv->GetType();
    switch (instruction->GetType()) {
      case DataType::Type::kInt32:
      case DataType::Type::kInt64:
        // Conversion to int32/int64 from kUint16: report the conversion input
        // as the promoted operand.
        if (type == from && from == DataType::Type::kUint16) {
          *operand = conv;
          return true;
        }
        return false;
      case DataType::Type::kUint16:
        // A conversion to uint16 is accepted only when looking for kInt16 and
        // the conversion input is itself a sign extension of that type.
        return type == DataType::Type::kInt16 &&
               from == DataType::Type::kInt16 &&
               IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
      default:
        return false;
    }
  }
  return false;
}
246
Aart Bik304c8a52017-05-23 11:01:13 -0700247// Detect situations with same-extension narrower operands.
248// Returns true on success and sets is_unsigned accordingly.
249static bool IsNarrowerOperands(HInstruction* a,
250 HInstruction* b,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100251 DataType::Type type,
Aart Bik304c8a52017-05-23 11:01:13 -0700252 /*out*/ HInstruction** r,
253 /*out*/ HInstruction** s,
254 /*out*/ bool* is_unsigned) {
Aart Bik4d1a9d42017-10-19 14:40:55 -0700255 // Look for a matching sign extension.
256 DataType::Type stype = HVecOperation::ToSignedType(type);
257 if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) {
Aart Bik304c8a52017-05-23 11:01:13 -0700258 *is_unsigned = false;
259 return true;
Aart Bik4d1a9d42017-10-19 14:40:55 -0700260 }
261 // Look for a matching zero extension.
262 DataType::Type utype = HVecOperation::ToUnsignedType(type);
263 if (IsZeroExtensionAndGet(a, utype, r) && IsZeroExtensionAndGet(b, utype, s)) {
Aart Bik304c8a52017-05-23 11:01:13 -0700264 *is_unsigned = true;
265 return true;
266 }
267 return false;
268}
269
270// As above, single operand.
271static bool IsNarrowerOperand(HInstruction* a,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100272 DataType::Type type,
Aart Bik304c8a52017-05-23 11:01:13 -0700273 /*out*/ HInstruction** r,
274 /*out*/ bool* is_unsigned) {
Aart Bik4d1a9d42017-10-19 14:40:55 -0700275 // Look for a matching sign extension.
276 DataType::Type stype = HVecOperation::ToSignedType(type);
277 if (IsSignExtensionAndGet(a, stype, r)) {
Aart Bik304c8a52017-05-23 11:01:13 -0700278 *is_unsigned = false;
279 return true;
Aart Bik4d1a9d42017-10-19 14:40:55 -0700280 }
281 // Look for a matching zero extension.
282 DataType::Type utype = HVecOperation::ToUnsignedType(type);
283 if (IsZeroExtensionAndGet(a, utype, r)) {
Aart Bik304c8a52017-05-23 11:01:13 -0700284 *is_unsigned = true;
285 return true;
286 }
287 return false;
288}
289
Aart Bikdbbac8f2017-09-01 13:06:08 -0700290// Compute relative vector length based on type difference.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100291static size_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type, size_t vl) {
Vladimir Markod5d2f2c2017-09-26 12:37:26 +0100292 DCHECK(DataType::IsIntegralType(other_type));
293 DCHECK(DataType::IsIntegralType(vector_type));
294 DCHECK_GE(DataType::SizeShift(other_type), DataType::SizeShift(vector_type));
295 return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));
Aart Bikdbbac8f2017-09-01 13:06:08 -0700296}
297
// Detect up to two instructions a and b, and an accumulated constant c.
static bool IsAddConstHelper(HInstruction* instruction,
                             /*out*/ HInstruction** a,
                             /*out*/ HInstruction** b,
                             /*out*/ int64_t* c,
                             int32_t depth) {
  static constexpr int32_t kMaxDepth = 8;  // don't search too deep
  int64_t value = 0;
  if (IsInt64AndGet(instruction, &value)) {
    // Constant operand: fold it into the accumulated constant.
    *c += value;
    return true;
  } else if (instruction->IsAdd() && depth <= kMaxDepth) {
    // Addition: recurse into both operands, bounded by kMaxDepth.
    return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
           IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
  } else if (*a == nullptr) {
    // First non-constant operand encountered.
    *a = instruction;
    return true;
  } else if (*b == nullptr) {
    // Second non-constant operand encountered.
    *b = instruction;
    return true;
  }
  return false;  // too many non-const operands
}
321
// Detect a + b + c for an optional constant c.
static bool IsAddConst(HInstruction* instruction,
                       /*out*/ HInstruction** a,
                       /*out*/ HInstruction** b,
                       /*out*/ int64_t* c) {
  if (instruction->IsAdd()) {
    // Try to find a + b and accumulated c.
    if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
        IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
        *b != nullptr) {
      return true;
    }
    // Found a + b.
    // NOTE(review): this fallback overwrites any partial results (including an
    // updated *c) left behind by the failed helper calls above.
    *a = instruction->InputAt(0);
    *b = instruction->InputAt(1);
    *c = 0;
    return true;
  }
  return false;
}
342
Aart Bikdf011c32017-09-28 12:53:04 -0700343// Detect a + c for constant c.
344static bool IsAddConst(HInstruction* instruction,
345 /*out*/ HInstruction** a,
346 /*out*/ int64_t* c) {
347 if (instruction->IsAdd()) {
348 if (IsInt64AndGet(instruction->InputAt(0), c)) {
349 *a = instruction->InputAt(1);
350 return true;
351 } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
352 *a = instruction->InputAt(0);
353 return true;
354 }
355 }
356 return false;
357}
358
Aart Bikb29f6842017-07-28 15:58:41 -0700359// Detect reductions of the following forms,
Aart Bikb29f6842017-07-28 15:58:41 -0700360// x = x_phi + ..
361// x = x_phi - ..
362// x = max(x_phi, ..)
363// x = min(x_phi, ..)
364static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
365 if (reduction->IsAdd()) {
Aart Bikdbbac8f2017-09-01 13:06:08 -0700366 return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
367 (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
Aart Bikb29f6842017-07-28 15:58:41 -0700368 } else if (reduction->IsSub()) {
Aart Bikdbbac8f2017-09-01 13:06:08 -0700369 return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
Aart Bikb29f6842017-07-28 15:58:41 -0700370 } else if (reduction->IsInvokeStaticOrDirect()) {
371 switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
372 case Intrinsics::kMathMinIntInt:
373 case Intrinsics::kMathMinLongLong:
374 case Intrinsics::kMathMinFloatFloat:
375 case Intrinsics::kMathMinDoubleDouble:
376 case Intrinsics::kMathMaxIntInt:
377 case Intrinsics::kMathMaxLongLong:
378 case Intrinsics::kMathMaxFloatFloat:
379 case Intrinsics::kMathMaxDoubleDouble:
Aart Bikdbbac8f2017-09-01 13:06:08 -0700380 return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
381 (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
Aart Bikb29f6842017-07-28 15:58:41 -0700382 default:
383 return false;
384 }
385 }
386 return false;
387}
388
Aart Bikdbbac8f2017-09-01 13:06:08 -0700389// Translates vector operation to reduction kind.
390static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
391 if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
Aart Bik0148de42017-09-05 09:25:01 -0700392 return HVecReduce::kSum;
393 } else if (reduction->IsVecMin()) {
394 return HVecReduce::kMin;
395 } else if (reduction->IsVecMax()) {
396 return HVecReduce::kMax;
397 }
398 LOG(FATAL) << "Unsupported SIMD reduction";
399 UNREACHABLE();
400}
401
// Test vector restrictions: true when any tested restriction bit is set.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
  uint64_t overlap = restrictions & tested;
  return overlap != 0;
}
406
Aart Bikf3e61ee2017-04-12 17:09:20 -0700407// Insert an instruction.
Aart Bikf8f5a162017-02-06 15:35:29 -0800408static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
409 DCHECK(block != nullptr);
410 DCHECK(instruction != nullptr);
411 block->InsertInstructionBefore(instruction, block->GetLastInstruction());
412 return instruction;
413}
414
Artem Serov21c7e6f2017-07-27 16:04:42 +0100415// Check that instructions from the induction sets are fully removed: have no uses
416// and no other instructions use them.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100417static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) {
Artem Serov21c7e6f2017-07-27 16:04:42 +0100418 for (HInstruction* instr : *iset) {
419 if (instr->GetBlock() != nullptr ||
420 !instr->GetUses().empty() ||
421 !instr->GetEnvUses().empty() ||
422 HasEnvironmentUsedByOthers(instr)) {
423 return false;
424 }
425 }
Artem Serov21c7e6f2017-07-27 16:04:42 +0100426 return true;
427}
428
Aart Bik281c6812016-08-26 11:31:48 -0700429//
Aart Bikb29f6842017-07-28 15:58:41 -0700430// Public methods.
Aart Bik281c6812016-08-26 11:31:48 -0700431//
432
// Constructor: records the analyses and allocators; all per-run scratch state
// (iset_, reductions_, vector_* fields) starts out null and is attached and
// detached by LocalRun() using a phase-local allocator.
HLoopOptimization::HLoopOptimization(HGraph* graph,
                                     CompilerDriver* compiler_driver,
                                     HInductionVarAnalysis* induction_analysis,
                                     OptimizingCompilerStats* stats)
    : HOptimization(graph, kLoopOptimizationPassName, stats),
      compiler_driver_(compiler_driver),
      induction_range_(induction_analysis),
      loop_allocator_(nullptr),
      global_allocator_(graph_->GetAllocator()),
      top_loop_(nullptr),
      last_loop_(nullptr),
      iset_(nullptr),
      reductions_(nullptr),
      simplified_(false),
      vector_length_(0),
      vector_refs_(nullptr),
      vector_peeling_candidate_(nullptr),
      vector_runtime_test_a_(nullptr),
      vector_runtime_test_b_(nullptr),
      vector_map_(nullptr),
      vector_permanent_map_(nullptr),
      vector_mode_(kSequential),
      vector_preheader_(nullptr),
      vector_header_(nullptr),
      vector_body_(nullptr),
      vector_index_(nullptr) {
}
460
// Entry point of the pass: sets up the phase-local allocator, runs the actual
// loop optimizations, and tears the scratch state back down.
void HLoopOptimization::Run() {
  // Skip if there is no loop or the graph has try-catch/irreducible loops.
  // TODO: make this less of a sledgehammer.
  if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
    return;
  }
  // Phase-local allocator.
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  loop_allocator_ = &allocator;

  // Perform loop optimizations.
  LocalRun();
  // If every loop was removed, record that fact on the graph.
  if (top_loop_ == nullptr) {
    graph_->SetHasLoops(false);  // no more loops
  }

  // Detach: the allocator goes out of scope here, so no dangling pointers.
  loop_allocator_ = nullptr;
  last_loop_ = top_loop_ = nullptr;
}
482
Aart Bikb29f6842017-07-28 15:58:41 -0700483//
484// Loop setup and traversal.
485//
486
// Core driver: builds the loop hierarchy from the linearized graph, then
// attaches stack-local scratch structures to the member pointers, traverses
// all loops inner-to-outer, and detaches the scratch state again.
void HLoopOptimization::LocalRun() {
  // Build the linear order using the phase-local allocator. This step enables building
  // a loop hierarchy that properly reflects the outer-inner and previous-next relation.
  ScopedArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder));
  LinearizeGraph(graph_, &linear_order);

  // Build the loop hierarchy.
  for (HBasicBlock* block : linear_order) {
    if (block->IsLoopHeader()) {
      AddLoop(block->GetLoopInformation());
    }
  }

  // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
  // temporary data structures using the phase-local allocator. All new HIR
  // should use the global allocator.
  if (top_loop_ != nullptr) {
    ScopedArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ScopedArenaSafeMap<HInstruction*, HInstruction*> reds(
        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ScopedArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ScopedArenaSafeMap<HInstruction*, HInstruction*> map(
        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    ScopedArenaSafeMap<HInstruction*, HInstruction*> perm(
        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
    // Attach.
    iset_ = &iset;
    reductions_ = &reds;
    vector_refs_ = &refs;
    vector_map_ = &map;
    vector_permanent_map_ = &perm;
    // Traverse.
    TraverseLoopsInnerToOuter(top_loop_);
    // Detach. The structures above are stack-local, so the member pointers
    // must be cleared before they go out of scope.
    iset_ = nullptr;
    reductions_ = nullptr;
    vector_refs_ = nullptr;
    vector_map_ = nullptr;
    vector_permanent_map_ = nullptr;
  }
}
528
// Add a loop to the hierarchy. Loops arrive in linear order, so each new loop
// is either the first loop, nested inside the most recently added loop, or a
// sibling of some loop on the path from last_loop_ to the top.
void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
  DCHECK(loop_info != nullptr);
  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
  if (last_loop_ == nullptr) {
    // First loop.
    DCHECK(top_loop_ == nullptr);
    last_loop_ = top_loop_ = node;
  } else if (loop_info->IsIn(*last_loop_->loop_info)) {
    // Inner loop.
    node->outer = last_loop_;
    DCHECK(last_loop_->inner == nullptr);
    last_loop_ = last_loop_->inner = node;
  } else {
    // Subsequent loop: walk outward until an enclosing (or top-level) position
    // is found, then append as the next sibling there.
    while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) {
      last_loop_ = last_loop_->outer;
    }
    node->outer = last_loop_->outer;
    node->previous = last_loop_;
    DCHECK(last_loop_->next == nullptr);
    last_loop_ = last_loop_->next = node;
  }
}
552
// Unlink a loop node from the hierarchy. The node must have no inner loops
// (callers remove inner-to-outer), and its siblings/parent links are repaired.
void HLoopOptimization::RemoveLoop(LoopNode* node) {
  DCHECK(node != nullptr);
  DCHECK(node->inner == nullptr);
  if (node->previous != nullptr) {
    // Within sequence: splice out of the doubly-linked sibling list.
    node->previous->next = node->next;
    if (node->next != nullptr) {
      node->next->previous = node->previous;
    }
  } else {
    // First of sequence: the parent (or top_loop_) must point at the successor.
    if (node->outer != nullptr) {
      node->outer->inner = node->next;
    } else {
      top_loop_ = node->next;
    }
    if (node->next != nullptr) {
      // The successor becomes the new head of the sequence.
      node->next->outer = node->outer;
      node->next->previous = nullptr;
    }
  }
}
575
// Traverse the loop hierarchy inner-to-outer, simplifying and optimizing each
// loop. Returns whether any loop in this (sub)hierarchy changed, so callers
// can recompute induction information.
bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
  bool changed = false;
  for ( ; node != nullptr; node = node->next) {
    // Visit inner loops first. Recompute induction information for this
    // loop if the induction of any inner loop has changed.
    if (TraverseLoopsInnerToOuter(node->inner)) {
      induction_range_.ReVisit(node->loop_info);
    }
    // Repeat simplifications in the loop-body until no more changes occur.
    // Note that since each simplification consists of eliminating code (without
    // introducing new code), this process is always finite.
    do {
      simplified_ = false;
      SimplifyInduction(node);
      SimplifyBlocks(node);
      changed = simplified_ || changed;
    } while (simplified_);
    // Optimize inner loop (only innermost loops qualify).
    if (node->inner == nullptr) {
      changed = OptimizeInnerLoop(node) || changed;
    }
  }
  return changed;
}
600
Aart Bikf8f5a162017-02-06 15:35:29 -0800601//
602// Optimization.
603//
604
// Simplify induction cycles whose values are only needed outside the loop.
void HLoopOptimization::SimplifyInduction(LoopNode* node) {
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();
  // Scan the phis in the header to find opportunities to simplify an induction
  // cycle that is only used outside the loop. Replace these uses, if any, with
  // the last value and remove the induction cycle.
  // Examples: for (int i = 0; x != null; i++) { .... no i .... }
  //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
  for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
    HPhi* phi = it.Current()->AsPhi();
    if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
      // Note that it's ok to have replaced uses after the loop with the last value, without
      // being able to remove the cycle. Environment uses (which are the reason we may not be
      // able to remove the cycle) within the loop will still hold the right value. We must
      // have tried first, however, to replace outside uses.
      if (CanRemoveCycle()) {
        simplified_ = true;
        // Remove every instruction participating in the cycle (collected in iset_).
        for (HInstruction* i : *iset_) {
          RemoveFromCycle(i);
        }
        DCHECK(CheckInductionSetFullyRemoved(iset_));
      }
    }
  }
}
631
// Remove dead instructions and trivial control flow from the loop-body.
void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
  // Iterate over all basic blocks in the loop-body.
  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
    HBasicBlock* block = it.Current();
    // Remove dead instructions from the loop-body.
    RemoveDeadInstructions(block->GetPhis());
    RemoveDeadInstructions(block->GetInstructions());
    // Remove trivial control flow blocks from the loop-body: a straight-line
    // block pair with no other predecessors can simply be merged.
    if (block->GetPredecessors().size() == 1 &&
        block->GetSuccessors().size() == 1 &&
        block->GetSingleSuccessor()->GetPredecessors().size() == 1) {
      simplified_ = true;
      block->MergeWith(block->GetSingleSuccessor());
    } else if (block->GetSuccessors().size() == 2) {
      // Trivial if block can be bypassed to either branch.
      HBasicBlock* succ0 = block->GetSuccessors()[0];
      HBasicBlock* succ1 = block->GetSuccessors()[1];
      HBasicBlock* meet0 = nullptr;
      HBasicBlock* meet1 = nullptr;
      if (succ0 != succ1 &&
          IsGotoBlock(succ0, &meet0) &&
          IsGotoBlock(succ1, &meet1) &&
          meet0 == meet1 &&  // meets again
          meet0 != block &&  // no self-loop
          meet0->GetPhis().IsEmpty()) {  // not used for merging
        simplified_ = true;
        // Delete one arm of the diamond; control reaches the meet via succ1.
        succ0->DisconnectAndDelete();
        // Repair the dominator tree if the meet block was dominated by the if.
        if (block->Dominates(meet0)) {
          block->RemoveDominatedBlock(meet0);
          succ1->AddDominatedBlock(meet0);
          meet0->SetDominator(succ1);
        }
      }
    }
  }
}
668
// Optimizes one innermost loop. First tries to delete the loop outright
// (empty body, or a body that iterates exactly once), then tries to
// vectorize it. Returns true if the loop was changed (removed/vectorized).
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();
  // Ensure loop header logic is finite.
  int64_t trip_count = 0;
  if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
    return false;
  }
  // Ensure there is only a single loop-body (besides the header).
  HBasicBlock* body = nullptr;
  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
    if (it.Current() != header) {
      if (body != nullptr) {
        return false;  // more than one block besides the header
      }
      body = it.Current();
    }
  }
  CHECK(body != nullptr);
  // Ensure there is only a single exit point.
  if (header->GetSuccessors().size() != 2) {
    return false;
  }
  // The non-body successor of the header is the loop exit.
  HBasicBlock* exit = (header->GetSuccessors()[0] == body)
      ? header->GetSuccessors()[1]
      : header->GetSuccessors()[0];
  // Ensure exit can only be reached by exiting loop.
  if (exit->GetPredecessors().size() != 1) {
    return false;
  }
  // Detect either an empty loop (no side effects other than plain iteration) or
  // a trivial loop (just iterating once). Replace subsequent index uses, if any,
  // with the last value and remove the loop, possibly after unrolling its body.
  HPhi* main_phi = nullptr;
  if (TrySetSimpleLoopHeader(header, &main_phi)) {
    bool is_empty = IsEmptyBody(body);
    if (reductions_->empty() &&  // TODO: possible with some effort
        (is_empty || trip_count == 1) &&
        TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
      if (!is_empty) {
        // Unroll the loop-body, which sees initial value of the index.
        main_phi->ReplaceWith(main_phi->InputAt(0));
        preheader->MergeInstructionsWith(body);
      }
      // Detach the loop from the graph: redirect control flow and dominance
      // from the preheader straight to the exit (order of updates matters).
      body->DisconnectAndDelete();
      exit->RemovePredecessor(header);
      header->RemoveSuccessor(exit);
      header->RemoveDominatedBlock(exit);
      header->DisconnectAndDelete();
      preheader->AddSuccessor(exit);
      preheader->AddInstruction(new (global_allocator_) HGoto());
      preheader->AddDominatedBlock(exit);
      exit->SetDominator(preheader);
      RemoveLoop(node);  // update hierarchy
      return true;
    }
  }
  // Vectorize loop, if possible and valid.
  // (TrySetSimpleLoopHeader is invoked again; per its use in Vectorize() it
  // refills the bookkeeping sets consumed by the analysis above.)
  if (kEnableVectorization &&
      TrySetSimpleLoopHeader(header, &main_phi) &&
      ShouldVectorize(node, body, trip_count) &&
      TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
    Vectorize(node, body, exit, trip_count);
    graph_->SetHasSIMD(true);  // flag SIMD usage
    MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
    return true;
  }
  return false;
}
738
739//
740// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
741// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
742// Intel Press, June, 2004 (http://www.aartbik.com/).
743//
744
Aart Bik14a68b42017-06-08 14:06:58 -0700745bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
Aart Bikf8f5a162017-02-06 15:35:29 -0800746 // Reset vector bookkeeping.
747 vector_length_ = 0;
748 vector_refs_->clear();
Aart Bik14a68b42017-06-08 14:06:58 -0700749 vector_peeling_candidate_ = nullptr;
Aart Bikf8f5a162017-02-06 15:35:29 -0800750 vector_runtime_test_a_ =
751 vector_runtime_test_b_= nullptr;
752
753 // Phis in the loop-body prevent vectorization.
754 if (!block->GetPhis().IsEmpty()) {
755 return false;
756 }
757
758 // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
759 // occurrence, which allows passing down attributes down the use tree.
760 for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
761 if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
762 return false; // failure to vectorize a left-hand-side
763 }
764 }
765
Aart Bik14a68b42017-06-08 14:06:58 -0700766 // Does vectorization seem profitable?
767 if (!IsVectorizationProfitable(trip_count)) {
768 return false;
Aart Bikf8f5a162017-02-06 15:35:29 -0800769 }
770
771 // Data dependence analysis. Find each pair of references with same type, where
772 // at least one is a write. Each such pair denotes a possible data dependence.
773 // This analysis exploits the property that differently typed arrays cannot be
774 // aliased, as well as the property that references either point to the same
775 // array or to two completely disjoint arrays, i.e., no partial aliasing.
776 // Other than a few simply heuristics, no detailed subscript analysis is done.
Aart Bikb29f6842017-07-28 15:58:41 -0700777 // The scan over references also finds a suitable dynamic loop peeling candidate.
778 const ArrayReference* candidate = nullptr;
Aart Bikf8f5a162017-02-06 15:35:29 -0800779 for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
780 for (auto j = i; ++j != vector_refs_->end(); ) {
781 if (i->type == j->type && (i->lhs || j->lhs)) {
782 // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
783 HInstruction* a = i->base;
784 HInstruction* b = j->base;
785 HInstruction* x = i->offset;
786 HInstruction* y = j->offset;
787 if (a == b) {
788 // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
789 // Conservatively assume a loop-carried data dependence otherwise, and reject.
790 if (x != y) {
791 return false;
792 }
793 } else {
794 // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
795 // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
796 // generating an explicit a != b disambiguation runtime test on the two references.
797 if (x != y) {
Aart Bik37dc4df2017-06-28 14:08:00 -0700798 // To avoid excessive overhead, we only accept one a != b test.
799 if (vector_runtime_test_a_ == nullptr) {
800 // First test found.
801 vector_runtime_test_a_ = a;
802 vector_runtime_test_b_ = b;
803 } else if ((vector_runtime_test_a_ != a || vector_runtime_test_b_ != b) &&
804 (vector_runtime_test_a_ != b || vector_runtime_test_b_ != a)) {
805 return false; // second test would be needed
Aart Bikf8f5a162017-02-06 15:35:29 -0800806 }
Aart Bikf8f5a162017-02-06 15:35:29 -0800807 }
808 }
809 }
810 }
811 }
812
Aart Bik14a68b42017-06-08 14:06:58 -0700813 // Consider dynamic loop peeling for alignment.
Aart Bikb29f6842017-07-28 15:58:41 -0700814 SetPeelingCandidate(candidate, trip_count);
Aart Bik14a68b42017-06-08 14:06:58 -0700815
Aart Bikf8f5a162017-02-06 15:35:29 -0800816 // Success!
817 return true;
818}
819
// Replaces the given simple inner loop (block = single loop-body, exit = the
// sole exit block) by, in order: an optional sequential peeling loop for
// alignment, the main (possibly unrolled) vector loop, and an optional
// sequential cleanup loop for remainder iterations; then deletes the
// original loop and updates the loop hierarchy.
void HLoopOptimization::Vectorize(LoopNode* node,
                                  HBasicBlock* block,
                                  HBasicBlock* exit,
                                  int64_t trip_count) {
  HBasicBlock* header = node->loop_info->GetHeader();
  HBasicBlock* preheader = node->loop_info->GetPreHeader();

  // Pick a loop unrolling factor for the vector loop.
  uint32_t unroll = GetUnrollingFactor(block, trip_count);
  uint32_t chunk = vector_length_ * unroll;  // scalar iterations per vector-loop iteration

  // A cleanup loop is needed, at least, for any unknown trip count or
  // for a known trip count with remainder iterations after vectorization.
  bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;

  // Adjust vector bookkeeping.
  HPhi* main_phi = nullptr;
  bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi);  // refills sets
  DCHECK(is_simple_loop_header);
  vector_header_ = header;
  vector_body_ = block;

  // Loop induction type.
  DataType::Type induc_type = main_phi->GetType();
  DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64)
      << induc_type;

  // Generate dynamic loop peeling trip count, if needed, under the assumption
  // that the Android runtime guarantees at least "component size" alignment:
  // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
  HInstruction* ptc = nullptr;
  if (vector_peeling_candidate_ != nullptr) {
    DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
    //
    // TODO: Implement this. Compute address of first access memory location and
    //       compute peeling factor to obtain kAlignedBase alignment.
    //
    needs_cleanup = true;
  }

  // Generate loop control:
  // stc = <trip-count>;
  // vtc = stc - (stc - ptc) % chunk;
  // i = 0;
  HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
  HInstruction* vtc = stc;
  if (needs_cleanup) {
    // chunk is a power of two, so the modulo reduces to a bitwise AND.
    DCHECK(IsPowerOfTwo(chunk));
    HInstruction* diff = stc;
    if (ptc != nullptr) {
      diff = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, ptc));
    }
    HInstruction* rem = Insert(
        preheader, new (global_allocator_) HAnd(induc_type,
                                                diff,
                                                graph_->GetConstant(induc_type, chunk - 1)));
    vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
  }
  vector_index_ = graph_->GetConstant(induc_type, 0);

  // Generate runtime disambiguation test:
  // vtc = a != b ? vtc : 0;
  // (a == b forces all iterations into the sequential cleanup loop.)
  if (vector_runtime_test_a_ != nullptr) {
    HInstruction* rt = Insert(
        preheader,
        new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
    vtc = Insert(preheader,
                 new (global_allocator_)
                     HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc));
    needs_cleanup = true;
  }

  // Generate dynamic peeling loop for alignment, if needed:
  // for ( ; i < ptc; i += 1)
  //    <loop-body>
  if (ptc != nullptr) {
    vector_mode_ = kSequential;
    GenerateNewLoop(node,
                    block,
                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                    vector_index_,
                    ptc,
                    graph_->GetConstant(induc_type, 1),
                    kNoUnrollingFactor);
  }

  // Generate vector loop, possibly further unrolled:
  // for ( ; i < vtc; i += chunk)
  //    <vectorized-loop-body>
  vector_mode_ = kVector;
  GenerateNewLoop(node,
                  block,
                  graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                  vector_index_,
                  vtc,
                  graph_->GetConstant(induc_type, vector_length_),  // increment per unroll
                  unroll);
  HLoopInformation* vloop = vector_header_->GetLoopInformation();

  // Generate cleanup loop, if needed:
  // for ( ; i < stc; i += 1)
  //    <loop-body>
  if (needs_cleanup) {
    vector_mode_ = kSequential;
    GenerateNewLoop(node,
                    block,
                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
                    vector_index_,
                    stc,
                    graph_->GetConstant(induc_type, 1),
                    kNoUnrollingFactor);
  }

  // Link reductions to their final uses.
  for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
    if (i->first->IsPhi()) {
      HInstruction* phi = i->first;
      HInstruction* repl = ReduceAndExtractIfNeeded(i->second);
      // Deal with regular uses.
      for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
        induction_range_.Replace(use.GetUser(), phi, repl);  // update induction use
      }
      phi->ReplaceWith(repl);
    }
  }

  // Remove the original loop by disconnecting the body block
  // and removing all instructions from the header.
  block->DisconnectAndDelete();
  while (!header->GetFirstInstruction()->IsGoto()) {
    header->RemoveInstruction(header->GetFirstInstruction());
  }

  // Update loop hierarchy: the old header now resides in the same outer loop
  // as the old preheader. Note that we don't bother putting sequential
  // loops back in the hierarchy at this point.
  header->SetLoopInformation(preheader->GetLoopInformation());  // outward
  node->loop_info = vloop;
}
959
960void HLoopOptimization::GenerateNewLoop(LoopNode* node,
961 HBasicBlock* block,
962 HBasicBlock* new_preheader,
963 HInstruction* lo,
964 HInstruction* hi,
Aart Bik14a68b42017-06-08 14:06:58 -0700965 HInstruction* step,
966 uint32_t unroll) {
967 DCHECK(unroll == 1 || vector_mode_ == kVector);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100968 DataType::Type induc_type = lo->GetType();
Aart Bikf8f5a162017-02-06 15:35:29 -0800969 // Prepare new loop.
Aart Bikf8f5a162017-02-06 15:35:29 -0800970 vector_preheader_ = new_preheader,
971 vector_header_ = vector_preheader_->GetSingleSuccessor();
972 vector_body_ = vector_header_->GetSuccessors()[1];
Aart Bik14a68b42017-06-08 14:06:58 -0700973 HPhi* phi = new (global_allocator_) HPhi(global_allocator_,
974 kNoRegNumber,
975 0,
976 HPhi::ToPhiType(induc_type));
Aart Bikb07d1bc2017-04-05 10:03:15 -0700977 // Generate header and prepare body.
Aart Bikf8f5a162017-02-06 15:35:29 -0800978 // for (i = lo; i < hi; i += step)
979 // <loop-body>
Aart Bik14a68b42017-06-08 14:06:58 -0700980 HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi);
981 vector_header_->AddPhi(phi);
Aart Bikf8f5a162017-02-06 15:35:29 -0800982 vector_header_->AddInstruction(cond);
983 vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
Aart Bik14a68b42017-06-08 14:06:58 -0700984 vector_index_ = phi;
Aart Bik0148de42017-09-05 09:25:01 -0700985 vector_permanent_map_->clear(); // preserved over unrolling
Aart Bik14a68b42017-06-08 14:06:58 -0700986 for (uint32_t u = 0; u < unroll; u++) {
Aart Bik14a68b42017-06-08 14:06:58 -0700987 // Generate instruction map.
Aart Bik0148de42017-09-05 09:25:01 -0700988 vector_map_->clear();
Aart Bik14a68b42017-06-08 14:06:58 -0700989 for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
990 bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
991 DCHECK(vectorized_def);
992 }
993 // Generate body from the instruction map, but in original program order.
994 HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
995 for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
996 auto i = vector_map_->find(it.Current());
997 if (i != vector_map_->end() && !i->second->IsInBlock()) {
998 Insert(vector_body_, i->second);
999 // Deal with instructions that need an environment, such as the scalar intrinsics.
1000 if (i->second->NeedsEnvironment()) {
1001 i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
1002 }
1003 }
1004 }
Aart Bik0148de42017-09-05 09:25:01 -07001005 // Generate the induction.
Aart Bik14a68b42017-06-08 14:06:58 -07001006 vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
1007 Insert(vector_body_, vector_index_);
Aart Bikf8f5a162017-02-06 15:35:29 -08001008 }
Aart Bik0148de42017-09-05 09:25:01 -07001009 // Finalize phi inputs for the reductions (if any).
1010 for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
1011 if (!i->first->IsPhi()) {
1012 DCHECK(i->second->IsPhi());
1013 GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
1014 }
1015 }
Aart Bikb29f6842017-07-28 15:58:41 -07001016 // Finalize phi inputs for the loop index.
Aart Bik14a68b42017-06-08 14:06:58 -07001017 phi->AddInput(lo);
1018 phi->AddInput(vector_index_);
1019 vector_index_ = phi;
Aart Bikf8f5a162017-02-06 15:35:29 -08001020}
1021
// Analyzes (generate_code == false) or generates code for
// (generate_code == true) one "definition", i.e. one statement of the
// loop-body. Returns true when the statement is vectorizable.
bool HLoopOptimization::VectorizeDef(LoopNode* node,
                                     HInstruction* instruction,
                                     bool generate_code) {
  // Accept a left-hand-side array base[index] for
  // (1) supported vector type,
  // (2) loop-invariant base,
  // (3) unit stride index,
  // (4) vectorizable right-hand-side value.
  uint64_t restrictions = kNone;
  if (instruction->IsArraySet()) {
    DataType::Type type = instruction->AsArraySet()->GetComponentType();
    HInstruction* base = instruction->InputAt(0);
    HInstruction* index = instruction->InputAt(1);
    HInstruction* value = instruction->InputAt(2);
    HInstruction* offset = nullptr;
    if (TrySetVectorType(type, &restrictions) &&
        node->loop_info->IsDefinedOutOfTheLoop(base) &&
        induction_range_.IsUnitStride(instruction, index, graph_, &offset) &&
        VectorizeUse(node, value, generate_code, type, restrictions)) {
      if (generate_code) {
        // Code generation: emit the normalized index and the vector store.
        GenerateVecSub(index, offset);
        GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), offset, type);
      } else {
        // Analysis: record the write for later data-dependence checking.
        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true));
      }
      return true;
    }
    return false;
  }
  // Accept a left-hand-side reduction for
  // (1) supported vector type,
  // (2) vectorizable right-hand-side value.
  auto redit = reductions_->find(instruction);
  if (redit != reductions_->end()) {
    DataType::Type type = instruction->GetType();
    // Recognize SAD idiom or direct reduction.
    if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) ||
        (TrySetVectorType(type, &restrictions) &&
         VectorizeUse(node, instruction, generate_code, type, restrictions))) {
      if (generate_code) {
        // Thread the new reduction through the permanent map so later
        // unrolled copies and the final reduction link up correctly.
        HInstruction* new_red = vector_map_->Get(instruction);
        vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
        vector_permanent_map_->Overwrite(redit->second, new_red);
      }
      return true;
    }
    return false;
  }
  // Branch back okay.
  if (instruction->IsGoto()) {
    return true;
  }
  // Otherwise accept only expressions with no effects outside the immediate loop-body.
  // Note that actual uses are inspected during right-hand-side tree traversal.
  return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
1078
Aart Bik304c8a52017-05-23 11:01:13 -07001079// TODO: saturation arithmetic.
Aart Bikf8f5a162017-02-06 15:35:29 -08001080bool HLoopOptimization::VectorizeUse(LoopNode* node,
1081 HInstruction* instruction,
1082 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001083 DataType::Type type,
Aart Bikf8f5a162017-02-06 15:35:29 -08001084 uint64_t restrictions) {
1085 // Accept anything for which code has already been generated.
1086 if (generate_code) {
1087 if (vector_map_->find(instruction) != vector_map_->end()) {
1088 return true;
1089 }
1090 }
1091 // Continue the right-hand-side tree traversal, passing in proper
1092 // types and vector restrictions along the way. During code generation,
1093 // all new nodes are drawn from the global allocator.
1094 if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) {
1095 // Accept invariant use, using scalar expansion.
1096 if (generate_code) {
1097 GenerateVecInv(instruction, type);
1098 }
1099 return true;
1100 } else if (instruction->IsArrayGet()) {
Goran Jakovljevic19680d32017-05-11 10:38:36 +02001101 // Deal with vector restrictions.
Vladimir Markod5d2f2c2017-09-26 12:37:26 +01001102 bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
1103 if (is_string_char_at && HasVectorRestrictions(restrictions, kNoStringCharAt)) {
Goran Jakovljevic19680d32017-05-11 10:38:36 +02001104 return false;
1105 }
Aart Bikf8f5a162017-02-06 15:35:29 -08001106 // Accept a right-hand-side array base[index] for
Vladimir Markod5d2f2c2017-09-26 12:37:26 +01001107 // (1) matching vector type (exact match or signed/unsigned integral type of the same size),
Aart Bikf8f5a162017-02-06 15:35:29 -08001108 // (2) loop-invariant base,
1109 // (3) unit stride index,
1110 // (4) vectorizable right-hand-side value.
1111 HInstruction* base = instruction->InputAt(0);
1112 HInstruction* index = instruction->InputAt(1);
1113 HInstruction* offset = nullptr;
Aart Bik46b6dbc2017-10-03 11:37:37 -07001114 if (HVecOperation::ToSignedType(type) == HVecOperation::ToSignedType(instruction->GetType()) &&
Aart Bikf8f5a162017-02-06 15:35:29 -08001115 node->loop_info->IsDefinedOutOfTheLoop(base) &&
Aart Bik37dc4df2017-06-28 14:08:00 -07001116 induction_range_.IsUnitStride(instruction, index, graph_, &offset)) {
Aart Bikf8f5a162017-02-06 15:35:29 -08001117 if (generate_code) {
1118 GenerateVecSub(index, offset);
Aart Bik14a68b42017-06-08 14:06:58 -07001119 GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type);
Aart Bikf8f5a162017-02-06 15:35:29 -08001120 } else {
1121 vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
1122 }
1123 return true;
1124 }
Aart Bik0148de42017-09-05 09:25:01 -07001125 } else if (instruction->IsPhi()) {
1126 // Accept particular phi operations.
1127 if (reductions_->find(instruction) != reductions_->end()) {
1128 // Deal with vector restrictions.
1129 if (HasVectorRestrictions(restrictions, kNoReduction)) {
1130 return false;
1131 }
1132 // Accept a reduction.
1133 if (generate_code) {
1134 GenerateVecReductionPhi(instruction->AsPhi());
1135 }
1136 return true;
1137 }
1138 // TODO: accept right-hand-side induction?
1139 return false;
Aart Bikf8f5a162017-02-06 15:35:29 -08001140 } else if (instruction->IsTypeConversion()) {
1141 // Accept particular type conversions.
1142 HTypeConversion* conversion = instruction->AsTypeConversion();
1143 HInstruction* opa = conversion->InputAt(0);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001144 DataType::Type from = conversion->GetInputType();
1145 DataType::Type to = conversion->GetResultType();
1146 if (DataType::IsIntegralType(from) && DataType::IsIntegralType(to)) {
1147 size_t size_vec = DataType::Size(type);
1148 size_t size_from = DataType::Size(from);
1149 size_t size_to = DataType::Size(to);
Aart Bikdbbac8f2017-09-01 13:06:08 -07001150 // Accept an integral conversion
1151 // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
1152 // (1b) widening from at least vector type, and
1153 // (2) vectorizable operand.
1154 if ((size_to < size_from &&
1155 size_to == size_vec &&
1156 VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) ||
1157 (size_to >= size_from &&
1158 size_from >= size_vec &&
Aart Bik4d1a9d42017-10-19 14:40:55 -07001159 VectorizeUse(node, opa, generate_code, type, restrictions))) {
Aart Bikf8f5a162017-02-06 15:35:29 -08001160 if (generate_code) {
1161 if (vector_mode_ == kVector) {
1162 vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
1163 } else {
1164 GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
1165 }
1166 }
1167 return true;
1168 }
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001169 } else if (to == DataType::Type::kFloat32 && from == DataType::Type::kInt32) {
Aart Bikf8f5a162017-02-06 15:35:29 -08001170 DCHECK_EQ(to, type);
1171 // Accept int to float conversion for
1172 // (1) supported int,
1173 // (2) vectorizable operand.
1174 if (TrySetVectorType(from, &restrictions) &&
1175 VectorizeUse(node, opa, generate_code, from, restrictions)) {
1176 if (generate_code) {
1177 GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
1178 }
1179 return true;
1180 }
1181 }
1182 return false;
1183 } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) {
1184 // Accept unary operator for vectorizable operand.
1185 HInstruction* opa = instruction->InputAt(0);
1186 if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
1187 if (generate_code) {
1188 GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
1189 }
1190 return true;
1191 }
1192 } else if (instruction->IsAdd() || instruction->IsSub() ||
1193 instruction->IsMul() || instruction->IsDiv() ||
1194 instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
1195 // Deal with vector restrictions.
1196 if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) ||
1197 (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) {
1198 return false;
1199 }
1200 // Accept binary operator for vectorizable operands.
1201 HInstruction* opa = instruction->InputAt(0);
1202 HInstruction* opb = instruction->InputAt(1);
1203 if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
1204 VectorizeUse(node, opb, generate_code, type, restrictions)) {
1205 if (generate_code) {
1206 GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
1207 }
1208 return true;
1209 }
1210 } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
Aart Bikdbbac8f2017-09-01 13:06:08 -07001211 // Recognize halving add idiom.
Aart Bikf3e61ee2017-04-12 17:09:20 -07001212 if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
1213 return true;
1214 }
Aart Bikf8f5a162017-02-06 15:35:29 -08001215 // Deal with vector restrictions.
Aart Bik304c8a52017-05-23 11:01:13 -07001216 HInstruction* opa = instruction->InputAt(0);
1217 HInstruction* opb = instruction->InputAt(1);
1218 HInstruction* r = opa;
1219 bool is_unsigned = false;
Aart Bikf8f5a162017-02-06 15:35:29 -08001220 if ((HasVectorRestrictions(restrictions, kNoShift)) ||
1221 (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
1222 return false; // unsupported instruction
Aart Bik304c8a52017-05-23 11:01:13 -07001223 } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
1224 // Shifts right need extra care to account for higher order bits.
1225 // TODO: less likely shr/unsigned and ushr/signed can by flipping signess.
1226 if (instruction->IsShr() &&
1227 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
1228 return false; // reject, unless all operands are sign-extension narrower
1229 } else if (instruction->IsUShr() &&
1230 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
1231 return false; // reject, unless all operands are zero-extension narrower
1232 }
Aart Bikf8f5a162017-02-06 15:35:29 -08001233 }
1234 // Accept shift operator for vectorizable/invariant operands.
1235 // TODO: accept symbolic, albeit loop invariant shift factors.
Aart Bik304c8a52017-05-23 11:01:13 -07001236 DCHECK(r != nullptr);
1237 if (generate_code && vector_mode_ != kVector) { // de-idiom
1238 r = opa;
1239 }
Aart Bik50e20d52017-05-05 14:07:29 -07001240 int64_t distance = 0;
Aart Bik304c8a52017-05-23 11:01:13 -07001241 if (VectorizeUse(node, r, generate_code, type, restrictions) &&
Aart Bik50e20d52017-05-05 14:07:29 -07001242 IsInt64AndGet(opb, /*out*/ &distance)) {
Aart Bik65ffd8e2017-05-01 16:50:45 -07001243 // Restrict shift distance to packed data type width.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001244 int64_t max_distance = DataType::Size(type) * 8;
Aart Bik65ffd8e2017-05-01 16:50:45 -07001245 if (0 <= distance && distance < max_distance) {
1246 if (generate_code) {
Aart Bik304c8a52017-05-23 11:01:13 -07001247 GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
Aart Bik65ffd8e2017-05-01 16:50:45 -07001248 }
1249 return true;
Aart Bikf8f5a162017-02-06 15:35:29 -08001250 }
Aart Bikf8f5a162017-02-06 15:35:29 -08001251 }
1252 } else if (instruction->IsInvokeStaticOrDirect()) {
Aart Bik6daebeb2017-04-03 14:35:41 -07001253 // Accept particular intrinsics.
1254 HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
1255 switch (invoke->GetIntrinsic()) {
1256 case Intrinsics::kMathAbsInt:
1257 case Intrinsics::kMathAbsLong:
1258 case Intrinsics::kMathAbsFloat:
1259 case Intrinsics::kMathAbsDouble: {
1260 // Deal with vector restrictions.
Aart Bik304c8a52017-05-23 11:01:13 -07001261 HInstruction* opa = instruction->InputAt(0);
1262 HInstruction* r = opa;
1263 bool is_unsigned = false;
1264 if (HasVectorRestrictions(restrictions, kNoAbs)) {
Aart Bik6daebeb2017-04-03 14:35:41 -07001265 return false;
Aart Bik304c8a52017-05-23 11:01:13 -07001266 } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
1267 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
1268 return false; // reject, unless operand is sign-extension narrower
Aart Bik6daebeb2017-04-03 14:35:41 -07001269 }
1270 // Accept ABS(x) for vectorizable operand.
Aart Bik304c8a52017-05-23 11:01:13 -07001271 DCHECK(r != nullptr);
1272 if (generate_code && vector_mode_ != kVector) { // de-idiom
1273 r = opa;
1274 }
1275 if (VectorizeUse(node, r, generate_code, type, restrictions)) {
Aart Bik6daebeb2017-04-03 14:35:41 -07001276 if (generate_code) {
Vladimir Markod5d2f2c2017-09-26 12:37:26 +01001277 NormalizePackedType(&type, &is_unsigned);
Aart Bik304c8a52017-05-23 11:01:13 -07001278 GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
Aart Bik6daebeb2017-04-03 14:35:41 -07001279 }
1280 return true;
1281 }
1282 return false;
1283 }
Aart Bikc8e93c72017-05-10 10:49:22 -07001284 case Intrinsics::kMathMinIntInt:
1285 case Intrinsics::kMathMinLongLong:
1286 case Intrinsics::kMathMinFloatFloat:
1287 case Intrinsics::kMathMinDoubleDouble:
1288 case Intrinsics::kMathMaxIntInt:
1289 case Intrinsics::kMathMaxLongLong:
1290 case Intrinsics::kMathMaxFloatFloat:
1291 case Intrinsics::kMathMaxDoubleDouble: {
1292 // Deal with vector restrictions.
Nicolas Geoffray92316902017-05-23 08:06:07 +00001293 HInstruction* opa = instruction->InputAt(0);
1294 HInstruction* opb = instruction->InputAt(1);
Aart Bik304c8a52017-05-23 11:01:13 -07001295 HInstruction* r = opa;
1296 HInstruction* s = opb;
1297 bool is_unsigned = false;
1298 if (HasVectorRestrictions(restrictions, kNoMinMax)) {
1299 return false;
1300 } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
1301 !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
1302 return false; // reject, unless all operands are same-extension narrower
1303 }
1304 // Accept MIN/MAX(x, y) for vectorizable operands.
Aart Bikdbbac8f2017-09-01 13:06:08 -07001305 DCHECK(r != nullptr);
1306 DCHECK(s != nullptr);
Aart Bik304c8a52017-05-23 11:01:13 -07001307 if (generate_code && vector_mode_ != kVector) { // de-idiom
1308 r = opa;
1309 s = opb;
1310 }
1311 if (VectorizeUse(node, r, generate_code, type, restrictions) &&
1312 VectorizeUse(node, s, generate_code, type, restrictions)) {
Aart Bikc8e93c72017-05-10 10:49:22 -07001313 if (generate_code) {
Aart Bik304c8a52017-05-23 11:01:13 -07001314 GenerateVecOp(
1315 instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
Aart Bikc8e93c72017-05-10 10:49:22 -07001316 }
1317 return true;
1318 }
1319 return false;
1320 }
Aart Bik6daebeb2017-04-03 14:35:41 -07001321 default:
1322 return false;
1323 } // switch
Aart Bik281c6812016-08-26 11:31:48 -07001324 }
Aart Bik6b69e0a2017-01-11 10:20:43 -08001325 return false;
Aart Bik281c6812016-08-26 11:31:48 -07001326}
1327
// Determines whether the target instruction set can vectorize operations on the
// given packed data type, and if so, selects the vector length (number of
// lanes) for that type. Operations the target cannot perform on this type are
// accumulated into *restrictions (e.g. kNoDiv when there is no packed divide).
// Returns true if the type is vectorizable and the chosen length is consistent
// with lengths already in use in this loop body (see TrySetVectorLength).
bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) {
  const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
  switch (compiler_driver_->GetInstructionSet()) {
    case kArm:
    case kThumb2:
      // Allow vectorization for all ARM devices, because Android assumes that
      // ARM 32-bit always supports advanced SIMD (64-bit SIMD).
      switch (type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
          *restrictions |= kNoDiv | kNoReduction;
          return TrySetVectorLength(8);
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
          *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
          return TrySetVectorLength(4);
        case DataType::Type::kInt32:
          *restrictions |= kNoDiv | kNoWideSAD;
          return TrySetVectorLength(2);
        default:
          break;
      }
      return false;
    case kArm64:
      // Allow vectorization for all ARM devices, because Android assumes that
      // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
      switch (type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(16);
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(8);
        case DataType::Type::kInt32:
          *restrictions |= kNoDiv;
          return TrySetVectorLength(4);
        case DataType::Type::kInt64:
          *restrictions |= kNoDiv | kNoMul | kNoMinMax;
          return TrySetVectorLength(2);
        case DataType::Type::kFloat32:
          *restrictions |= kNoReduction;
          return TrySetVectorLength(4);
        case DataType::Type::kFloat64:
          *restrictions |= kNoReduction;
          return TrySetVectorLength(2);
        default:
          return false;
      }
    case kX86:
    case kX86_64:
      // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
      if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
        switch (type) {
          case DataType::Type::kBool:
          case DataType::Type::kUint8:
          case DataType::Type::kInt8:
            *restrictions |=
                kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
            return TrySetVectorLength(16);
          case DataType::Type::kUint16:
          case DataType::Type::kInt16:
            *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
            return TrySetVectorLength(8);
          case DataType::Type::kInt32:
            *restrictions |= kNoDiv | kNoSAD;
            return TrySetVectorLength(4);
          case DataType::Type::kInt64:
            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD;
            return TrySetVectorLength(2);
          case DataType::Type::kFloat32:
            *restrictions |= kNoMinMax | kNoReduction;  // minmax: -0.0 vs +0.0
            return TrySetVectorLength(4);
          case DataType::Type::kFloat64:
            *restrictions |= kNoMinMax | kNoReduction;  // minmax: -0.0 vs +0.0
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    case kMips:
      // Vectorization requires the MIPS SIMD Architecture extension (128-bit SIMD).
      if (features->AsMipsInstructionSetFeatures()->HasMsa()) {
        switch (type) {
          case DataType::Type::kBool:
          case DataType::Type::kUint8:
          case DataType::Type::kInt8:
            *restrictions |= kNoDiv | kNoReduction | kNoSAD;
            return TrySetVectorLength(16);
          case DataType::Type::kUint16:
          case DataType::Type::kInt16:
            *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
            return TrySetVectorLength(8);
          case DataType::Type::kInt32:
            *restrictions |= kNoDiv | kNoSAD;
            return TrySetVectorLength(4);
          case DataType::Type::kInt64:
            *restrictions |= kNoDiv | kNoSAD;
            return TrySetVectorLength(2);
          case DataType::Type::kFloat32:
            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
            return TrySetVectorLength(4);
          case DataType::Type::kFloat64:
            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    case kMips64:
      // Vectorization requires the MIPS SIMD Architecture extension (128-bit SIMD).
      if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
        switch (type) {
          case DataType::Type::kBool:
          case DataType::Type::kUint8:
          case DataType::Type::kInt8:
            *restrictions |= kNoDiv | kNoReduction | kNoSAD;
            return TrySetVectorLength(16);
          case DataType::Type::kUint16:
          case DataType::Type::kInt16:
            *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
            return TrySetVectorLength(8);
          case DataType::Type::kInt32:
            *restrictions |= kNoDiv | kNoSAD;
            return TrySetVectorLength(4);
          case DataType::Type::kInt64:
            *restrictions |= kNoDiv | kNoSAD;
            return TrySetVectorLength(2);
          case DataType::Type::kFloat32:
            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
            return TrySetVectorLength(4);
          case DataType::Type::kFloat64:
            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
            return TrySetVectorLength(2);
          default:
            break;
        }  // switch type
      }
      return false;
    default:
      return false;
  }  // switch instruction set
}
1474
1475bool HLoopOptimization::TrySetVectorLength(uint32_t length) {
1476 DCHECK(IsPowerOfTwo(length) && length >= 2u);
1477 // First time set?
1478 if (vector_length_ == 0) {
1479 vector_length_ = length;
1480 }
1481 // Different types are acceptable within a loop-body, as long as all the corresponding vector
1482 // lengths match exactly to obtain a uniform traversal through the vector iteration space
1483 // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions).
1484 return vector_length_ == length;
1485}
1486
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001487void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) {
Aart Bikf8f5a162017-02-06 15:35:29 -08001488 if (vector_map_->find(org) == vector_map_->end()) {
1489 // In scalar code, just use a self pass-through for scalar invariants
1490 // (viz. expression remains itself).
1491 if (vector_mode_ == kSequential) {
1492 vector_map_->Put(org, org);
1493 return;
1494 }
1495 // In vector code, explicit scalar expansion is needed.
Aart Bik0148de42017-09-05 09:25:01 -07001496 HInstruction* vector = nullptr;
1497 auto it = vector_permanent_map_->find(org);
1498 if (it != vector_permanent_map_->end()) {
1499 vector = it->second; // reuse during unrolling
1500 } else {
Aart Bikdbbac8f2017-09-01 13:06:08 -07001501 // Generates ReplicateScalar( (optional_type_conv) org ).
1502 HInstruction* input = org;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001503 DataType::Type input_type = input->GetType();
1504 if (type != input_type && (type == DataType::Type::kInt64 ||
1505 input_type == DataType::Type::kInt64)) {
Aart Bikdbbac8f2017-09-01 13:06:08 -07001506 input = Insert(vector_preheader_,
1507 new (global_allocator_) HTypeConversion(type, input, kNoDexPc));
1508 }
1509 vector = new (global_allocator_)
Aart Bik46b6dbc2017-10-03 11:37:37 -07001510 HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc);
Aart Bik0148de42017-09-05 09:25:01 -07001511 vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
1512 }
1513 vector_map_->Put(org, vector);
Aart Bikf8f5a162017-02-06 15:35:29 -08001514 }
1515}
1516
1517void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
1518 if (vector_map_->find(org) == vector_map_->end()) {
Aart Bik14a68b42017-06-08 14:06:58 -07001519 HInstruction* subscript = vector_index_;
Aart Bik37dc4df2017-06-28 14:08:00 -07001520 int64_t value = 0;
1521 if (!IsInt64AndGet(offset, &value) || value != 0) {
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001522 subscript = new (global_allocator_) HAdd(DataType::Type::kInt32, subscript, offset);
Aart Bikf8f5a162017-02-06 15:35:29 -08001523 if (org->IsPhi()) {
1524 Insert(vector_body_, subscript); // lacks layout placeholder
1525 }
1526 }
1527 vector_map_->Put(org, subscript);
1528 }
1529}
1530
// Generates the memory operation that mirrors original array access 'org':
// an HVecLoad/HVecStore in kVector mode, or a scalar HArrayGet/HArraySet in
// kSequential mode. Parameter 'opa' is the new subscript, 'opb' the value to
// store (nullptr for a load), and 'offset' the original array offset used to
// recognize the alignment peeling candidate. The new instruction is recorded
// in vector_map_ under 'org'.
void HLoopOptimization::GenerateVecMem(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
                                       HInstruction* offset,
                                       DataType::Type type) {
  uint32_t dex_pc = org->GetDexPc();
  HInstruction* vector = nullptr;
  if (vector_mode_ == kVector) {
    // Vector store or load.
    HInstruction* base = org->InputAt(0);
    if (opb != nullptr) {
      // A value operand is present: this is a store.
      vector = new (global_allocator_) HVecStore(
          global_allocator_, base, opa, opb, type, org->GetSideEffects(), vector_length_, dex_pc);
    } else {
      // String.charAt needs the special load that honors compressed strings.
      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
      vector = new (global_allocator_) HVecLoad(global_allocator_,
                                                base,
                                                opa,
                                                type,
                                                org->GetSideEffects(),
                                                vector_length_,
                                                is_string_char_at,
                                                dex_pc);
    }
    // Known dynamically enforced alignment? If this access is the one the loop
    // was peeled for, the peeling guarantees an aligned base at runtime.
    if (vector_peeling_candidate_ != nullptr &&
        vector_peeling_candidate_->base == base &&
        vector_peeling_candidate_->offset == offset) {
      vector->AsVecMemoryOperation()->SetAlignment(Alignment(kAlignedBase, 0));
    }
  } else {
    // Scalar store or load.
    DCHECK(vector_mode_ == kSequential);
    if (opb != nullptr) {
      DataType::Type component_type = org->AsArraySet()->GetComponentType();
      vector = new (global_allocator_) HArraySet(
          org->InputAt(0), opa, opb, component_type, org->GetSideEffects(), dex_pc);
    } else {
      bool is_string_char_at = org->AsArrayGet()->IsStringCharAt();
      vector = new (global_allocator_) HArrayGet(
          org->InputAt(0), opa, org->GetType(), org->GetSideEffects(), dex_pc, is_string_char_at);
    }
  }
  vector_map_->Put(org, vector);
}
1576
Aart Bik0148de42017-09-05 09:25:01 -07001577void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
1578 DCHECK(reductions_->find(phi) != reductions_->end());
1579 DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
1580 HInstruction* vector = nullptr;
1581 if (vector_mode_ == kSequential) {
1582 HPhi* new_phi = new (global_allocator_) HPhi(
1583 global_allocator_, kNoRegNumber, 0, phi->GetType());
1584 vector_header_->AddPhi(new_phi);
1585 vector = new_phi;
1586 } else {
1587 // Link vector reduction back to prior unrolled update, or a first phi.
1588 auto it = vector_permanent_map_->find(phi);
1589 if (it != vector_permanent_map_->end()) {
1590 vector = it->second;
1591 } else {
1592 HPhi* new_phi = new (global_allocator_) HPhi(
1593 global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
1594 vector_header_->AddPhi(new_phi);
1595 vector = new_phi;
1596 }
1597 }
1598 vector_map_->Put(phi, vector);
1599}
1600
// Wires up the inputs of the new reduction phi generated for 'phi': the
// incoming initial value (expanded to a vector in kVector mode) and the
// back-edge value produced by 'reduction' inside the loop body.
void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
  HInstruction* new_phi = vector_map_->Get(phi);
  HInstruction* new_init = reductions_->Get(phi);
  HInstruction* new_red = vector_map_->Get(reduction);
  // Link unrolled vector loop back to new phi: walk the chain of prior
  // unrolled updates in vector_permanent_map_ until the first phi is found.
  for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
    DCHECK(new_phi->IsVecOperation());
  }
  // Prepare the new initialization.
  if (vector_mode_ == kVector) {
    // Generate a [initial, 0, .., 0] vector for add or
    // a [initial, initial, .., initial] vector for min/max.
    HVecOperation* red_vector = new_red->AsVecOperation();
    HVecReduce::ReductionKind kind = GetReductionKind(red_vector);
    size_t vector_length = red_vector->GetVectorLength();
    DataType::Type type = red_vector->GetPackedType();
    if (kind == HVecReduce::ReductionKind::kSum) {
      new_init = Insert(vector_preheader_,
                        new (global_allocator_) HVecSetScalars(global_allocator_,
                                                               &new_init,
                                                               type,
                                                               vector_length,
                                                               1,
                                                               kNoDexPc));
    } else {
      new_init = Insert(vector_preheader_,
                        new (global_allocator_) HVecReplicateScalar(global_allocator_,
                                                                    new_init,
                                                                    type,
                                                                    vector_length,
                                                                    kNoDexPc));
    }
  } else {
    // Sequential mode: reduce a prior vector result to a scalar if needed.
    new_init = ReduceAndExtractIfNeeded(new_init);
  }
  // Set the phi inputs.
  DCHECK(new_phi->IsPhi());
  new_phi->AsPhi()->AddInput(new_init);
  new_phi->AsPhi()->AddInput(new_red);
  // New feed value for next phi (safe mutation in iteration).
  reductions_->find(phi)->second = new_phi;
}
1643
1644HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
1645 if (instruction->IsPhi()) {
1646 HInstruction* input = instruction->InputAt(1);
1647 if (input->IsVecOperation()) {
Aart Bikdbbac8f2017-09-01 13:06:08 -07001648 HVecOperation* input_vector = input->AsVecOperation();
1649 size_t vector_length = input_vector->GetVectorLength();
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001650 DataType::Type type = input_vector->GetPackedType();
Aart Bikdbbac8f2017-09-01 13:06:08 -07001651 HVecReduce::ReductionKind kind = GetReductionKind(input_vector);
Aart Bik0148de42017-09-05 09:25:01 -07001652 HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
1653 // Generate a vector reduction and scalar extract
1654 // x = REDUCE( [x_1, .., x_n] )
1655 // y = x_1
1656 // along the exit of the defining loop.
Aart Bik0148de42017-09-05 09:25:01 -07001657 HInstruction* reduce = new (global_allocator_) HVecReduce(
Aart Bik46b6dbc2017-10-03 11:37:37 -07001658 global_allocator_, instruction, type, vector_length, kind, kNoDexPc);
Aart Bik0148de42017-09-05 09:25:01 -07001659 exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
1660 instruction = new (global_allocator_) HVecExtractScalar(
Aart Bik46b6dbc2017-10-03 11:37:37 -07001661 global_allocator_, reduce, type, vector_length, 0, kNoDexPc);
Aart Bik0148de42017-09-05 09:25:01 -07001662 exit->InsertInstructionAfter(instruction, reduce);
1663 }
1664 }
1665 return instruction;
1666}
1667
// Helper macro for GenerateVecOp below: assigns the vector form (x) or the
// sequential scalar form (y) to 'vector' depending on the current
// vectorization mode, then breaks out of the enclosing switch.
#define GENERATE_VEC(x, y) \
  if (vector_mode_ == kVector) { \
    vector = (x); \
  } else { \
    DCHECK(vector_mode_ == kSequential); \
    vector = (y); \
  } \
  break;
1676
// Generates a new instruction mirroring original instruction 'org' on the
// already-mapped operands 'opa'/'opb' ('opb' is nullptr for unary operations):
// a SIMD node in kVector mode, or a scalar clone in kSequential mode. The
// result is recorded in vector_map_ under 'org'. 'is_unsigned' selects the
// unsigned variant of the min/max intrinsics. Aborts (CHECK) on an operator
// that was not previously vetted as vectorizable.
void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                      HInstruction* opa,
                                      HInstruction* opb,
                                      DataType::Type type,
                                      bool is_unsigned) {
  uint32_t dex_pc = org->GetDexPc();
  HInstruction* vector = nullptr;
  DataType::Type org_type = org->GetType();
  switch (org->GetKind()) {
    case HInstruction::kNeg:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_, dex_pc),
          new (global_allocator_) HNeg(org_type, opa, dex_pc));
    case HInstruction::kNot:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_, dex_pc),
          new (global_allocator_) HNot(org_type, opa, dex_pc));
    case HInstruction::kBooleanNot:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_, dex_pc),
          new (global_allocator_) HBooleanNot(opa, dex_pc));
    case HInstruction::kTypeConversion:
      DCHECK(opb == nullptr);
      GENERATE_VEC(
          new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_, dex_pc),
          new (global_allocator_) HTypeConversion(org_type, opa, dex_pc));
    case HInstruction::kAdd:
      GENERATE_VEC(
          new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HAdd(org_type, opa, opb, dex_pc));
    case HInstruction::kSub:
      GENERATE_VEC(
          new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HSub(org_type, opa, opb, dex_pc));
    case HInstruction::kMul:
      GENERATE_VEC(
          new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HMul(org_type, opa, opb, dex_pc));
    case HInstruction::kDiv:
      GENERATE_VEC(
          new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HDiv(org_type, opa, opb, dex_pc));
    case HInstruction::kAnd:
      GENERATE_VEC(
          new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HAnd(org_type, opa, opb, dex_pc));
    case HInstruction::kOr:
      GENERATE_VEC(
          new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HOr(org_type, opa, opb, dex_pc));
    case HInstruction::kXor:
      GENERATE_VEC(
          new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HXor(org_type, opa, opb, dex_pc));
    case HInstruction::kShl:
      GENERATE_VEC(
          new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HShl(org_type, opa, opb, dex_pc));
    case HInstruction::kShr:
      GENERATE_VEC(
          new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HShr(org_type, opa, opb, dex_pc));
    case HInstruction::kUShr:
      GENERATE_VEC(
          new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
          new (global_allocator_) HUShr(org_type, opa, opb, dex_pc));
    case HInstruction::kInvokeStaticOrDirect: {
      HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
      if (vector_mode_ == kVector) {
        // Vector mode: map the recognized intrinsic to its SIMD counterpart.
        switch (invoke->GetIntrinsic()) {
          case Intrinsics::kMathAbsInt:
          case Intrinsics::kMathAbsLong:
          case Intrinsics::kMathAbsFloat:
          case Intrinsics::kMathAbsDouble:
            DCHECK(opb == nullptr);
            vector = new (global_allocator_)
                HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
            break;
          case Intrinsics::kMathMinIntInt:
          case Intrinsics::kMathMinLongLong:
          case Intrinsics::kMathMinFloatFloat:
          case Intrinsics::kMathMinDoubleDouble: {
            NormalizePackedType(&type, &is_unsigned);
            vector = new (global_allocator_)
                HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc);
            break;
          }
          case Intrinsics::kMathMaxIntInt:
          case Intrinsics::kMathMaxLongLong:
          case Intrinsics::kMathMaxFloatFloat:
          case Intrinsics::kMathMaxDoubleDouble: {
            NormalizePackedType(&type, &is_unsigned);
            vector = new (global_allocator_)
                HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned, dex_pc);
            break;
          }
          default:
            LOG(FATAL) << "Unsupported SIMD intrinsic";
            UNREACHABLE();
        }  // switch invoke
      } else {
        // In scalar code, simply clone the method invoke, and replace its operands with the
        // corresponding new scalar instructions in the loop. The instruction will get an
        // environment while being inserted from the instruction map in original program order.
        DCHECK(vector_mode_ == kSequential);
        size_t num_args = invoke->GetNumberOfArguments();
        HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
            global_allocator_,
            num_args,
            invoke->GetType(),
            invoke->GetDexPc(),
            invoke->GetDexMethodIndex(),
            invoke->GetResolvedMethod(),
            invoke->GetDispatchInfo(),
            invoke->GetInvokeType(),
            invoke->GetTargetMethod(),
            invoke->GetClinitCheckRequirement());
        HInputsRef inputs = invoke->GetInputs();
        size_t num_inputs = inputs.size();
        DCHECK_LE(num_args, num_inputs);
        DCHECK_EQ(num_inputs, new_invoke->GetInputs().size());  // both invokes agree
        for (size_t index = 0; index < num_inputs; ++index) {
          HInstruction* new_input = index < num_args
              ? vector_map_->Get(inputs[index])
              : inputs[index];  // beyond arguments: just pass through
          new_invoke->SetArgumentAt(index, new_input);
        }
        new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
                                 kNeedsEnvironmentOrCache,
                                 kNoSideEffects,
                                 kNoThrow);
        vector = new_invoke;
      }
      break;
    }
    default:
      break;
  }  // switch
  CHECK(vector != nullptr) << "Unsupported SIMD operator";
  vector_map_->Put(org, vector);
}
1821
1822#undef GENERATE_VEC
1823
1824//
Aart Bikf3e61ee2017-04-12 17:09:20 -07001825// Vectorization idioms.
1826//
1827
// Method recognizes the following idioms:
//   rounding  halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
//   truncated halving add (a + b)     >> 1 for unsigned/signed operands a, b
// Provided that the operands are promoted to a wider form to do the arithmetic and
// then cast back to narrower form, the idioms can be mapped into efficient SIMD
// implementation that operates directly in narrower form (plus one extra bit).
// TODO: current version recognizes implicit byte/short/char widening only;
// explicit widening from int to long could be added later.
//
// Returns true if the idiom rooted at 'instruction' was recognized (and, when
// 'generate_code' is set, code for it was emitted into vector_map_).
bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
                                                 HInstruction* instruction,
                                                 bool generate_code,
                                                 DataType::Type type,
                                                 uint64_t restrictions) {
  // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
  // (note whether the sign bit in wider precision is shifted in has no effect
  // on the narrow precision computed by the idiom).
  if ((instruction->IsShr() ||
       instruction->IsUShr()) &&
      IsInt64Value(instruction->InputAt(1), 1)) {
    // Test for (a + b + c) >> 1 for optional constant c.
    HInstruction* a = nullptr;
    HInstruction* b = nullptr;
    int64_t       c = 0;
    if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
      DCHECK(a != nullptr && b != nullptr);
      // Accept c == 1 (rounded) or c == 0 (not rounded); any other constant
      // is not a halving-add pattern.
      bool is_rounded = false;
      if (c == 1) {
        is_rounded = true;
      } else if (c != 0) {
        return false;
      }
      // Accept consistent zero or sign extension on operands a and b:
      // r and s are the narrower forms of a and b, respectively.
      HInstruction* r = nullptr;
      HInstruction* s = nullptr;
      bool is_unsigned = false;
      if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
        return false;
      }
      // Deal with vector restrictions: back off if the target cannot do a
      // signed or an unrounded halving add for this packed type.
      if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
          (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
        return false;
      }
      // Accept recognized halving add for vectorizable operands. Vectorized code uses the
      // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
      DCHECK(r != nullptr);
      DCHECK(s != nullptr);
      if (generate_code && vector_mode_ != kVector) {  // de-idiom
        // Sequential (cleanup) code regenerates the original shift, so fall
        // back to the shift's own operands rather than the narrowed ones.
        r = instruction->InputAt(0);
        s = instruction->InputAt(1);
      }
      if (VectorizeUse(node, r, generate_code, type, restrictions) &&
          VectorizeUse(node, s, generate_code, type, restrictions)) {
        if (generate_code) {
          if (vector_mode_ == kVector) {
            NormalizePackedType(&type, &is_unsigned);
            vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
                global_allocator_,
                vector_map_->Get(r),
                vector_map_->Get(s),
                type,
                vector_length_,
                is_rounded,
                is_unsigned,
                kNoDexPc));
            MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
          } else {
            GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
          }
        }
        return true;
      }
    }
  }
  return false;
}
1905
// Method recognizes the following idiom:
//   q += ABS(a - b) for signed operands a, b
// Provided that the operands have the same type or are promoted to a wider form.
// Since this may involve a vector length change, the idiom is handled by going directly
// to a sad-accumulate node (rather than relying combining finer grained nodes later).
// TODO: unsigned SAD too?
//
// Returns true if the SAD reduction rooted at 'instruction' was recognized (and,
// when 'generate_code' is set, code for it was emitted into vector_map_).
bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
                                          HInstruction* instruction,
                                          bool generate_code,
                                          DataType::Type reduction_type,
                                          uint64_t restrictions) {
  // Filter integral "q += ABS(a - b);" reduction, where ABS and SUB
  // are done in the same precision (either int or long).
  if (!instruction->IsAdd() ||
      (reduction_type != DataType::Type::kInt32 && reduction_type != DataType::Type::kInt64)) {
    return false;
  }
  // q is the running accumulator, v the candidate ABS(...) value.
  HInstruction* q = instruction->InputAt(0);
  HInstruction* v = instruction->InputAt(1);
  HInstruction* a = nullptr;
  HInstruction* b = nullptr;
  if (v->IsInvokeStaticOrDirect() &&
      (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
       v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
    HInstruction* x = v->InputAt(0);
    if (x->GetType() == reduction_type) {
      int64_t c = 0;
      if (x->IsSub()) {
        a = x->InputAt(0);
        b = x->InputAt(1);
      } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
        // ABS(a + c) is treated as ABS(a - (-c)).
        b = graph_->GetConstant(reduction_type, -c);  // hidden SUB!
      }
    }
  }
  if (a == nullptr || b == nullptr) {
    return false;
  }
  // Accept same-type or consistent sign extension for narrower-type on operands a and b.
  // The same-type or narrower operands are called r (a or lower) and s (b or lower).
  // We inspect the operands carefully to pick the most suited type: start from the
  // smaller of the two operand types, then look through explicit type conversions
  // for an even narrower source type.
  HInstruction* r = a;
  HInstruction* s = b;
  bool is_unsigned = false;
  DataType::Type sub_type = a->GetType();
  if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) {
    sub_type = b->GetType();
  }
  if (a->IsTypeConversion() &&
      DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
    sub_type = a->InputAt(0)->GetType();
  }
  if (b->IsTypeConversion() &&
      DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
    sub_type = b->InputAt(0)->GetType();
  }
  // Only signed narrowing is accepted (see TODO above on unsigned SAD).
  if (reduction_type != sub_type &&
      (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
    return false;
  }
  // Try same/narrower type and deal with vector restrictions. Note that this
  // recomputes 'restrictions' (and vector state) for sub_type.
  if (!TrySetVectorType(sub_type, &restrictions) ||
      HasVectorRestrictions(restrictions, kNoSAD) ||
      (reduction_type != sub_type && HasVectorRestrictions(restrictions, kNoWideSAD))) {
    return false;
  }
  // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
  // idiomatic operation. Sequential code uses the original scalar expressions.
  DCHECK(r != nullptr);
  DCHECK(s != nullptr);
  if (generate_code && vector_mode_ != kVector) {  // de-idiom
    // Sequential (cleanup) code regenerates ABS over the original SUB.
    r = s = v->InputAt(0);
  }
  if (VectorizeUse(node, q, generate_code, sub_type, restrictions) &&
      VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
      VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
    if (generate_code) {
      NormalizePackedType(&reduction_type, &is_unsigned);
      if (vector_mode_ == kVector) {
        // Accumulator vector length may differ from the operand vector
        // length when sub_type is narrower than reduction_type.
        vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
            global_allocator_,
            vector_map_->Get(q),
            vector_map_->Get(r),
            vector_map_->Get(s),
            reduction_type,
            GetOtherVL(reduction_type, sub_type, vector_length_),
            kNoDexPc));
        MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
      } else {
        GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type);
        GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
      }
    }
    return true;
  }
  return false;
}
2003
Aart Bikf3e61ee2017-04-12 17:09:20 -07002004//
Aart Bik14a68b42017-06-08 14:06:58 -07002005// Vectorization heuristics.
2006//
2007
2008bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
2009 // Current heuristic: non-empty body with sufficient number
2010 // of iterations (if known).
2011 // TODO: refine by looking at e.g. operation count, alignment, etc.
2012 if (vector_length_ == 0) {
2013 return false; // nothing found
2014 } else if (0 < trip_count && trip_count < vector_length_) {
2015 return false; // insufficient iterations
2016 }
2017 return true;
2018}
2019
Aart Bikb29f6842017-07-28 15:58:41 -07002020void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate,
2021 int64_t trip_count ATTRIBUTE_UNUSED) {
Aart Bik14a68b42017-06-08 14:06:58 -07002022 // Current heuristic: none.
2023 // TODO: implement
Aart Bikb29f6842017-07-28 15:58:41 -07002024 vector_peeling_candidate_ = candidate;
Aart Bik14a68b42017-06-08 14:06:58 -07002025}
2026
// ARM64 SIMD unrolling heuristics (see GetUnrollingFactor): cap the unroll
// factor at 8 and keep the loop body under ~50 HIR instructions.
static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
2029
// Returns the unrolling factor to apply to the vector loop in 'block' with the
// given trip count, or kNoUnrollingFactor when unrolling is not beneficial.
// Currently only ARM64 has an unrolling heuristic.
uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
  switch (compiler_driver_->GetInstructionSet()) {
    case kArm64: {
      // Don't unroll with insufficient iterations (needs at least two full
      // vector iterations to be worthwhile).
      // TODO: Unroll loops with unknown trip count.
      DCHECK_NE(vector_length_, 0u);
      if (trip_count < 2 * vector_length_) {
        return kNoUnrollingFactor;
      }
      // Don't unroll for large loop body size.
      uint32_t instruction_count = block->GetInstructions().CountSize();
      if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
        return kNoUnrollingFactor;
      }
      // Find a beneficial unroll factor with the following restrictions:
      //  - At least one iteration of the transformed loop should be executed.
      //  - The loop body shouldn't be "too big" (heuristic).
      // Note: trip_count >= 2 * vector_length_ here, so uf2 >= 2 and the
      // divisions below operate on non-negative values.
      uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
      uint32_t uf2 = trip_count / vector_length_;
      // Round the chosen factor down to a power of two.
      uint32_t unroll_factor =
          TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
      DCHECK_GE(unroll_factor, 1u);
      return unroll_factor;
    }
    case kX86:
    case kX86_64:
    default:
      return kNoUnrollingFactor;
  }
}
2060
2061//
Aart Bikf8f5a162017-02-06 15:35:29 -08002062// Helpers.
2063//
2064
// Tries to classify 'phi' as the head of an induction cycle, collecting the
// cycle's instructions into iset_. When 'restrict_uses' is set, every use of a
// non-phi cycle member must stay within the cycle. Returns true on success;
// iset_ then holds the (copied) cycle.
bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
  // Start with empty phi induction.
  iset_->clear();

  // Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't
  // smart enough to follow strongly connected components (and it's probably not worth
  // it to make it so). See b/33775412.
  if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) {
    return false;
  }

  // Lookup phi induction cycle.
  ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
  if (set != nullptr) {
    for (HInstruction* i : *set) {
      // Check that, other than instructions that are no longer in the graph (removed earlier)
      // each instruction is removable and, when restrict uses are requested, other than for phi,
      // all uses are contained within the cycle.
      if (!i->IsInBlock()) {
        continue;  // removed earlier; skip without failing
      } else if (!i->IsRemovable()) {
        return false;
      } else if (i != phi && restrict_uses) {
        // Deal with regular uses: each user must itself be in the cycle.
        for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
          if (set->find(use.GetUser()) == set->end()) {
            return false;
          }
        }
      }
      iset_->insert(i);  // copy
    }
    return true;
  }
  return false;
}
2101
// Tries to classify 'phi' as the head of a loop reduction (e.g. x = x + ..).
// On success, records the reduction/phi links and the feed value in
// reductions_ and returns true. iset_ must be empty on entry and is left
// empty on exit (it is used transiently by IsOnlyUsedAfterLoop).
bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) {
  DCHECK(iset_->empty());
  // Only unclassified phi cycles are candidates for reductions.
  if (induction_range_.IsClassified(phi)) {
    return false;
  }
  // Accept operations like x = x + .., provided that the phi and the reduction are
  // used exactly once inside the loop, and by each other.
  HInputsRef inputs = phi->GetInputs();
  if (inputs.size() == 2) {
    // Input 0 is the feed (pre-loop) value, input 1 the in-loop update.
    HInstruction* reduction = inputs[1];
    if (HasReductionFormat(reduction, phi)) {
      HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
      int32_t use_count = 0;
      bool single_use_inside_loop =
          // Reduction update only used by phi.
          reduction->GetUses().HasExactlyOneElement() &&
          !reduction->HasEnvironmentUses() &&
          // Reduction update is only use of phi inside the loop.
          IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
          iset_->size() == 1;
      iset_->clear();  // leave the way you found it
      if (single_use_inside_loop) {
        // Link reduction back, and start recording feed value.
        reductions_->Put(reduction, phi);
        reductions_->Put(phi, phi->InputAt(0));
        return true;
      }
    }
  }
  return false;
}
2134
// Tries to recognize 'block' as a simple loop header: optional reduction phis
// plus exactly one main induction phi, followed by the canonical
// SuspendCheck/Condition/If instruction sequence. On success, returns true,
// stores the main induction phi in *main_phi, fills iset_ with the loop
// control instructions, and records any reductions in reductions_.
bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) {
  // Start with empty phi induction and reductions.
  iset_->clear();
  reductions_->clear();

  // Scan the phis to find the following (the induction structure has already
  // been optimized, so we don't need to worry about trivial cases):
  // (1) optional reductions in loop,
  // (2) the main induction, used in loop control.
  HPhi* phi = nullptr;
  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
    if (TrySetPhiReduction(it.Current()->AsPhi())) {
      continue;
    } else if (phi == nullptr) {
      // Found the first candidate for main induction.
      phi = it.Current()->AsPhi();
    } else {
      // A second non-reduction phi disqualifies the header.
      return false;
    }
  }

  // Then test for a typical loopheader:
  //   s:  SuspendCheck
  //   c:  Condition(phi, bound)
  //   i:  If(c)
  if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) {
    HInstruction* s = block->GetFirstInstruction();
    if (s != nullptr && s->IsSuspendCheck()) {
      HInstruction* c = s->GetNext();
      if (c != nullptr &&
          c->IsCondition() &&
          c->GetUses().HasExactlyOneElement() &&  // only used for termination
          !c->HasEnvironmentUses()) {  // unlikely, but not impossible
        HInstruction* i = c->GetNext();
        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
          // Loop control (condition and suspend check) joins the exclusion set.
          iset_->insert(c);
          iset_->insert(s);
          *main_phi = phi;
          return true;
        }
      }
    }
  }
  return false;
}
2180
2181bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
Aart Bikf8f5a162017-02-06 15:35:29 -08002182 if (!block->GetPhis().IsEmpty()) {
2183 return false;
2184 }
2185 for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
2186 HInstruction* instruction = it.Current();
2187 if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
2188 return false;
Aart Bikcc42be02016-10-20 16:14:16 -07002189 }
Aart Bikf8f5a162017-02-06 15:35:29 -08002190 }
2191 return true;
2192}
2193
2194bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
2195 HInstruction* instruction) {
Aart Bikb29f6842017-07-28 15:58:41 -07002196 // Deal with regular uses.
Aart Bikf8f5a162017-02-06 15:35:29 -08002197 for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
2198 if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
2199 return true;
2200 }
Aart Bikcc42be02016-10-20 16:14:16 -07002201 }
2202 return false;
2203}
2204
Aart Bik482095d2016-10-10 15:39:10 -07002205bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -07002206 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -08002207 bool collect_loop_uses,
Aart Bik8c4a8542016-10-06 11:36:57 -07002208 /*out*/ int32_t* use_count) {
Aart Bikb29f6842017-07-28 15:58:41 -07002209 // Deal with regular uses.
Aart Bik8c4a8542016-10-06 11:36:57 -07002210 for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
2211 HInstruction* user = use.GetUser();
2212 if (iset_->find(user) == iset_->end()) { // not excluded?
2213 HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
Aart Bik482095d2016-10-10 15:39:10 -07002214 if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
Aart Bik6b69e0a2017-01-11 10:20:43 -08002215 // If collect_loop_uses is set, simply keep adding those uses to the set.
2216 // Otherwise, reject uses inside the loop that were not already in the set.
2217 if (collect_loop_uses) {
2218 iset_->insert(user);
2219 continue;
2220 }
Aart Bik8c4a8542016-10-06 11:36:57 -07002221 return false;
2222 }
2223 ++*use_count;
2224 }
2225 }
2226 return true;
2227}
2228
// Replaces all non-excluded uses of 'instruction' (regular uses, and
// environment uses after the loop) with its induction-computed last value,
// materialized in 'block'. Returns false when no last value can be generated.
bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info,
                                                HInstruction* instruction,
                                                HBasicBlock* block) {
  // Try to replace outside uses with the last value.
  if (induction_range_.CanGenerateLastValue(instruction)) {
    HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
    // Deal with regular uses.
    const HUseList<HInstruction*>& uses = instruction->GetUses();
    for (auto it = uses.begin(), end = uses.end(); it != end;) {
      HInstruction* user = it->GetUser();
      size_t index = it->GetIndex();
      ++it;  // increment before replacing (replacement invalidates this node)
      if (iset_->find(user) == iset_->end()) {  // not excluded?
        if (kIsDebugBuild) {
          // We have checked earlier in 'IsOnlyUsedAfterLoop' that the use is after the loop.
          HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
          CHECK(other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info));
        }
        user->ReplaceInput(replacement, index);
        induction_range_.Replace(user, instruction, replacement);  // update induction
      }
    }
    // Deal with environment uses.
    const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
    for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
      HEnvironment* user = it->GetUser();
      size_t index = it->GetIndex();
      ++it;  // increment before replacing (replacement invalidates this node)
      if (iset_->find(user->GetHolder()) == iset_->end()) {  // not excluded?
        // Only update environment uses after the loop.
        HLoopInformation* other_loop_info = user->GetHolder()->GetBlock()->GetLoopInformation();
        if (other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info)) {
          user->RemoveAsUserOfInput(index);
          user->SetRawEnvAt(index, replacement);
          replacement->AddEnvUseAt(user, index);
        }
      }
    }
    return true;
  }
  return false;
}
2271
Aart Bikf8f5a162017-02-06 15:35:29 -08002272bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info,
2273 HInstruction* instruction,
2274 HBasicBlock* block,
2275 bool collect_loop_uses) {
2276 // Assigning the last value is always successful if there are no uses.
2277 // Otherwise, it succeeds in a no early-exit loop by generating the
2278 // proper last value assignment.
2279 int32_t use_count = 0;
2280 return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
2281 (use_count == 0 ||
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +01002282 (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(loop_info, instruction, block)));
Aart Bikf8f5a162017-02-06 15:35:29 -08002283}
2284
Aart Bik6b69e0a2017-01-11 10:20:43 -08002285void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
2286 for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
2287 HInstruction* instruction = i.Current();
2288 if (instruction->IsDeadAndRemovable()) {
2289 simplified_ = true;
2290 instruction->GetBlock()->RemoveInstructionOrPhi(instruction);
2291 }
2292 }
2293}
2294
Aart Bik14a68b42017-06-08 14:06:58 -07002295bool HLoopOptimization::CanRemoveCycle() {
2296 for (HInstruction* i : *iset_) {
2297 // We can never remove instructions that have environment
2298 // uses when we compile 'debuggable'.
2299 if (i->HasEnvironmentUses() && graph_->IsDebuggable()) {
2300 return false;
2301 }
2302 // A deoptimization should never have an environment input removed.
2303 for (const HUseListNode<HEnvironment*>& use : i->GetEnvUses()) {
2304 if (use.GetUser()->GetHolder()->IsDeoptimize()) {
2305 return false;
2306 }
2307 }
2308 }
2309 return true;
2310}
2311
Aart Bik281c6812016-08-26 11:31:48 -07002312} // namespace art