Revert "Basic SIMD reduction support."
Fails 530-checker-lse on arm64.
Bug: 64091002, 65212948
This reverts commit cfa59b49cde265dc5329a7e6956445f9f7a75f15.
Change-Id: Icb5d6c805516db0a1d911c3ede9a246ccef89a22
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index f8f4eb2..027ba77 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -285,19 +285,6 @@
return false;
}
-// Translates operation to reduction kind.
-static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub()) {
- return HVecReduce::kSum;
- } else if (reduction->IsVecMin()) {
- return HVecReduce::kMin;
- } else if (reduction->IsVecMax()) {
- return HVecReduce::kMax;
- }
- LOG(FATAL) << "Unsupported SIMD reduction";
- UNREACHABLE();
-}
-
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -347,8 +334,7 @@
vector_peeling_candidate_(nullptr),
vector_runtime_test_a_(nullptr),
vector_runtime_test_b_(nullptr),
- vector_map_(nullptr),
- vector_permanent_map_(nullptr) {
+ vector_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -402,14 +388,11 @@
ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSafeMap<HInstruction*, HInstruction*> map(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
- ArenaSafeMap<HInstruction*, HInstruction*> perm(
- std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = ↦
- vector_permanent_map_ = &perm;
// Traverse.
TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -417,7 +400,6 @@
reductions_ = nullptr;
vector_refs_ = nullptr;
vector_map_ = nullptr;
- vector_permanent_map_ = nullptr;
}
}
@@ -621,6 +603,7 @@
// Vectorize loop, if possible and valid.
if (kEnableVectorization &&
TrySetSimpleLoopHeader(header, &main_phi) &&
+ reductions_->empty() && // TODO: possible with some effort
ShouldVectorize(node, body, trip_count) &&
TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
Vectorize(node, body, exit, trip_count);
@@ -819,13 +802,6 @@
/*unroll*/ 1);
}
- // Link reductions to their final uses.
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (i->first->IsPhi()) {
- i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
- }
- }
-
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
@@ -865,10 +841,21 @@
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
- vector_permanent_map_->clear(); // preserved over unrolling
for (uint32_t u = 0; u < unroll; u++) {
+ // Clear map, leaving loop invariants setup during unrolling.
+ if (u == 0) {
+ vector_map_->clear();
+ } else {
+ for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
+ if (i->second->IsVecReplicateScalar()) {
+ DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
+ ++i;
+ } else {
+ i = vector_map_->erase(i);
+ }
+ }
+ }
// Generate instruction map.
- vector_map_->clear();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
@@ -885,17 +872,9 @@
}
}
}
- // Generate the induction.
vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
Insert(vector_body_, vector_index_);
}
- // Finalize phi inputs for the reductions (if any).
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (!i->first->IsPhi()) {
- DCHECK(i->second->IsPhi());
- GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
- }
- }
// Finalize phi inputs for the loop index.
phi->AddInput(lo);
phi->AddInput(vector_index_);
@@ -931,23 +910,6 @@
}
return false;
}
- // Accept a left-hand-side reduction for
- // (1) supported vector type,
- // (2) vectorizable right-hand-side value.
- auto redit = reductions_->find(instruction);
- if (redit != reductions_->end()) {
- Primitive::Type type = instruction->GetType();
- if (TrySetVectorType(type, &restrictions) &&
- VectorizeUse(node, instruction, generate_code, type, restrictions)) {
- if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
- }
- return true;
- }
- return false;
- }
// Branch back okay.
if (instruction->IsGoto()) {
return true;
@@ -1003,21 +965,6 @@
}
return true;
}
- } else if (instruction->IsPhi()) {
- // Accept particular phi operations.
- if (reductions_->find(instruction) != reductions_->end()) {
- // Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoReduction)) {
- return false;
- }
- // Accept a reduction.
- if (generate_code) {
- GenerateVecReductionPhi(instruction->AsPhi());
- }
- return true;
- }
- // TODO: accept right-hand-side induction?
- return false;
} else if (instruction->IsTypeConversion()) {
// Accept particular type conversions.
HTypeConversion* conversion = instruction->AsTypeConversion();
@@ -1208,14 +1155,14 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
default:
break;
@@ -1227,11 +1174,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1240,10 +1187,8 @@
*restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
return false;
@@ -1255,12 +1200,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1269,10 +1213,10 @@
*restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1284,23 +1228,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1312,23 +1256,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1361,16 +1305,9 @@
return;
}
// In vector code, explicit scalar expansion is needed.
- HInstruction* vector = nullptr;
- auto it = vector_permanent_map_->find(org);
- if (it != vector_permanent_map_->end()) {
- vector = it->second; // reuse during unrolling
- } else {
- vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
- vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- }
- vector_map_->Put(org, vector);
+ HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_map_->Put(org, Insert(vector_preheader_, vector));
}
}
@@ -1425,78 +1362,6 @@
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
- HInstruction* vector = nullptr;
- if (vector_mode_ == kSequential) {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- } else {
- // Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
- if (it != vector_permanent_map_->end()) {
- vector = it->second;
- } else {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- }
- }
- vector_map_->Put(phi, vector);
-}
-
-void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
- HInstruction* new_phi = vector_map_->Get(phi);
- HInstruction* new_init = reductions_->Get(phi);
- HInstruction* new_red = vector_map_->Get(reduction);
- // Link unrolled vector loop back to new phi.
- for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
- DCHECK(new_phi->IsVecOperation());
- }
- // Prepare the new initialization.
- if (vector_mode_ == kVector) {
- // Generate a [initial, 0, .., 0] vector.
- new_init = Insert(
- vector_preheader_,
- new (global_allocator_) HVecSetScalars(
- global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
- } else {
- new_init = ReduceAndExtractIfNeeded(new_init);
- }
- // Set the phi inputs.
- DCHECK(new_phi->IsPhi());
- new_phi->AsPhi()->AddInput(new_init);
- new_phi->AsPhi()->AddInput(new_red);
- // New feed value for next phi (safe mutation in iteration).
- reductions_->find(phi)->second = new_phi;
-}
-
-HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
- if (instruction->IsPhi()) {
- HInstruction* input = instruction->InputAt(1);
- if (input->IsVecOperation()) {
- Primitive::Type type = input->AsVecOperation()->GetPackedType();
- HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
- // Generate a vector reduction and scalar extract
- // x = REDUCE( [x_1, .., x_n] )
- // y = x_1
- // along the exit of the defining loop.
- HVecReduce::ReductionKind kind = GetReductionKind(input);
- HInstruction* reduce = new (global_allocator_) HVecReduce(
- global_allocator_, instruction, type, vector_length_, kind);
- exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
- instruction = new (global_allocator_) HVecExtractScalar(
- global_allocator_, reduce, type, vector_length_, 0);
- exit->InsertInstructionAfter(instruction, reduce);
- }
- }
- return instruction;
-}
-
#define GENERATE_VEC(x, y) \
if (vector_mode_ == kVector) { \
vector = (x); \
@@ -1677,9 +1542,10 @@
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
+ int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64Value(instruction->InputAt(1), 1)) {
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
// Test for (a + b + c) >> 1 for optional constant c.
HInstruction* a = nullptr;
HInstruction* b = nullptr;