Implement halving add idiom (with checker tests).
Rationale:
First of several idioms that map to very efficient SIMD instructions.
Note that the is-zero-ext and is-sign-ext are general-purpose utilities
that will be widely used in the vectorizer to detect low precision
idioms, so expect that code to be shared with many CLs to come.
Test: test-art-host, test-art-target
Change-Id: If7dc2926c72a2e4b5cea15c44ef68cf5503e9be9
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8e88c1e..5a95abd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -63,12 +63,122 @@
return false;
}
+// Detect a sign extension from the given type. Returns the promoted operand on success.
+static bool IsSignExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by sign
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that char normally zero extends does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<int8_t>::min() <= value &&
+ std::numeric_limits<int8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<int16_t>::min() <= value &&
+ std::numeric_limits<int16_t>::max() <= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a signed integer to an integral type sign-extends
+ // the two's-complement representation of the integer value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ *operand = instruction;
+ return true;
+ default:
+ return false;
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ // (this may return something different from instruction)
+ return false;
+}
+
+// Detect a zero extension from the given type. Returns the promoted operand on success.
+static bool IsZeroExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by zero
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that byte/short normally sign extend does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<uint8_t>::min() <= value &&
+ std::numeric_limits<uint8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<uint16_t>::min() <= value &&
+ std::numeric_limits<uint16_t>::max() <= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a char to an integral type zero-extends
+ // the representation of the char value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ if (type == Primitive::kPrimChar) {
+ *operand = instruction;
+ return true;
+ }
+ }
+ // A sign (or zero) extension followed by an explicit removal of just the
+ // higher sign bits is equivalent to a zero extension of the underlying operand.
+ if (instruction->IsAnd()) {
+ int64_t mask = 0;
+ HInstruction* a = instruction->InputAt(0);
+ HInstruction* b = instruction->InputAt(1);
+ // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
+ if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
+ (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
+ switch ((*operand)->GetType()) {
+ case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ default: return false;
+ }
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ return false;
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Inserts an instruction.
+// Insert an instruction.
static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
@@ -713,6 +823,10 @@
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Recognize vectorization idioms.
+ if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
// Deal with vector restrictions.
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
@@ -806,11 +920,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1039,6 +1153,90 @@
#undef GENERATE_VEC
//
+// Vectorization idioms.
+//
+
+// Method recognizes the following idioms:
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// Provided that the operands are promoted to a wider form to do the arithmetic and
+// then cast back to narrower form, the idioms can be mapped into efficient SIMD
+// implementation that operates directly in narrower form (plus one extra bit).
+// TODO: current version recognizes implicit byte/short/char widening only;
+// explicit widening from int to long could be added later.
+bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
+ // (note whether the sign bit in higher precision is shifted in has no effect
+ // on the narrow precision computed by the idiom).
+ int64_t value = 0;
+ if ((instruction->IsShr() ||
+ instruction->IsUShr()) &&
+ IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+ //
+ // TODO: make following code less sensitive to associativity and commutativity differences.
+ //
+ HInstruction* x = instruction->InputAt(0);
+ // Test for an optional rounding part (x + 1) >> 1.
+ bool is_rounded = false;
+ if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+ x = x->InputAt(0);
+ is_rounded = true;
+ }
+ // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
+ if (x->IsAdd()) {
+ HInstruction* a = x->InputAt(0);
+ HInstruction* b = x->InputAt(1);
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
+ is_unsigned = true;
+ } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
+ is_unsigned = false;
+ } else {
+ return false;
+ }
+ // Deal with vector restrictions.
+ if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
+ (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
+ return false;
+ }
+ // Accept recognized halving add for vectorizable operands. Vectorized code uses the
+ // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr && s != nullptr);
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
+ global_allocator_,
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ type,
+ vector_length_,
+ is_unsigned,
+ is_rounded));
+ } else {
+ VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
+ VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
+ GenerateVecOp(instruction,
+ vector_map_->Get(instruction->InputAt(0)),
+ vector_map_->Get(instruction->InputAt(1)),
+ type);
+ }
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+//
// Helpers.
//