Min/max SIMDization support.
Rationale:
The more vectorized, the better!
Test: test-art-target, test-art-host
Change-Id: I758becca5beaa5b97fab2ab70f2e00cb53458703
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b..0739c6e 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -468,7 +468,50 @@
}
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
@@ -476,7 +519,50 @@
}
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 5bb19c1..14782d7 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -483,7 +483,51 @@
}
void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
@@ -491,7 +535,51 @@
}
void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 6d4aae8..246044e 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -353,6 +353,10 @@
DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
@@ -472,7 +476,51 @@
}
void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
@@ -480,7 +528,51 @@
}
void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e5d94c3..02816cf 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -514,6 +514,14 @@
StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
}
+ void VisitVecMin(HVecMin* min) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
+ }
+
+ void VisitVecMax(HVecMax* max) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
+ }
+
void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 881802d..4067aa3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -869,6 +869,32 @@
}
return false;
}
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoMinMax) ||
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // TODO: we can do better for some hibits cases.
+ return false;
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+ }
+ return true;
+ }
+ return false;
+ }
default:
return false;
} // switch
@@ -898,7 +924,7 @@
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoMul;
+ *restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
return TrySetVectorLength(4);
@@ -924,11 +950,13 @@
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1108,6 +1136,24 @@
DCHECK(opb == nullptr);
vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
break;
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble: {
+ bool is_unsigned = false; // TODO: detect unsigned versions
+ vector = new (global_allocator_)
+ HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ bool is_unsigned = false; // TODO: detect unsigned versions
+ vector = new (global_allocator_)
+ HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
default:
LOG(FATAL) << "Unsupported SIMD intrinsic";
UNREACHABLE();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4a7da86..6d5978d 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -71,6 +71,7 @@
kNoSignedHAdd = 32, // no signed halving add
kNoUnroundedHAdd = 64, // no unrounded halving add
kNoAbs = 128, // no absolute value
+ kNoMinMax = 256, // no min/max
};
/*
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b..c2bb6e7 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -451,13 +451,24 @@
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMin);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
+ static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMin);
};
@@ -470,13 +481,24 @@
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMax);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
+ static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMax);
};
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1736618..bef32f8 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1238,6 +1238,139 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x38);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEA);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEE);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x39);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDA);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDE);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3A);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3E);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pminud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3B);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3F);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::minpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst, src);
+}
void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a747cda..c4bb9ee 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -498,6 +498,25 @@
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxsb(XmmRegister dst, XmmRegister src);
+ void pminsw(XmmRegister dst, XmmRegister src);
+ void pmaxsw(XmmRegister dst, XmmRegister src);
+ void pminsd(XmmRegister dst, XmmRegister src);
+ void pmaxsd(XmmRegister dst, XmmRegister src);
+
+ void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxub(XmmRegister dst, XmmRegister src);
+ void pminuw(XmmRegister dst, XmmRegister src);
+ void pmaxuw(XmmRegister dst, XmmRegister src);
+ void pminud(XmmRegister dst, XmmRegister src);
+ void pmaxud(XmmRegister dst, XmmRegister src);
+
+ void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void maxps(XmmRegister dst, XmmRegister src);
+ void minpd(XmmRegister dst, XmmRegister src);
+ void maxpd(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f75f972..34f2a47 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -613,6 +613,70 @@
DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86Test, PMinSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86Test, PMaxSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86Test, PMinSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86Test, PMaxSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86Test, PMinSD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86Test, PMaxSD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86Test, PMinUB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86Test, PMaxUB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86Test, PMinUW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86Test, PMaxUW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86Test, PMinUD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86Test, PMaxUD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86Test, MinPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86Test, MaxPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86Test, MinPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86Test, MaxPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
TEST_F(AssemblerX86Test, PCmpeqB) {
DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1b7a485..82d1174 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1445,6 +1445,156 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x38);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3C);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xEA);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xEE);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x39);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDA);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDE);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3A);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3E);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x3F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5F);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0ddc46c..6e584fe 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -526,6 +526,25 @@
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxsb(XmmRegister dst, XmmRegister src);
+ void pminsw(XmmRegister dst, XmmRegister src);
+ void pmaxsw(XmmRegister dst, XmmRegister src);
+ void pminsd(XmmRegister dst, XmmRegister src);
+ void pmaxsd(XmmRegister dst, XmmRegister src);
+
+ void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pmaxub(XmmRegister dst, XmmRegister src);
+ void pminuw(XmmRegister dst, XmmRegister src);
+ void pmaxuw(XmmRegister dst, XmmRegister src);
+ void pminud(XmmRegister dst, XmmRegister src);
+ void pmaxud(XmmRegister dst, XmmRegister src);
+
+ void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void maxps(XmmRegister dst, XmmRegister src);
+ void minpd(XmmRegister dst, XmmRegister src);
+ void maxpd(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index e7d8401..b574003 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1301,6 +1301,70 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86_64Test, Pminsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminsd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxsd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd");
+}
+
+TEST_F(AssemblerX86_64Test, Pminub) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxub) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub");
+}
+
+TEST_F(AssemblerX86_64Test, Pminuw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxuw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw");
+}
+
+TEST_F(AssemblerX86_64Test, Pminud) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaxud) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud");
+}
+
+TEST_F(AssemblerX86_64Test, Minps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps");
+}
+
+TEST_F(AssemblerX86_64Test, Maxps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps");
+}
+
+TEST_F(AssemblerX86_64Test, Minpd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd");
+}
+
+TEST_F(AssemblerX86_64Test, Maxpd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd");
+}
+
TEST_F(AssemblerX86_64Test, PCmpeqb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
}
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index e12bcec..4824f70 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -581,13 +581,69 @@
load = true;
src_reg_file = dst_reg_file = SSE;
break;
- case 0x39:
+ case 0x37:
opcode1 = "pcmpgtq";
prefix[2] = 0;
has_modrm = true;
load = true;
src_reg_file = dst_reg_file = SSE;
break;
+ case 0x38:
+ opcode1 = "pminsb";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x39:
+ opcode1 = "pminsd";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3A:
+ opcode1 = "pminuw";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3B:
+ opcode1 = "pminud";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3C:
+ opcode1 = "pmaxsb";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3D:
+ opcode1 = "pmaxsd";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3E:
+ opcode1 = "pmaxuw";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x3F:
+ opcode1 = "pmaxud";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
case 0x40:
opcode1 = "pmulld";
prefix[2] = 0;
@@ -1133,8 +1189,12 @@
opcode1 = opcode_tmp.c_str();
}
break;
+ case 0xDA:
+ case 0xDE:
case 0xE0:
case 0xE3:
+ case 0xEA:
+ case 0xEE:
if (prefix[2] == 0x66) {
src_reg_file = dst_reg_file = SSE;
prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode
@@ -1142,8 +1202,12 @@
src_reg_file = dst_reg_file = MMX;
}
switch (*instr) {
+ case 0xDA: opcode1 = "pminub"; break;
+ case 0xDE: opcode1 = "pmaxub"; break;
case 0xE0: opcode1 = "pavgb"; break;
case 0xE3: opcode1 = "pavgw"; break;
+ case 0xEA: opcode1 = "pminsw"; break;
+ case 0xEE: opcode1 = "pmaxsw"; break;
}
prefix[2] = 0;
has_modrm = true;
diff --git a/test/651-checker-byte-simd-minmax/expected.txt b/test/651-checker-byte-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-byte-simd-minmax/info.txt b/test/651-checker-byte-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
new file mode 100644
index 0000000..8211ace
--- /dev/null
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+ /// CHECK-NOT: VecMin
+ private static void doitMin(byte[] x, byte[] y, byte[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (byte) Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+ /// CHECK-NOT: VecMax
+ private static void doitMax(byte[] x, byte[] y, byte[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (byte) Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ // Initialize cross-values for all possible values.
+ int total = 256 * 256;
+ byte[] x = new byte[total];
+ byte[] y = new byte[total];
+ byte[] z = new byte[total];
+ int k = 0;
+ for (int i = 0; i < 256; i++) {
+ for (int j = 0; j < 256; j++) {
+ x[k] = 0;
+ y[k] = (byte) i;
+ z[k] = (byte) j;
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ byte expected = (byte) Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ byte expected = (byte) Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(byte expected, byte result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/651-checker-char-simd-minmax/expected.txt b/test/651-checker-char-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-char-simd-minmax/info.txt b/test/651-checker-char-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
new file mode 100644
index 0000000..5ce7b94
--- /dev/null
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecMin
+ private static void doitMin(char[] x, char[] y, char[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (char) Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecMax
+ private static void doitMax(char[] x, char[] y, char[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (char) Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ char[] interesting = {
+ 0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff,
+ 0x0100, 0x0101, 0x017f, 0x0180, 0x0181, 0x01ff,
+ 0x7f00, 0x7f01, 0x7f7f, 0x7f80, 0x7f81, 0x7fff,
+ 0x8000, 0x8001, 0x807f, 0x8080, 0x8081, 0x80ff,
+ 0x8100, 0x8101, 0x817f, 0x8180, 0x8181, 0x81ff,
+ 0xff00, 0xff01, 0xff7f, 0xff80, 0xff81, 0xffff
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ char[] x = new char[total];
+ char[] y = new char[total];
+ char[] z = new char[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ char expected = (char) Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ char expected = (char) Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(char expected, char result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/651-checker-double-simd-minmax/expected.txt b/test/651-checker-double-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-double-simd-minmax/info.txt b/test/651-checker-double-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
new file mode 100644
index 0000000..e1711ae
--- /dev/null
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(double[], double[], double[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinDoubleDouble loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO x86: 0.0 vs -0.0?
+ //
+ /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin(double[] x, double[] y, double[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(double[], double[], double[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO-x86: 0.0 vs -0.0?
+ //
+ /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ private static void doitMax(double[] x, double[] y, double[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ double[] interesting = {
+ -0.0f,
+ +0.0f,
+ -1.0f,
+ +1.0f,
+ -3.14f,
+ +3.14f,
+ -100.0f,
+ +100.0f,
+ -4444.44f,
+ +4444.44f,
+ Double.MIN_NORMAL,
+ Double.MIN_VALUE,
+ Double.MAX_VALUE,
+ Double.NEGATIVE_INFINITY,
+ Double.POSITIVE_INFINITY,
+ Double.NaN
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ double[] x = new double[total];
+ double[] y = new double[total];
+ double[] z = new double[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ double expected = Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ double expected = Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(double expected, double result) {
+ // Tests the bits directly. This distinguishes correctly between +0.0
+ // and -0.0 and returns a canonical representation for all NaN.
+ long expected_bits = Double.doubleToLongBits(expected);
+ long result_bits = Double.doubleToLongBits(result);
+ if (expected_bits != result_bits) {
+ throw new Error("Expected: " + expected +
+ "(0x" + Long.toHexString(expected_bits) + "), found: " + result +
+ "(0x" + Long.toHexString(result_bits) + ")");
+ }
+ }
+}
diff --git a/test/651-checker-float-simd-minmax/expected.txt b/test/651-checker-float-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-float-simd-minmax/info.txt b/test/651-checker-float-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java
new file mode 100644
index 0000000..bd412e0
--- /dev/null
+++ b/test/651-checker-float-simd-minmax/src/Main.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(float[], float[], float[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinFloatFloat loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO x86: 0.0 vs -0.0?
+ //
+ /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin(float[] x, float[] y, float[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(float[], float[], float[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxFloatFloat loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO x86: 0.0 vs -0.0?
+ //
+ /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ private static void doitMax(float[] x, float[] y, float[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ float[] interesting = {
+ -0.0f,
+ +0.0f,
+ -1.0f,
+ +1.0f,
+ -3.14f,
+ +3.14f,
+ -100.0f,
+ +100.0f,
+ -4444.44f,
+ +4444.44f,
+ Float.MIN_NORMAL,
+ Float.MIN_VALUE,
+ Float.MAX_VALUE,
+ Float.NEGATIVE_INFINITY,
+ Float.POSITIVE_INFINITY,
+ Float.NaN
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ float[] x = new float[total];
+ float[] y = new float[total];
+ float[] z = new float[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ float expected = Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ float expected = Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(float expected, float result) {
+ // Tests the bits directly. This distinguishes correctly between +0.0
+ // and -0.0 and returns a canonical representation for all NaN.
+ int expected_bits = Float.floatToIntBits(expected);
+ int result_bits = Float.floatToIntBits(result);
+ if (expected_bits != result_bits) {
+ throw new Error("Expected: " + expected +
+ "(0x" + Integer.toHexString(expected_bits) + "), found: " + result +
+ "(0x" + Integer.toHexString(result_bits) + ")");
+ }
+ }
+}
diff --git a/test/651-checker-int-simd-minmax/expected.txt b/test/651-checker-int-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-int-simd-minmax/info.txt b/test/651-checker-int-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
new file mode 100644
index 0000000..4e05a9d
--- /dev/null
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(int[], int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin(int[] x, int[] y, int[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(int[], int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ private static void doitMax(int[] x, int[] y, int[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ int[] interesting = {
+ 0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff,
+ 0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff,
+ 0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff,
+ 0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff,
+ 0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff,
+ 0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ int[] x = new int[total];
+ int[] y = new int[total];
+ int[] z = new int[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ int expected = Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ int expected = Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/651-checker-long-simd-minmax/expected.txt b/test/651-checker-long-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-long-simd-minmax/info.txt b/test/651-checker-long-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java
new file mode 100644
index 0000000..51cf67e
--- /dev/null
+++ b/test/651-checker-long-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinLongLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ // Not directly supported for longs.
+ //
+ /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+ /// CHECK-NOT: VecMin
+ private static void doitMin(long[] x, long[] y, long[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxLongLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
+ //
+ // Not directly supported for longs.
+ //
+ /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+ /// CHECK-NOT: VecMax
+ private static void doitMax(long[] x, long[] y, long[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ long[] interesting = {
+ 0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
+ 0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
+ 0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
+ 0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
+ 0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
+ 0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
+ 0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
+ 0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
+ 0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
+ 0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
+ 0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
+ 0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ long[] x = new long[total];
+ long[] y = new long[total];
+ long[] z = new long[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ long expected = Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ long expected = Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/651-checker-short-simd-minmax/expected.txt b/test/651-checker-short-simd-minmax/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/651-checker-short-simd-minmax/info.txt b/test/651-checker-short-simd-minmax/info.txt
new file mode 100644
index 0000000..73af124
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/info.txt
@@ -0,0 +1 @@
+Functional tests on min/max SIMD vectorization.
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
new file mode 100644
index 0000000..f34f526
--- /dev/null
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for MIN/MAX vectorization.
+ */
+public class Main {
+
+ /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-NOT: VecMin
+ private static void doitMin(short[] x, short[] y, short[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (short) Math.min(y[i], z[i]);
+ }
+ }
+
+ /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ // TODO: narrow type vectorization.
+ /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-NOT: VecMax
+ private static void doitMax(short[] x, short[] y, short[] z) {
+ int min = Math.min(x.length, Math.min(y.length, z.length));
+ for (int i = 0; i < min; i++) {
+ x[i] = (short) Math.max(y[i], z[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ short[] interesting = {
+ (short) 0x0000, (short) 0x0001, (short) 0x007f,
+ (short) 0x0080, (short) 0x0081, (short) 0x00ff,
+ (short) 0x0100, (short) 0x0101, (short) 0x017f,
+ (short) 0x0180, (short) 0x0181, (short) 0x01ff,
+ (short) 0x7f00, (short) 0x7f01, (short) 0x7f7f,
+ (short) 0x7f80, (short) 0x7f81, (short) 0x7fff,
+ (short) 0x8000, (short) 0x8001, (short) 0x807f,
+ (short) 0x8080, (short) 0x8081, (short) 0x80ff,
+ (short) 0x8100, (short) 0x8101, (short) 0x817f,
+ (short) 0x8180, (short) 0x8181, (short) 0x81ff,
+ (short) 0xff00, (short) 0xff01, (short) 0xff7f,
+ (short) 0xff80, (short) 0xff81, (short) 0xffff
+ };
+ // Initialize cross-values for the interesting values.
+ int total = interesting.length * interesting.length;
+ short[] x = new short[total];
+ short[] y = new short[total];
+ short[] z = new short[total];
+ int k = 0;
+ for (int i = 0; i < interesting.length; i++) {
+ for (int j = 0; j < interesting.length; j++) {
+ x[k] = 0;
+ y[k] = interesting[i];
+ z[k] = interesting[j];
+ k++;
+ }
+ }
+
+ // And test.
+ doitMin(x, y, z);
+ for (int i = 0; i < total; i++) {
+ short expected = (short) Math.min(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+ doitMax(x, y, z);
+ for (int i = 0; i < total; i++) {
+ short expected = (short) Math.max(y[i], z[i]);
+ expectEquals(expected, x[i]);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(short expected, short result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}