MIPS: Implement Sum-of-Abs-Differences
Test: test-art-host test-art-target
Change-Id: I32a3e21f96cdcbab2e108d71746670408deb901a
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 384b642..3cf150a 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -1071,11 +1071,195 @@
void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+ LocationSummary* locations = instruction->GetLocations();
+ // All conversions require at least one temporary register.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ // Some conversions require a second temporary register.
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kInt32:
+ if (instruction->GetPackedType() == DataType::Type::kInt32) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ break;
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
- // TODO: implement this, location helper already filled out (shared with MulAcc).
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);
+ VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16: {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ AddvH(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ AddvH(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(tmp, ZERO);
+ __ SubvW(tmp1, left, right);
+ __ Add_aW(tmp1, tmp1, tmp);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillW(tmp, ZERO);
+ __ Hadd_sD(tmp1, left, tmp);
+ __ Hadd_sD(tmp2, right, tmp);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sD(tmp1, tmp, left);
+ __ Hadd_sD(tmp2, tmp, right);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillW(tmp, ZERO);
+ __ SubvD(tmp1, left, right);
+ __ Add_aD(tmp1, tmp1, tmp);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 0c59b73..2d69533 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -1069,11 +1069,195 @@
void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+ LocationSummary* locations = instruction->GetLocations();
+ // All conversions require at least one temporary register.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ // Some conversions require a second temporary register.
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kInt32:
+ if (instruction->GetPackedType() == DataType::Type::kInt32) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ break;
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
- // TODO: implement this, location helper already filled out (shared with MulAcc).
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);
+ VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16: {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ AddvH(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ AddvH(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(tmp, ZERO);
+ __ SubvW(tmp1, left, right);
+ __ Add_aW(tmp1, tmp1, tmp);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillW(tmp, ZERO);
+ __ Hadd_sD(tmp1, left, tmp);
+ __ Hadd_sD(tmp2, right, tmp);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sD(tmp1, tmp, left);
+ __ Hadd_sD(tmp2, tmp, right);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillD(tmp, ZERO);
+ __ SubvD(tmp1, left, right);
+ __ Add_aD(tmp1, tmp1, tmp);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 69c5827..fcc59ea 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1512,17 +1512,17 @@
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1541,17 +1541,17 @@
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index d365689..2188346 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -183,6 +183,13 @@
/// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
private static void doitMin100(byte[] x, byte[] y) {
int min = Math.min(x.length, y.length);
for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 72e8958..d92bdaf 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -97,6 +97,13 @@
/// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.doitMin100(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
private static void doitMin100(char[] x, char[] y) {
int min = Math.min(x.length, y.length);
for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index d8c4d1e..91f2a2d 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -183,6 +183,13 @@
/// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.doitMin100(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>> outer_loop:none
private static void doitMin100(short[] x, short[] y) {
int min = Math.min(x.length, y.length);
for (int i = 0; i < min; i++) {
diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java
index 39a126f..31d28e8 100644
--- a/test/656-checker-simd-opt/src/Main.java
+++ b/test/656-checker-simd-opt/src/Main.java
@@ -114,6 +114,19 @@
/// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecAdd [<<Phi2>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<L2>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.longInductionReduction(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<L2:j\d+>> LongConstant 2 loop:none
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Get>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<L1>>] loop:none
+ /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,{{j\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<L2>>] loop:<<Loop>> outer_loop:none
static long longInductionReduction(long[] y) {
long x = 1;
for (long i = 0; i < 10; i++) {
@@ -141,6 +154,17 @@
/// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
static void intVectorLongInvariant(int[] x, long[] y) {
for (int i = 0; i < 100; i++) {
x[i] = (int) y[0];
@@ -170,6 +194,18 @@
/// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<L1>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
static void longCanBeDoneWithInt(int[] x, int[] y) {
for (int i = 0; i < 100; i++) {
x[i] = (int) (y[i] + 1L);
diff --git a/test/660-checker-simd-sad-byte/src/Main.java b/test/660-checker-simd-sad-byte/src/Main.java
index 72d1c24..877d718 100644
--- a/test/660-checker-simd-sad-byte/src/Main.java
+++ b/test/660-checker-simd-sad-byte/src/Main.java
@@ -109,6 +109,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
private static int sadByte2Int(byte[] b1, byte[] b2) {
int min_length = Math.min(b1.length, b2.length);
int sad = 0;
@@ -140,6 +151,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
private static int sadByte2IntAlt(byte[] b1, byte[] b2) {
int min_length = Math.min(b1.length, b2.length);
int sad = 0;
@@ -173,6 +195,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
private static int sadByte2IntAlt2(byte[] b1, byte[] b2) {
int min_length = Math.min(b1.length, b2.length);
int sad = 0;
@@ -212,6 +245,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
private static long sadByte2Long(byte[] b1, byte[] b2) {
int min_length = Math.min(b1.length, b2.length);
long sad = 0;
@@ -249,6 +294,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
private static long sadByte2LongAt1(byte[] b1, byte[] b2) {
int min_length = Math.min(b1.length, b2.length);
long sad = 1; // starts at 1
diff --git a/test/660-checker-simd-sad-char/src/Main.java b/test/660-checker-simd-sad-char/src/Main.java
index 2535d49..ba22614 100644
--- a/test/660-checker-simd-sad-char/src/Main.java
+++ b/test/660-checker-simd-sad-char/src/Main.java
@@ -68,7 +68,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
//
- /// CHECK-START-ARM64: int Main.sadChar2Int(char[], char[]) loop_optimization (after)
+ /// CHECK-START: int Main.sadChar2Int(char[], char[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static int sadChar2Int(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
@@ -91,7 +91,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
//
- /// CHECK-START-ARM64: int Main.sadChar2IntAlt(char[], char[]) loop_optimization (after)
+ /// CHECK-START: int Main.sadChar2IntAlt(char[], char[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static int sadChar2IntAlt(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
@@ -116,7 +116,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
//
- /// CHECK-START-ARM64: int Main.sadChar2IntAlt2(char[], char[]) loop_optimization (after)
+ /// CHECK-START: int Main.sadChar2IntAlt2(char[], char[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static int sadChar2IntAlt2(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
@@ -146,7 +146,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
//
- /// CHECK-START-ARM64: long Main.sadChar2Long(char[], char[]) loop_optimization (after)
+ /// CHECK-START: long Main.sadChar2Long(char[], char[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static long sadChar2Long(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
@@ -174,7 +174,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
//
- /// CHECK-START-ARM64: long Main.sadChar2LongAt1(char[], char[]) loop_optimization (after)
+ /// CHECK-START: long Main.sadChar2LongAt1(char[], char[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static long sadChar2LongAt1(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
diff --git a/test/660-checker-simd-sad-int/src/Main.java b/test/660-checker-simd-sad-int/src/Main.java
index 388bfba..d7d5a95 100644
--- a/test/660-checker-simd-sad-int/src/Main.java
+++ b/test/660-checker-simd-sad-int/src/Main.java
@@ -48,6 +48,15 @@
/// CHECK-DAG: <<Ld2:d\d+>> VecLoad [{{l\d+}},<<I>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Ld1:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Ld2:d\d+>> VecLoad [{{l\d+}},<<I>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
private static int sadInt2Int(int[] x, int[] y) {
int min_length = Math.min(x.length, y.length);
int sad = 0;
@@ -72,10 +81,7 @@
//
// No ABS? No SAD!
//
- /// CHECK-START-ARM: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after)
- /// CHECK-NOT: VecSADAccumulate
- //
- /// CHECK-START-ARM64: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after)
+ /// CHECK-START: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after)
/// CHECK-NOT: VecSADAccumulate
private static int sadInt2IntAlt(int[] x, int[] y) {
int min_length = Math.min(x.length, y.length);
@@ -117,6 +123,15 @@
/// CHECK-DAG: <<Ld2:d\d+>> VecLoad [{{l\d+}},<<I>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Ld1:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Ld2:d\d+>> VecLoad [{{l\d+}},<<I>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
private static int sadInt2IntAlt2(int[] x, int[] y) {
int min_length = Math.min(x.length, y.length);
int sad = 0;
@@ -156,6 +171,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
private static long sadInt2Long(int[] x, int[] y) {
int min_length = Math.min(x.length, y.length);
long sad = 0;
@@ -193,6 +220,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
private static long sadInt2LongAt1(int[] x, int[] y) {
int min_length = Math.min(x.length, y.length);
long sad = 1; // starts at 1
diff --git a/test/660-checker-simd-sad-long/src/Main.java b/test/660-checker-simd-sad-long/src/Main.java
index 06f62bd..d080e0c 100644
--- a/test/660-checker-simd-sad-long/src/Main.java
+++ b/test/660-checker-simd-sad-long/src/Main.java
@@ -43,6 +43,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
private static long sadLong2Long(long[] x, long[] y) {
int min_length = Math.min(x.length, y.length);
long sad = 0;
@@ -105,6 +117,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
private static long sadLong2LongAlt2(long[] x, long[] y) {
int min_length = Math.min(x.length, y.length);
long sad = 0;
@@ -142,6 +166,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
private static long sadLong2LongAt1(long[] x, long[] y) {
int min_length = Math.min(x.length, y.length);
long sad = 1; // starts at 1
diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java
index d94308e..4ab6682 100644
--- a/test/660-checker-simd-sad-short/src/Main.java
+++ b/test/660-checker-simd-sad-short/src/Main.java
@@ -76,6 +76,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2Int(short[] s1, short[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -107,6 +118,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntAlt(short[] s1, short[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -140,6 +162,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntAlt2(short[] s1, short[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -179,6 +212,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static long sadShort2Long(short[] s1, short[] s2) {
int min_length = Math.min(s1.length, s2.length);
long sad = 0;
@@ -216,6 +261,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static long sadShort2LongAt1(short[] s1, short[] s2) {
int min_length = Math.min(s1.length, s2.length);
long sad = 1; // starts at 1
diff --git a/test/660-checker-simd-sad-short2/src/Main.java b/test/660-checker-simd-sad-short2/src/Main.java
index 708f3aa..331f5ce 100644
--- a/test/660-checker-simd-sad-short2/src/Main.java
+++ b/test/660-checker-simd-sad-short2/src/Main.java
@@ -94,6 +94,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadCastedChar2Int(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadCastedChar2Int(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -144,6 +155,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadCastedChar2IntAlt(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadCastedChar2IntAlt(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -196,6 +218,17 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadCastedChar2IntAlt2(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadCastedChar2IntAlt2(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
int sad = 0;
@@ -254,6 +287,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadCastedChar2Long(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static long sadCastedChar2Long(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
long sad = 0;
@@ -310,6 +355,18 @@
/// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.sadCastedChar2LongAt1(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static long sadCastedChar2LongAt1(char[] s1, char[] s2) {
int min_length = Math.min(s1.length, s2.length);
long sad = 1; // starts at 1
diff --git a/test/660-checker-simd-sad-short3/src/Main.java b/test/660-checker-simd-sad-short3/src/Main.java
index c8850b4..ecda884 100644
--- a/test/660-checker-simd-sad-short3/src/Main.java
+++ b/test/660-checker-simd-sad-short3/src/Main.java
@@ -44,6 +44,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Param>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntParamRight(short[] s, short param) {
int sad = 0;
for (int i = 0; i < s.length; i++) {
@@ -75,6 +87,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Param>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntParamLeft(short[] s, short param) {
int sad = 0;
for (int i = 0; i < s.length; i++) {
@@ -106,6 +130,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 32767 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntConstRight(short[] s) {
int sad = 0;
for (int i = 0; i < s.length; i++) {
@@ -137,6 +173,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 32767 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntConstLeft(short[] s) {
int sad = 0;
for (int i = 0; i < s.length; i++) {
@@ -168,6 +216,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Conv>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntInvariantRight(short[] s, int val) {
int sad = 0;
short x = (short) (val + 1);
@@ -199,6 +259,17 @@
/// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Conv>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntInvariantLeft(short[] s, int val) {
int sad = 0;
short x = (short) (val + 1);
@@ -233,6 +304,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntCastedExprRight(short[] s) {
int sad = 0;
for (int i = 0; i < s.length; i++) {
@@ -267,6 +350,18 @@
/// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
private static int sadShort2IntCastedExprLeft(short[] s) {
int sad = 0;
for (int i = 0; i < s.length; i++) {