Implement Sum-of-Abs-Differences idiom recognition.
Rationale:
Currently just on ARM64 (x86 lacks proper support),
using the SAD idiom yields great speedup on loops
that compute the sum-of-abs-difference operation.
Also includes some refinements around type conversions.
Speedup ExoPlayerAudio (golem run):
1.3x on ARM64
1.1x on x86
Test: test-art-host test-art-target
Bug: 64091002
Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index e018b56..9643b90 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -165,6 +165,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(byte[] x, byte[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (byte) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
// Initialize cross-values for all possible values.
int total = 256 * 256;
@@ -202,6 +224,11 @@
byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ byte expected = (byte) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}