AArch64: Addressing Cortex-A53 erratum 835769.
Some early revisions of the Cortex-A53 have an erratum (835769) whereby
it is possible for a 64-bit multiply-accumulate instruction in AArch64
state to generate an incorrect result. The conditions which a portion
of code must satisfy in order for the issue to be observed are somewhat
complex, but all cases end with a memory (load, store, or prefetch)
instruction followed immediately by the multiply-accumulate operation.
This commit makes sure to insert a nop instruction before a 64-bit msub
instruction, whenever the latter is preceded by a memory instruction.
This behaviour should make it impossible for the Arm64 backend to
generate a sequence of instructions which matches the erratum
conditions.
Change-Id: I0022eccd41180183c20231dab6e2671d001a204c
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 8a5a58c..dfdb76b 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -427,8 +427,7 @@
rl_src = LoadValue(rl_src, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
RegStorage r_long_mul = AllocTemp();
- NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
- r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+ NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
switch (pattern) {
case Divide3:
OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
@@ -648,7 +647,7 @@
}
OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
- r_src1.GetReg(), r_src2.GetReg());
+ r_src2.GetReg(), r_src1.GetReg());
FreeTemp(temp);
}
return rl_result;