AArch64: Addressing Cortex-A53 erratum 835769.

Some early revisions of the Cortex-A53 have an erratum (835769) whereby
it is possible for a 64-bit multiply-accumulate instruction in AArch64
state to generate an incorrect result. The conditions which a portion
of code must satisfy in order for the issue to be observed are somewhat
complex, but all cases end with a memory (load, store, or prefetch)
instruction followed immediately by the multiply-accumulate operation.

This commit makes sure to insert a nop instruction before a 64-bit msub
instruction, whenever the latter is preceded by a memory instruction.
This behaviour should make it impossible for the Arm64 backend to
generate a sequence of instructions which matches the erratum
conditions.

Change-Id: I0022eccd41180183c20231dab6e2671d001a204c
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 8a5a58c..dfdb76b 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -427,8 +427,7 @@
   rl_src = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage r_long_mul = AllocTemp();
-  NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
-          r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
   switch (pattern) {
     case Divide3:
       OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
@@ -648,7 +647,7 @@
     }
     OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
     NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
-            r_src1.GetReg(), r_src2.GetReg());
+            r_src2.GetReg(), r_src1.GetReg());
     FreeTemp(temp);
   }
   return rl_result;