Fix overlap bug

There is currently no rule against a dex code pattern that
overlaps the result of a long operation with the source operands.
For example:

   ADD_LONG v1, v9, v0

is:  (v1,v2) = (v9, v10) + (v0, v1)

However, dx appears to never generate such a pattern.  This masked
a JIT (and later Art) bug in the handling of long arithmetic code
generation.  The above code expands into a pair of 32-bit add
operations and naively kills the high-order word of the 2nd
operand while computing the low-order word of the result.

The fix is simple - detect this case and use an intermediate
result temporary.

Change-Id: I127f5a51925d5b4249a7a9710cce587dfe0939ad
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index cc3685c..591e12c 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1576,11 +1576,11 @@
   rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
   rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   // The longs may overlap - use intermediate temp if so
-  if (rlResult.lowReg == rlSrc1.highReg) {
+  if ((rlResult.lowReg == rlSrc1.highReg) || (rlResult.lowReg == rlSrc2.highReg)){
     int tReg = oatAllocTemp(cUnit);
-    opRegCopy(cUnit, tReg, rlSrc1.highReg);
-    opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
-    opRegRegReg(cUnit, secondOp, rlResult.highReg, tReg, rlSrc2.highReg);
+    opRegRegReg(cUnit, firstOp, tReg, rlSrc1.lowReg, rlSrc2.lowReg);
+    opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg);
+    opRegCopy(cUnit, rlResult.lowReg, tReg);
     oatFreeTemp(cUnit, tReg);
   } else {
     opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);