Fix branch bug (showed up in codegen for debug)

There are a few "safe" optimizations in the compiler - removing
register copies where source and target are the same, deleting
branches to the next instruction, etc.  One of the redundant
branch optimizations, however, was incorrect and resulted in
a good branch being deleted.  This one showed up in the debug
build, and resulted in a failure to do a suspend check (because
the branch to the suspend check was deleted).

I had hoped that this bug might also be the cause of some
other unexpected failures, but unfortunately I was only able
to trigger it when doing a "codegen for debug" build.

The source of the bug was a confusion around 16- vs. 32-bit
unconditional branch encodings.  For a 32-bit unconditional
branch, going to the next instruction means a displacement
of zero.  However, for 16-bit branches, the next instruction
is represented by a displacement of -1.

To help track down this sort of thing in the future, this CL
also adds a new optimization disable flag: kSafeOptimizations.
This will allow us to really turn off all optimizations for A/B
testing.

Also in this CL we are re-enabling the ability to promote argument
registers and improving somewhat the code sequence for suspend
check when debug is enabled.

Change-Id: Ib6b202746eac751cab3b4609805a389c18cb67b2
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index d44d7cf..3335f59 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -134,17 +134,6 @@
     oatClobber(cUnit, fr15);
 }
 
-extern RegLocation oatGetReturnWide(CompilationUnit* cUnit)
-{
-    RegLocation res = LOC_C_RETURN_WIDE;
-    oatClobber(cUnit, r0);
-    oatClobber(cUnit, r1);
-    oatMarkInUse(cUnit, r0);
-    oatMarkInUse(cUnit, r1);
-    oatMarkPair(cUnit, res.lowReg, res.highReg);
-    return res;
-}
-
 extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit)
 {
     RegLocation res = LOC_C_RETURN_WIDE;
@@ -158,14 +147,6 @@
     return res;
 }
 
-extern RegLocation oatGetReturn(CompilationUnit* cUnit)
-{
-    RegLocation res = LOC_C_RETURN;
-    oatClobber(cUnit, r0);
-    oatMarkInUse(cUnit, r0);
-    return res;
-}
-
 extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
 {
     RegLocation res = LOC_C_RETURN;
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index a1f7bd7..2c13107 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -1142,7 +1142,8 @@
                 intptr_t target = targetLIR->offset;
                 int delta = target - pc;
                 lir->operands[0] = delta >> 1;
-                if (lir->operands[0] == 0) {  // Useless branch?
+                if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) &&
+                    lir->operands[0] == 0) {  // Useless branch
                     lir->flags.isNop = true;
                     res = kRetryAll;
                 }
@@ -1157,12 +1158,13 @@
                     lir->operands[0] = 0;
                     oatSetupResourceMasks(lir);
                     res = kRetryAll;
-                }
-                lir->operands[0] = delta >> 1;
-                if ((lir->operands[0] == 0) ||
-                    (lir->operands[0] == -1)) {  // Useless branch?
-                    lir->flags.isNop = true;
-                    res = kRetryAll;
+                } else {
+                    lir->operands[0] = delta >> 1;
+                    if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) &&
+                        lir->operands[0] == -1) {  // Useless branch
+                        lir->flags.isNop = true;
+                        res = kRetryAll;
+                    }
                 }
             } else if (lir->opcode == kThumbBlx1) {
                 DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
diff --git a/src/compiler/codegen/arm/Codegen.h b/src/compiler/codegen/arm/Codegen.h
index 3b866ce..09ff964 100644
--- a/src/compiler/codegen/arm/Codegen.h
+++ b/src/compiler/codegen/arm/Codegen.h
@@ -49,7 +49,6 @@
 
 int loadHelper(CompilationUnit* cUnit, int offset);
 LIR* callRuntimeHelper(CompilationUnit* cUnit, int reg);
-RegLocation getRetLoc(CompilationUnit* cUnit);
 LIR* loadConstant(CompilationUnit* cUnit, int reg, int immVal);
 void opRegCopyWide(CompilationUnit* cUnit, int destLo, int destHi,
                    int srcLo, int srcHi);
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index b6df8e3..1810573 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -1039,7 +1039,7 @@
         }
     }
     LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, rDest, rSrc);
-    if (rDest == rSrc) {
+    if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
         res->flags.isNop = true;
     }
     return res;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 1351992..557e87b 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -437,7 +437,7 @@
     else
          opcode = kThumbMovRR_L2H;
     res = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, rDest, rSrc);
-    if (rDest == rSrc) {
+    if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
         res->flags.isNop = true;
     }
     return res;