Branch fusing

A belated birthday gift for irogers.  Fuse cmp-long/if-XXz,
cmp[lg]-[float|double]/if-XXz.

Change-Id: I8fa87f620fcf4e6bcf291bbc7a0ea6c8f5535467
diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
index 72b4fec..380c014 100644
--- a/src/compiler/codegen/arm/FP/Thumb2VFP.cc
+++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
@@ -182,6 +182,72 @@
     return false;
 }
 
+/*
+ * Emit a fused floating point compare and conditional branch: a vcmp of
+ * the two source operands, an fmstat to expose the result in the core
+ * condition flags, and one conditional branch to the label of bb->taken.
+ *
+ * The condition is taken from mir->dalvikInsn.arg[0].  isDouble selects the
+ * wide (vcmpd) or narrow (vcmps) compare.  With gtBias set the ordering
+ * conditions are remapped (lt->mi, le->ls, gt->hi, ge->cs); eq and ne are
+ * always used as is.
+ * NOTE(review): the remap presumably makes an unordered (NaN) compare act
+ * as "greater", as the cmpg bytecodes require - confirm against the caller.
+ */
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         bool gtBias, bool isDouble)
+{
+    LIR* labelList = (LIR*)cUnit->blockLabelList;
+    LIR* target = &labelList[bb->taken->id];
+    RegLocation rlSrc1;
+    RegLocation rlSrc2;
+    if (isDouble) {
+        rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+        rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+        rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+        newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, rlSrc1.highReg),
+                S2D(rlSrc2.lowReg, rlSrc2.highReg));
+    } else {
+        rlSrc1 = oatGetSrc(cUnit, mir, 0);
+        rlSrc2 = oatGetSrc(cUnit, mir, 1);
+        rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+        newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg);
+    }
+    newLIR0(cUnit, kThumb2Fmstat);
+    ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+    switch(ccode) {
+        case kCondEq:
+        case kCondNe:
+            break;
+        case kCondLt:
+            if (gtBias) {
+                ccode = kCondMi;
+            }
+            break;
+        case kCondLe:
+            if (gtBias) {
+                ccode = kCondLs;
+            }
+            break;
+        case kCondGt:
+            if (gtBias) {
+                ccode = kCondHi;
+            }
+            break;
+        case kCondGe:
+            if (gtBias) {
+                ccode = kCondCs;
+            }
+            break;
+        default:
+            LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+    }
+    opCondBranch(cUnit, ccode, target);
+}
+
+
 bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
               RegLocation rlSrc1, RegLocation rlSrc2)
 {
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 9477d2c..ea02ca9 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -654,6 +654,63 @@
     branch3->target = branch1->target;
 }
 
+/*
+ * Emit a fused 64-bit compare and conditional branch to the label of
+ * bb->taken, using the condition held in mir->dalvikInsn.arg[0].
+ *
+ * The high words are compared first (signed) and decide the outcome unless
+ * they are equal.  Only then are the low words compared, and since the low
+ * word carries no sign that compare must use the unsigned form of the
+ * condition (lt->cc, le->ls, gt->hi, ge->cs).  notTaken is a local label
+ * appended after the final branch, i.e. the fall-through point.
+ */
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+    LIR* labelList = (LIR*)cUnit->blockLabelList;
+    LIR* taken = &labelList[bb->taken->id];
+    RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+    RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+    LIR* notTaken = rawLIR(cUnit, mir->offset, kPseudoTargetLabel);
+    opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
+    switch(ccode) {
+        case kCondEq:
+            opCondBranch(cUnit, kCondNe, notTaken);
+            break;
+        case kCondNe:
+            opCondBranch(cUnit, kCondNe, taken);
+            break;
+        case kCondLt:
+            opCondBranch(cUnit, kCondLt, taken);
+            opCondBranch(cUnit, kCondGt, notTaken);
+            ccode = kCondCc;  // low words: unsigned lower
+            break;
+        case kCondLe:
+            opCondBranch(cUnit, kCondLt, taken);
+            opCondBranch(cUnit, kCondGt, notTaken);
+            ccode = kCondLs;  // low words: unsigned lower or same
+            break;
+        case kCondGt:
+            opCondBranch(cUnit, kCondGt, taken);
+            opCondBranch(cUnit, kCondLt, notTaken);
+            ccode = kCondHi;  // low words: unsigned higher
+            break;
+        case kCondGe:
+            opCondBranch(cUnit, kCondGt, taken);
+            opCondBranch(cUnit, kCondLt, notTaken);
+            ccode = kCondCs;  // low words: unsigned higher or same
+            break;
+        default:
+            LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+    }
+    // High words are equal here: the low words settle the comparison.
+    opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+    opCondBranch(cUnit, ccode, taken);
+    oatAppendLIR(cUnit, notTaken);
+}
+
 /*
  * Generate a register comparison to an immediate and branch.  Caller
  * is responsible for setting branch target field.