MIPS switch table support

And 64-bit neg/add/sub (ouch! Mips has no carry bit...)

Change-Id: Ifb94324a0052d6069977fb8f22679b95890445d8
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index 155675c..c975889 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -25,28 +25,38 @@
 namespace art {
 
 /*
- * The sparse table in the literal pool is an array of <key,displacement>
- * pairs.  For each set, we'll load them as a pair using ldmia.
- * This means that the register number of the temp we use for the key
- * must be lower than the reg for the displacement.
+ * The lack of pc-relative loads on Mips presents somewhat of a challenge
+ * for our PIC switch table strategy.  To materialize the current location
+ * we'll do a dummy JAL and reference our tables using r_RA as the
+ * base register.  Note that r_RA will be used both as the base to
+ * locate the switch table data and as the reference base for the switch
+ * target offsets stored in the table.  We'll use a special pseudo-instruction
+ * to represent the jal and trigger the construction of the
+ * switch table offsets (which will happen after final assembly and all
+ * labels are fixed).
  *
  * The test loop will look something like:
  *
- *   adr   rBase, <table>
- *   ldr   rVal, [rSP, vRegOff]
- *   mov   rIdx, #tableSize
- * lp:
- *   ldmia rBase!, {rKey, rDisp}
- *   sub   rIdx, #1
- *   cmp   rVal, rKey
- *   ifeq
- *   add   rPC, rDisp   ; This is the branch from which we compute displacement
- *   cbnz  rIdx, lp
+ *   ori   rEnd, r_ZERO, #tableSize  ; size in bytes
+ *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
+ *   nop                     ; opportunistically fill
+ * BaseLabel:
+ *   addiu rBase, r_RA, <table> - <BaseLabel>  ; table relative to BaseLabel
+ *   addu  rEnd, rEnd, rBase                   ; end of table
+ *   lw    rVal, [rSP, vRegOff]                ; Test Value
+ * loop:
+ *   beq   rBase, rEnd, done
+ *   lw    rKey, 0(rBase)
+ *   addu  rBase, 8
+ *   bne   rVal, rKey, loop
+ *   lw    rDisp, -4(rBase)
+ *   addu  r_RA, rDisp
+ *   jr    r_RA
+ * done:
+ *
  */
 void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Needs Mips sparse switch";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
         dumpSparseSwitchTable(table);
@@ -56,49 +66,75 @@
                          true, kAllocData);
     tabRec->table = table;
     tabRec->vaddr = mir->offset;
-    int size = table[1];
-    tabRec->targets = (LIR* *)oatNew(cUnit, size * sizeof(LIR*), true,
+    int elements = table[1];
+    tabRec->targets = (LIR* *)oatNew(cUnit, elements * sizeof(LIR*), true,
                                      kAllocLIR);
     oatInsertGrowableList(cUnit, &cUnit->switchTables, (intptr_t)tabRec);
 
-    // Get the switch value
-    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
-    int rBase = oatAllocTemp(cUnit);
-    /* Allocate key and disp temps */
-    int rKey = oatAllocTemp(cUnit);
-    int rDisp = oatAllocTemp(cUnit);
-    // Make sure rKey's register number is less than rDisp's number for ldmia
-    if (rKey > rDisp) {
-        int tmp = rDisp;
-        rDisp = rKey;
-        rKey = tmp;
+    // The table is composed of 8-byte key/disp pairs
+    int byteSize = elements * 8;
+
+    int sizeHi = byteSize >> 16;
+    int sizeLo = byteSize & 0xffff;
+
+    int rEnd = oatAllocTemp(cUnit);
+    if (sizeHi) {
+        newLIR2(cUnit, kMipsLui, rEnd, sizeHi);
     }
-    // Materialize a pointer to the switch table
-    newLIR3(cUnit, kThumb2Adr, rBase, 0, (intptr_t)tabRec);
-    // Set up rIdx
-    int rIdx = oatAllocTemp(cUnit);
-    loadConstant(cUnit, rIdx, size);
-    // Establish loop branch target
-    LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
-    // Load next key/disp
-    newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
-    opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
-    // Go if match. NOTE: No instruction set switch here - must stay Thumb2
-    opIT(cUnit, kArmCondEq, "");
-    LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
-    tabRec->bxInst = switchBranch;
-    // Needs to use setflags encoding here
-    newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
-    LIR* branch = opCondBranch(cUnit, kCondNe, target);
-#endif
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);  // Scheduling barrier
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot
+    if (sizeHi) {
+        newLIR3(cUnit, kMipsOri, rEnd, rEnd, sizeLo);
+    } else {
+        newLIR3(cUnit, kMipsOri, rEnd, r_ZERO, sizeLo);
+    }
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    // Remember base label so offsets can be computed later
+    tabRec->anchor = baseLabel;
+    int rBase = oatAllocTemp(cUnit);
+    newLIR4(cUnit, kMipsDelta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
+    opRegRegReg(cUnit, kOpAdd, rEnd, rEnd, rBase);
+
+    // Grab switch test value
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+
+    // Test loop
+    int rKey = oatAllocTemp(cUnit);
+    LIR* loopLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    LIR* exitBranch = opCmpBranch(cUnit , kCondEq, rBase, rEnd, NULL);
+    loadWordDisp(cUnit, rBase, 0, rKey);
+    opRegImm(cUnit, kOpAdd, rBase, 8);
+    opCmpBranch(cUnit, kCondNe, rlSrc.lowReg, rKey, loopLabel);
+    int rDisp = oatAllocTemp(cUnit);
+    loadWordDisp(cUnit, rBase, -4, rDisp);
+    opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp);
+    opReg(cUnit, kOpBx, r_RA);
+
+    // Loop exit
+    LIR* exitLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    exitBranch->target = exitLabel;
 }
 
-
+/*
+ * Code pattern will look something like:
+ *
+ *   lw    rVal
+ *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
+ *   nop                     ; opportunistically fill
+ *   [subiu rVal, bias]      ; Remove bias if lowVal != 0
+ *   bound check -> done
+ *   lw    rDisp, [r_RA, rVal]
+ *   addu  r_RA, rDisp
+ *   jr    r_RA
+ * done:
+ */
 void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Need Mips packed switch";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
         dumpPackedSwitchTable(table);
@@ -115,35 +151,59 @@
 
     // Get the switch value
     rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
-    int tableBase = oatAllocTemp(cUnit);
-    // Materialize a pointer to the switch table
-    newLIR3(cUnit, kThumb2Adr, tableBase, 0, (intptr_t)tabRec);
+
+    // Prepare the bias.  If too big, handle 1st stage here
     int lowKey = s4FromSwitchData(&table[2]);
-    int keyReg;
-    // Remove the bias, if necessary
+    bool largeBias = false;
+    int rKey;
     if (lowKey == 0) {
-        keyReg = rlSrc.lowReg;
+        rKey = rlSrc.lowReg;
+    } else if ((lowKey & 0xffff) != lowKey) {
+        rKey = oatAllocTemp(cUnit);
+        loadConstant(cUnit, rKey, lowKey);
+        largeBias = true;
     } else {
-        keyReg = oatAllocTemp(cUnit);
-        opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
+        rKey = oatAllocTemp(cUnit);
     }
+
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot with bias strip
+    if (lowKey == 0) {
+        newLIR0(cUnit, kMipsNop);
+    } else {
+        if (largeBias) {
+            opRegRegReg(cUnit, kOpSub, rKey, rlSrc.lowReg, rKey);
+        } else {
+            opRegRegImm(cUnit, kOpSub, rKey, rlSrc.lowReg, lowKey);
+        }
+    }
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    // Remember base label so offsets can be computed later
+    tabRec->anchor = baseLabel;
+
     // Bounds check - if < 0 or >= size continue following switch
-    opRegImm(cUnit, kOpCmp, keyReg, size-1);
-    LIR* branchOver = opCondBranch(cUnit, kCondHi, NULL);
+    LIR* branchOver = opCmpImmBranch(cUnit, kCondHi, rKey, size-1, NULL);
+
+    // Materialize the table base pointer
+    int rBase = oatAllocTemp(cUnit);
+    newLIR4(cUnit, kMipsDelta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
 
     // Load the displacement from the switch table
-    int dispReg = oatAllocTemp(cUnit);
-    loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
+    int rDisp = oatAllocTemp(cUnit);
+    loadBaseIndexed(cUnit, rBase, rKey, rDisp, 2, kWord);
 
-    // ..and go! NOTE: No instruction set switch here - must stay Thumb2
-    LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
-    tabRec->bxInst = switchBranch;
+    // Add to r_RA and go
+    opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp);
+    opReg(cUnit, kOpBx, r_RA);
 
     /* branchOver target here */
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branchOver->target = (LIR*)target;
-#endif
 }
 
 /*
@@ -158,8 +218,6 @@
  */
 void genFillArrayData(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Needs Mips FillArrayData";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     // Add the table to the list - we'll process it later
     FillArrayData *tabRec = (FillArrayData *)
@@ -174,13 +232,25 @@
 
     // Making a call - use explicit registers
     oatFlushAllRegs(cUnit);   /* Everything to home location */
+    oatLockCallTemps(cUnit);
     loadValueDirectFixed(cUnit, rlSrc, rARG0);
-    loadWordDisp(cUnit, rSELF,
-                 OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR);
+
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot with the helper load
+    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
+                          pHandleFillArrayDataFromCode));
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+
     // Materialize a pointer to the fill data image
-    newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec);
-    callRuntimeHelper(cUnit, rLR);
-#endif
+    newLIR4(cUnit, kMipsDelta, rARG1, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
+
+    // And go...
+    callRuntimeHelper(cUnit, rTgt);  // ( array*, fill_data* )
 }
 
 void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc)
@@ -266,7 +336,6 @@
     oatFreeTemp(cUnit, t0);
     oatFreeTemp(cUnit, t1);
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch->target = (LIR*)target;
     storeValue(cUnit, rlDest, rlResult);
 }
@@ -314,6 +383,11 @@
             sltOp = kMipsSlt;
             brOp = kMipsBnez;
             break;
+        case kCondHi:  // Gtu
+            sltOp = kMipsSltu;
+            brOp = kMipsBnez;
+            swapped = true;
+            break;
         default:
             UNIMPLEMENTED(FATAL) << "No support for ConditionCode: "
                                  << (int) cond;