More target-independence

Continuing to move target-specific code from the Arm
code generator into the independent realm.  This will be
done in multiple small steps.

In this CL, the focus is on unifying the LIR data structure and
various enums that don't really need to be target specific. Also
creates two new shared source files: GenCommon.cc (to hold
top-level code generation functions) and GenInvoke.cc (which
is likely to be shared only by the Arm and Mips targets).

Also added is a makefile hack to build for Mips (which we'll
eventually remove when the compiler supports multiple targets
via the command line) and various minor cleanups.

Overall, this CL moves more than 3,000 lines of code from
target dependent to target independent.

Change-Id: I431ca4ae728100ed7d0e9d83a966a3f789f731b1
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
new file mode 100644
index 0000000..a201010
--- /dev/null
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -0,0 +1,506 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace art {
+
+/*
+ * This source file contains "gen" codegen routines that should
+ * be applicable to most targets.  Only mid-level support utilities
+ * and "op" calls may be used here.
+ */
+
+
+/*
+ * x86 targets will likely be different enough to need their own
+ * invoke gen routines.
+ */
+#if defined(TARGET_ARM) || defined (TARGET_MIPS)
+typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,  // the int is the current call state
+                            uint32_t methodIdx);  // yields the next state (-1 from the generators below when done)
+/*
+ * Spill each incoming argument that arrived in an argument register
+ * (rARG1..rARG3) to its frame slot, and give every promoted argument
+ * its initial value in the physical register(s) it was promoted to.
+ */
+void flushIns(CompilationUnit* cUnit)
+{
+    if (cUnit->numIns == 0) {
+        return;
+    }
+    const int regArgBase = rARG1;
+    const int regArgCount = rARG3 - rARG1 + 1;
+    const int firstInVReg = cUnit->numDalvikRegisters - cUnit->numIns;
+    /*
+     * Register-passed ins are always written back to their frame home
+     * for now, and promoted ins additionally get their first value.
+     * NOTE: an older dx could reuse static method argument registers,
+     * so one Dalvik vreg may be promoted to both a core and an fp
+     * register.  Both copies are then initialized; the case is rare
+     * enough that optimizing it is not worthwhile.
+     */
+    for (int inIdx = 0; inIdx < cUnit->numIns; inIdx++) {
+        const int vReg = firstInVReg + inIdx;
+        PromotionMap promo = cUnit->promotionMap[vReg];
+        if (inIdx >= regArgCount) {
+            // Arrived in the frame: only promoted ins need loading
+            if (promo.coreLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
+                             promo.coreReg);
+            }
+            if (promo.fpLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg),
+                             promo.fpReg);
+            }
+            continue;
+        }
+        // Arrived in a register: seed any promoted home(s) from it
+        const int argReg = regArgBase + inIdx;
+        if (promo.coreLocation == kLocPhysReg) {
+            genRegCopy(cUnit, promo.coreReg, argReg);
+        }
+        if (promo.fpLocation == kLocPhysReg) {
+            genRegCopy(cUnit, promo.fpReg, argReg);
+        }
+        // Keep a copy in the frame as well in case of partial promotion
+        storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, vReg), argReg,
+                      kWord);
+    }
+}
+
+/*
+ * Stand-in for a real scheduling pass: each call emits the next
+ * instruction of a static/direct invoke sequence and returns the
+ * following state, or -1 once the sequence is exhausted.
+ */
+int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
+                   int state, uint32_t dexIdx, uint32_t unused)
+{
+    if (state == 0) {
+        // rARG0 <- current Method*
+        loadCurrMethodDirect(cUnit, rARG0);
+    } else if (state == 1) {
+        // rARG0 <- method->dex_cache_resolved_methods_
+        const int resolvedMethodsOffset =
+            Method::DexCacheResolvedMethodsOffset().Int32Value();
+        loadWordDisp(cUnit, rARG0, resolvedMethodsOffset, rARG0);
+    } else if (state == 2) {
+        // rARG0 <- target method*: element dexIdx of that array
+        loadWordDisp(cUnit, rARG0, dexIdx * 4 +
+                     Array::DataOffset(sizeof(Object*)).Int32Value(), rARG0);
+    } else if (state == 3) {
+        // rLINK <- code pointer of the target method*
+        const int codeOffset = Method::GetCodeOffset().Int32Value();
+        loadWordDisp(cUnit, rARG0, codeOffset, rLINK);
+    } else {
+        // Sequence exhausted
+        return -1;
+    }
+    return state + 1;
+}
+
+/*
+ * Stand-in for a real scheduling pass: each call emits the next
+ * instruction of a virtual invoke sequence and returns the following
+ * state, or -1 once the sequence is exhausted.  rLINK serves as a
+ * temp until the target address is loaded into it.  "this" is loaded
+ * into rARG1 here rather than by the standard loadArgRegs.
+ */
+int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
+                  int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+    /*
+     * Fast path: the target virtual method is fully resolved at
+     * compile time.
+     */
+    if (state == 0) {
+        // rARG1 <- "this"
+        RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
+        loadValueDirectFixed(cUnit, rlThis, rARG1);
+    } else if (state == 1) {
+        // Null-check "this" in rARG1
+        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
+        // rLINK <- this->klass_
+        const int classOffset = Object::ClassOffset().Int32Value();
+        loadWordDisp(cUnit, rARG1, classOffset, rLINK);
+    } else if (state == 2) {
+        // rLINK <- this->klass_->vtable
+        const int vtableOffset = Class::VTableOffset().Int32Value();
+        loadWordDisp(cUnit, rLINK, vtableOffset, rLINK);
+    } else if (state == 3) {
+        // rARG0 <- target method: the vtable element at index
+        // methodIdx
+        loadWordDisp(cUnit, rLINK,
+                     Array::DataOffset(sizeof(Object*)).Int32Value() +
+                     (methodIdx * 4), rARG0);
+    } else if (state == 4) {
+        // rLINK <- compiled code address of the target (from rARG0)
+        const int codeOffset = Method::GetCodeOffset().Int32Value();
+        loadWordDisp(cUnit, rARG0, codeOffset, rLINK);
+    } else {
+        // Sequence exhausted
+        return -1;
+    }
+    return state + 1;
+}
+
+/*
+ * Interleaved launch code for INVOKE_SUPER; same protocol as
+ * nextVCallInsn (returns the next state, or -1 when done).
+ */
+int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
+                      int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+    /*
+     * Fast path: the target virtual method is fully resolved at
+     * compile time.  This path also assumes the compile-time check
+     * that the target method index lies within the super's vtable
+     * has already been done.
+     */
+    if (state == 0) {
+        // rARG0 <- current Method*
+        loadCurrMethodDirect(cUnit, rARG0);
+        // rARG1 <- "this"
+        RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
+        loadValueDirectFixed(cUnit, rlThis, rARG1);
+        // rLINK <- method->declaring_class_ (read through rARG0)
+        const int declClassOffset =
+            Method::DeclaringClassOffset().Int32Value();
+        loadWordDisp(cUnit, rARG0, declClassOffset, rLINK);
+        // Null-check "this" in rARG1
+        genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
+    } else if (state == 1) {
+        // rLINK <- method->declaring_class_->super_class
+        const int superOffset = Class::SuperClassOffset().Int32Value();
+        loadWordDisp(cUnit, rLINK, superOffset, rLINK);
+    } else if (state == 2) {
+        // rLINK <- ...->super_class_->vtable
+        const int vtableOffset = Class::VTableOffset().Int32Value();
+        loadWordDisp(cUnit, rLINK, vtableOffset, rLINK);
+    } else if (state == 3) {
+        // rARG0 <- target method: the vtable element at index
+        // methodIdx
+        loadWordDisp(cUnit, rLINK,
+                     Array::DataOffset(sizeof(Object*)).Int32Value() +
+                     (methodIdx * 4), rARG0);
+    } else if (state == 4) {
+        // rLINK <- compiled code address of the target (from rARG0)
+        const int codeOffset = Method::GetCodeOffset().Int32Value();
+        loadWordDisp(cUnit, rARG0, codeOffset, rLINK);
+    } else {
+        // Sequence exhausted
+        return -1;
+    }
+    return state + 1;
+}
+
+int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
+                     int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+    /*
+     * Slow path: the base method was not fully resolved at compile
+     * time, so bail to a runtime helper.  Only state 0 emits code.
+     */
+    if (state != 0) {
+        return -1;
+    }
+    // rLINK <- trampoline target, read at offset 'trampoline' from rSELF
+    loadWordDisp(cUnit, rSELF, trampoline, rLINK);
+    // rARG0 <- method index
+    loadConstant(cUnit, rARG0, dexIdx);
+    return 1;
+}
+
+// Static invoke via nextInvokeInsnSP; methodIdx is ignored (0 is forwarded).
+int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
+                         int state, uint32_t dexIdx, uint32_t methodIdx) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+// Direct invoke via nextInvokeInsnSP; methodIdx is ignored (0 is forwarded).
+int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
+                         uint32_t dexIdx, uint32_t methodIdx) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+// Super invoke via nextInvokeInsnSP; methodIdx is ignored (0 is forwarded).
+int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
+                        uint32_t dexIdx, uint32_t methodIdx) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+// Virtual invoke via nextInvokeInsnSP; methodIdx is ignored (0 is forwarded).
+int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
+                    uint32_t dexIdx, uint32_t methodIdx) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+/*
+ * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
+ * which will locate the target and continue on via a tail call.  The last
+ * parameter is ignored; 0 is forwarded as the method index.
+ */
+int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
+                          uint32_t dexIdx, uint32_t unused) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+// Interface invoke through Thread::pInvokeInterfaceTrampolineWithAccessCheck;
+// the last parameter is ignored (0 is forwarded as the method index).
+int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
+                                         int state, uint32_t dexIdx, uint32_t unused) {
+    const int helperOffset = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
+    return nextInvokeInsnSP(cUnit, mir, helperOffset, state, dexIdx, 0);
+}
+
+int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
+                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
+                uint32_t methodIdx, bool skipThis)
+{
+    // With skipThis, both rARG1 and use 0 are left untouched
+    int reg = skipThis ? rARG1 + 1 : rARG1;
+    int useIdx = skipThis ? 1 : 0;
+    while ((reg <= rARG3) && (useIdx < mir->ssaRep->numUses)) {
+        RegLocation loc = oatGetRawSrc(cUnit, mir, useIdx);
+        loc = oatUpdateRawLoc(cUnit, loc);
+        if (loc.wide && (reg <= rARG2)) {
+            loadValueDirectWideFixed(cUnit, loc, reg, reg + 1);
+            reg += 2;
+            useIdx += 2;
+        } else {  // narrow value, or a wide one whose high half won't fit
+            loc.wide = false;
+            loadValueDirectFixed(cUnit, loc, reg);
+            reg += 1;
+            useIdx += 1;
+        }
+        // Interleave the next instruction of the invoke sequence
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+    }
+    return callState;
+}
+
+/*
+ * Load up to 5 arguments: the first three go to rARG1 .. rARG3 (via
+ * loadArgRegs), any others to the outs area at [rSP + (index + 1) * 4].
+ * On entry rARG0 contains the current method pointer; as part of the load
+ * sequence (the interleaved nextCallInsn calls) it must be replaced with the
+ * target method pointer.  Also used for "range" variants with <= 5 arguments.
+ */
+int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
+                         DecodedInstruction* dInsn, int callState,
+                         LIR** pcrLabel, NextCallInsn nextCallInsn,
+                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
+{
+    RegLocation rlArg;
+
+    /* If no arguments, just return */
+    if (dInsn->vA == 0)
+        return callState;
+
+    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+
+    DCHECK_LE(dInsn->vA, 5U);
+    if (dInsn->vA > 3) {
+        uint32_t nextUse = 3;
+        // Detect special case of wide arg spanning arg3/arg4
+        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
+        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
+        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
+        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
+            rlUse2.wide) {
+            int reg;
+            // Wide spans, we need the 2nd half of uses[2].
+            rlArg = oatUpdateLocWide(cUnit, rlUse2);
+            if (rlArg.location == kLocPhysReg) {
+                reg = rlArg.highReg;
+            } else {
+                // rARG2 & rARG3 can safely be used here
+                reg = rARG3;
+                loadWordDisp(cUnit, rSP,
+                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
+                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
+                                         methodIdx);
+            }
+            // One store is enough: nextUse is 3, so the slot is (3+1)*4 == 16
+            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
+            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+            nextUse++;
+        }
+        // Loop through the rest
+        while (nextUse < dInsn->vA) {
+            int lowReg;
+            int highReg;
+            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
+            rlArg = oatUpdateRawLoc(cUnit, rlArg);
+            if (rlArg.location == kLocPhysReg) {
+                lowReg = rlArg.lowReg;
+                highReg = rlArg.highReg;
+            } else {  // not in a physical register: stage through rARG2 (and rARG3 if wide)
+                lowReg = rARG2;
+                highReg = rARG3;
+                if (rlArg.wide) {
+                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
+                } else {
+                    loadValueDirectFixed(cUnit, rlArg, lowReg);
+                }
+                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
+                                         methodIdx);
+            }
+            int outsOffset = (nextUse + 1) * 4;  // outs slot for this argument
+            if (rlArg.wide) {
+                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
+                nextUse += 2;
+            } else {
+                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
+                nextUse++;
+            }
+            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        }
+    }
+
+    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
+                            dexIdx, methodIdx, skipThis);
+
+    if (pcrLabel) {  // optional: null-check use 0 via rARG1 and hand the result back
+        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
+    }
+    return callState;  // state from which the caller continues the invoke sequence
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo); calls with 5 or fewer
+ * are handed to genDalvikArgsNoRange.  Source virtual registers may
+ * live in physical registers, so they may need to be flushed to their
+ * home location before copying.  This applies to arg3 and above.
+ *
+ * Two general strategies (non-MIPS builds):
+ *    If < 20 arguments
+ *       Pass arg3 and above using a vldm/vstm block copy
+ *       Pass arg0, arg1 & arg2 in rARG1-rARG3
+ *    If 20+ arguments
+ *       Pass arg3 and above using a memcpy block copy
+ *       Pass arg0, arg1 & arg2 in rARG1-rARG3
+ * TARGET_MIPS builds always use the memcpy block copy.
+ */
+int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
+                       DecodedInstruction* dInsn, int callState,
+                       LIR** pcrLabel, NextCallInsn nextCallInsn,
+                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
+{
+    int firstArg = dInsn->vC;
+    int numArgs = dInsn->vA;
+
+    // If we can treat it as non-range (Jumbo ops will use range form)
+    if (numArgs <= 5)
+        return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
+                                    nextCallInsn, dexIdx, methodIdx,
+                                    skipThis);
+    /*
+     * Make sure range list doesn't span the break between the normal
+     * Dalvik vRegs and the ins.
+     */
+    int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
+    int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
+    if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
+        LOG(FATAL) << "Argument list spanned locals & args";
+    }
+
+    /*
+     * First load the non-register arguments.  Both forms expect all
+     * of the source arguments to be in their home frame location, so
+     * scan the sReg names and flush any that currently live in a
+     * physical register back to their frame backing storage.
+     */
+    // Scan all args; only arg3 and above are flushed when in a physReg
+    for (int nextArg = 0; nextArg < numArgs;) {
+        RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
+        if (loc.wide) {
+            loc = oatUpdateLocWide(cUnit, loc);
+            if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {  // a wide at arg2 also covers arg3
+                storeBaseDispWide(cUnit, rSP,
+                                  oatSRegOffset(cUnit, loc.sRegLow),
+                                  loc.lowReg, loc.highReg);
+            }
+            nextArg += 2;
+        } else {
+            loc = oatUpdateLoc(cUnit, loc);
+            if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
+                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
+                              loc.lowReg, kWord);
+            }
+            nextArg++;
+        }
+    }
+
+    int startOffset = oatSRegOffset(cUnit,
+        cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);  // frame offset of arg3, the copy source
+    int outsOffset = 4 /* Method* */ + (3 * 4);  // copy destination: past the Method* word and three 4-byte slots
+#if defined(TARGET_MIPS)
+    // Generate memcpy
+    opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
+    opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
+    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
+    loadConstant(cUnit, rARG2, (numArgs - 3) * 4);  // byte count for arg3 and above
+    callRuntimeHelper(cUnit, rTgt);
+    // Restore Method*
+    loadCurrMethodDirect(cUnit, rARG0);
+#else
+    if (numArgs >= 20) {
+        // Generate memcpy
+        opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
+        opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
+        int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
+        loadConstant(cUnit, rARG2, (numArgs - 3) * 4);  // byte count for arg3 and above
+        callRuntimeHelper(cUnit, rTgt);
+        // Restore Method*
+        loadCurrMethodDirect(cUnit, rARG0);
+    } else {
+        // Use vldm/vstm pair using rARG3 as a temp
+        int regsLeft = std::min(numArgs - 3, 16);  // words to block-copy, capped at 16
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
+        LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
+        // TUNING: loosen barrier
+        ld->defMask = ENCODE_ALL;
+        setMemRefType(ld, true /* isLoad */, kDalvikReg);
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+        LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
+        setMemRefType(st, false /* isLoad */, kDalvikReg);
+        st->defMask = ENCODE_ALL;
+        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+    }
+#endif
+
+    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
+                            dexIdx, methodIdx, skipThis);
+
+    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
+    if (pcrLabel) {  // optional: null-check use 0 via rARG1 and hand the result back
+        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
+    }
+    return callState;
+}
+
+#endif  // TARGET_ARM || TARGET_MIPS
+
+
+}  // namespace art