Further x86 progress and image creation.

Change-Id: Idafadfc55228541536f25d2c92d40d9e0510b602
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index eec1cbd..bd95afb 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -24,42 +24,6 @@
 
 namespace art {
 
-bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
-                RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    /*
-     *  [v1 v0] =  [a1 a0] + [a3 a2];
-     *    add v0,a2,a0
-     *    adc v1,a3,a1
-     */
-
-    opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc2.lowReg, rlSrc1.lowReg);
-    opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc2.highReg, rlSrc1.highReg);
-    storeValueWide(cUnit, rlDest, rlResult);
-    return false;
-}
-
-bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
-                RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    /*
-     *  [v1 v0] =  [a1 a0] - [a3 a2];
-     *    sub    v0,a0,a2
-     *    sbb    v1,a1,a3
-     */
-
-    opRegRegReg(cUnit, kOpSub, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
-    opRegRegReg(cUnit, kOpSbc, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg);
-    storeValueWide(cUnit, rlDest, rlResult);
-    return false;
-}
-
 bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                 RegLocation rlSrc)
 {
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index b9dd978..b62b5b4 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -279,6 +279,10 @@
   { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
   { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP,     { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
 
+  // TODO: load/store?
+  // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
+  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
+
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF),
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6),
@@ -945,12 +949,12 @@
   LIR *lir;
   AssemblerStatus res = kSuccess;  // Assume success
 
+  const bool kVerbosePcFixup = false;
   for (lir = (LIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
     if (lir->opcode < 0) {
       continue;
     }
 
-
     if (lir->flags.isNop) {
       continue;
     }
@@ -970,8 +974,10 @@
           intptr_t target = targetLIR->offset;
           delta = target - pc;
           if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            LOG(INFO) << "Retry for JCC growth at " << lir->offset
-                << " delta: " << delta << " old delta: " << lir->operands[0];
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for JCC growth at " << lir->offset
+                  << " delta: " << delta << " old delta: " << lir->operands[0];
+            }
             lir->opcode = kX86Jcc32;
             oatSetupResourceMasks(lir);
             res = kRetryAll;
@@ -994,10 +1000,14 @@
           if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
             // Useless branch
             lir->flags.isNop = true;
-            LOG(INFO) << "Retry for useless branch at " << lir->offset;
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for useless branch at " << lir->offset;
+            }
             res = kRetryAll;
           } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            LOG(INFO) << "Retry for JMP growth at " << lir->offset;
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for JMP growth at " << lir->offset;
+            }
             lir->opcode = kX86Jmp32;
             oatSetupResourceMasks(lir);
             res = kRetryAll;
@@ -1028,8 +1038,14 @@
         DCHECK_EQ(0, entry->skeleton.prefix1);
         DCHECK_EQ(0, entry->skeleton.prefix2);
         cUnit->codeBuffer.push_back(entry->skeleton.opcode);
-        DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-        DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+        if (entry->skeleton.extra_opcode1 != 0) {
+          cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+          if (entry->skeleton.extra_opcode2 != 0) {
+            cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+          }
+        } else {
+          DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+        }
         DCHECK_EQ(0, entry->skeleton.modrm_opcode);
         DCHECK_EQ(0, entry->skeleton.ax_opcode);
         DCHECK_EQ(0, entry->skeleton.immediate_bytes);
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index c916640..29aaeeb 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -204,8 +204,8 @@
     srcReg2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
   }
   rlDest = oatGetDest(cUnit, mir, 0);
-  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-  opRegImm(cUnit, kOpMov, rlResult.lowReg, unorderedGt ? 1 : 0);
+  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+  loadConstantNoClobber(cUnit, rlResult.lowReg, unorderedGt ? 1 : 0);
   if (single) {
     newLIR2(cUnit, kX86UcomissRR, srcReg1, srcReg2);
   } else {
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index aef5879..4987c28 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -147,6 +147,7 @@
 LIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int value) {
   X86OpCode opcode = kX86Bkpt;
   bool byteImm = IS_SIMM8(value);
+  DCHECK(!FPREG(rDestSrc1));
   switch (op) {
     case kOpLsl: opcode = kX86Sal32RI; break;
     case kOpLsr: opcode = kX86Shr32RI; break;
@@ -159,15 +160,7 @@
     case kOpSub: opcode = byteImm ? kX86Sub32RI8 : kX86Sub32RI; break;
     case kOpXor: opcode = byteImm ? kX86Xor32RI8 : kX86Xor32RI; break;
     case kOpCmp: opcode = byteImm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov: {
-      if (value == 0) {  // turn "mov reg, 0" into "xor reg, reg"
-        opcode = kX86Xor32RR;
-        value = rDestSrc1;
-      } else {
-        opcode = kX86Mov32RI;
-      }
-      break;
-    }
+    case kOpMov: return loadConstantNoClobber(cUnit, rDestSrc1, value);
     case kOpMul:
       opcode = byteImm ? kX86Imul32RRI8 : kX86Imul32RRI;
       return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, value);
@@ -410,59 +403,6 @@
 #endif
 }
 
-/* store value base base + scaled index. */
-LIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase,
-                                int rIndex, int rSrc, int scale, OpSize size)
-{
-    UNIMPLEMENTED(WARNING) << "storeBaseIndexed";
-    return NULL;
-#if 0
-    LIR *first = NULL;
-    LIR *res;
-    X86OpCode opcode = kX86Nop;
-    int rNewIndex = rIndex;
-    int tReg = oatAllocTemp(cUnit);
-
-    if (FPREG(rSrc)) {
-        DCHECK(SINGLEREG(rSrc));
-        DCHECK((size == kWord) || (size == kSingle));
-        size = kSingle;
-    } else {
-        if (size == kSingle)
-            size = kWord;
-    }
-
-    if (!scale) {
-        first = newLIR3(cUnit, kX86Addu, tReg , rBase, rIndex);
-    } else {
-        first = opRegRegImm(cUnit, kOpLsl, tReg, rIndex, scale);
-        newLIR3(cUnit, kX86Addu, tReg , rBase, tReg);
-    }
-
-    switch (size) {
-        case kSingle:
-            opcode = kX86Fswc1;
-            break;
-        case kWord:
-            opcode = kX86Sw;
-            break;
-        case kUnsignedHalf:
-        case kSignedHalf:
-            opcode = kX86Sh;
-            break;
-        case kUnsignedByte:
-        case kSignedByte:
-            opcode = kX86Sb;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in storeBaseIndexed";
-    }
-    res = newLIR3(cUnit, opcode, rSrc, 0, tReg);
-    oatFreeTemp(cUnit, rNewIndex);
-    return first;
-#endif
-}
-
 LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
 {
     UNIMPLEMENTED(WARNING) << "loadMultiple";
@@ -686,6 +626,14 @@
   return store;
 }
 
+/* store value at base + scaled index. */
+LIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, int rIndex, int rSrc, int scale,
+                      OpSize size)
+{
+  return storeBaseIndexedDisp(cUnit, NULL, rBase, rIndex, scale, 0,
+                              rSrc, INVALID_REG, size, INVALID_SREG);
+}
+
 LIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, int displacement, int rSrc, OpSize size) {
     return storeBaseIndexedDisp(cUnit, NULL, rBase, INVALID_REG, 0, displacement,
                                 rSrc, INVALID_REG, size, INVALID_SREG);
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 9b9fc6b..378c24d 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -436,6 +436,7 @@
     Binary0fOpCode(kX86Movdxr),     // move into xmm from gpr
     Binary0fOpCode(kX86Movdrx),     // move into reg from xmm
     kX86Set8R, kX86Set8M, kX86Set8A,// set byte depending on condition operand
+    kX86Mfence,                     // memory barrier
     Binary0fOpCode(kX86Imul16),     // 16bit multiply
     Binary0fOpCode(kX86Imul32),     // 32bit multiply
     Binary0fOpCode(kX86Movzx8),     // zero-extend 8-bit value
diff --git a/src/compiler/codegen/x86/x86/ArchVariant.cc b/src/compiler/codegen/x86/x86/ArchVariant.cc
index 944311c..2bb84d7 100644
--- a/src/compiler/codegen/x86/x86/ArchVariant.cc
+++ b/src/compiler/codegen/x86/x86/ArchVariant.cc
@@ -49,13 +49,11 @@
     return res;
 }
 
-void oatGenMemBarrier(CompilationUnit *cUnit, int barrierKind)
+void oatGenMemBarrier(CompilationUnit *cUnit, int /* barrierKind */)
 {
 #if ANDROID_SMP != 0
-    UNIMPLEMENTED(WARNING) << "oatGenMemBarrier";
-#if 0
-    newLIR1(cUnit, kX86Sync, barrierKind);
-#endif
+    // TODO: optimize fences
+    newLIR0(cUnit, kX86Mfence);
 #endif
 }