Merge "Optimize x86 long arithmetic"
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 2bc579a..bf85d91 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -94,9 +94,9 @@
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index e0161e5..e02382b 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -795,8 +795,8 @@
   return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
 }
 
-void ArmMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
     /*
      * To pull off inline multiply, we have a worst-case requirement of 8 temporary
      * registers.  Normally for Arm, we get 5.  We can get to 6 by including
@@ -868,27 +868,27 @@
     UnmarkTemp(rARM_LR);
 }
 
-void ArmMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
 }
 
-void ArmMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
 }
 
-void ArmMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
 }
 
-void ArmMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
 }
 
-void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of genXoLong for Arm";
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index daf21df..f2807c6 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1718,7 +1718,7 @@
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenAddLong(rl_dest, rl_src1, rl_src2);
+        GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpAdd;
@@ -1727,7 +1727,7 @@
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenSubLong(rl_dest, rl_src1, rl_src2);
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpSub;
@@ -1736,7 +1736,7 @@
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
       if (cu_->instruction_set == kThumb2) {
-        GenMulLong(rl_dest, rl_src1, rl_src2);
+        GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       } else {
         call_out = true;
@@ -1762,7 +1762,7 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (cu_->instruction_set == kX86) {
-        return GenAndLong(rl_dest, rl_src1, rl_src2);
+        return GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
       second_op = kOpAnd;
@@ -1770,7 +1770,7 @@
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenOrLong(rl_dest, rl_src1, rl_src2);
+        GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpOr;
@@ -1779,7 +1779,7 @@
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenXorLong(rl_dest, rl_src1, rl_src2);
+        GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpXor;
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8f2f6ad..65582dd 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,53 @@
   }
 }
 
+void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_src.wide);
+  DCHECK_EQ(rl_src.location, kLocPhysReg);  // The value must already be in registers.
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, rl_src.low_reg, rl_src.high_reg);
+  } else {
+    // Dest is not in registers: just re-assign them.  Dest gets Src's regs.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.high_reg = rl_src.high_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+    Clobber(rl_src.high_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to its home location).
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+
+  // Does this wide value live in two registers (or in a single vector register)?
+  if (rl_dest.low_reg != rl_dest.high_reg) {
+    MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+    MarkDirty(rl_dest);
+    MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+  } else {
+    // Low and high are the same register, so this must be an x86 vector register value.
+    DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+    MarkDirty(rl_dest);
+  }
+
+  ResetDefLocWide(rl_dest);
+  if ((IsDirty(rl_dest.low_reg) ||
+      IsDirty(rl_dest.high_reg)) &&
+      (oat_live_out(rl_dest.s_reg_low) ||
+      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+    LIR *def_start = last_lir_insn_;  // Bracket the store so MarkDefWide can record it.
+    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
+              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                      rl_dest.low_reg, rl_dest.high_reg);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    MarkDefWide(rl_dest, def_start, def_end);
+  }
+}
+
 /* Utilities to load the current Method* */
 void Mir2Lir::LoadCurrMethodDirect(int r_tgt) {
   LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
@@ -303,4 +350,47 @@
   return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
 }
 
+RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
+  DCHECK(!loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));  // NOTE(review): loc is narrow -- is high_reg valid here?
+  if (IsTemp(loc.low_reg)) {  // Already a temp: keep the same register.
+    Clobber(loc.low_reg);
+  } else {  // Not a temp: copy the value into a newly allocated one.
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+
+  // Ensure that the returned location no longer represents the original s_reg.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
+RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
+  DCHECK(loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {  // Low half is already a temp: keep the same register.
+    Clobber(loc.low_reg);
+  } else {  // Not a temp: copy the low half into a newly allocated one.
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+  if (IsTemp(loc.high_reg)) {  // Same treatment, independently, for the high half.
+    Clobber(loc.high_reg);
+  } else {
+    int temp_high = AllocTemp();
+    OpRegCopy(temp_high, loc.high_reg);
+    loc.high_reg = temp_high;
+  }
+
+  // Ensure that the returned location no longer represents the original s_reg.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a5a14d5..1f99e10 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 180d56c..3410ecb 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -356,13 +356,13 @@
   return NULL;
 }
 
-void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
 }
 
-void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -383,8 +383,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -425,18 +425,19 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void MipsMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1,
                              RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Mips";
 }
 
-void MipsMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void MipsMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Mips";
 }
 
-void MipsMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenXorLong for Mips";
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index f9d9e9e..c6c5000 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -628,6 +628,18 @@
      */
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
+    /**
+     * @brief Used to do the final store in a wide destination as per bytecode semantics.
+     * @see StoreValueWide
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. Its location must be kLocPhysReg.
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register values that now need to be properly registered.  This is used to avoid an
+     * extra pair of register copies that would result if StoreValueWide was called.
+     */
+    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
     void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
@@ -696,11 +708,14 @@
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenMulLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAddLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAndLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual void GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1,
@@ -728,11 +743,14 @@
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenOrLong(Instruction::Code,
+                           RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
-    virtual void GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenSubLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenXorLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base,
                                 int offset, ThrowKind kind) = 0;
@@ -837,6 +855,20 @@
       return cu_;
     }
 
+    /*
+     * @brief Force a location (in a register) into a temporary register
+     * @param loc location of result
+     * @returns the updated location
+     */
+    RegLocation ForceTemp(RegLocation loc);
+
+    /*
+     * @brief Force a wide location (in registers) into temporary registers
+     * @param loc location of result
+     * @returns the updated location
+     */
+    RegLocation ForceTempWide(RegLocation loc);
+
   private:
     void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                             RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 816f2d0..53e82c3 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     LIR* GenMemImmedCheck(ConditionCode c_code, int base, int offset, int check_value,
@@ -136,6 +136,49 @@
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+    /*
+     * @brief Generate a two address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    /*
+     * @brief Generate a three address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src1 source operand
+     * @param rl_src2 constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2, Instruction::Code op);
+
+    /**
+      * @brief Generate a long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src1 First operand.
+      * @param rl_src2 Second operand.
+      * @param op The DEX opcode for the operation.
+      * @param is_commutative The sources can be swapped if needed.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, Instruction::Code op, bool is_commutative);
+
+    /**
+      * @brief Generate a two operand long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src Second operand.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+      * @brief Generate a long operation.
+      * @param rl_dest The destination.  Must be in a register.
+      * @param rl_src The other operand.  May be in a register or in memory.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
@@ -230,6 +273,42 @@
                                   int64_t val, ConditionCode ccode);
     void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param loc Register location of the operand
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @param value Immediate value for the operation.  Used for byte variants
+     * @returns the correct x86 opcode to perform the operation
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param dest location of the destination.  May be register or memory.
+     * @param rhs Location for the rhs of the operation.  May be in register or memory.
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @returns the correct x86 opcode to perform the operation
+     * @note at most one location may refer to memory
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                        bool is_high_op);
+
+    /*
+     * @brief Is this operation a no-op for this opcode and value
+     * @param op Dex opcode for the operation
+     * @param value Immediate value for the operation.
+     * @returns 'true' if the operation will have no effect
+     */
+    bool IsNoOp(Instruction::Code op, int32_t value);
+
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 21956fa..79820c9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -512,100 +512,174 @@
   return NULL;
 }
 
-void X86Mir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenX86Long for x86";
 }
-void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpAdd, r0, r2);  // r0 = r0 + r2
-  OpRegReg(kOpAdc, r1, r3);  // r1 = r1 + r3 + CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+
+void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
+                                   Instruction::Code op) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);  // Low-word opcode.
+  if (rl_src.location == kLocPhysReg) {
+    // Both operands are in registers.
+    if (rl_dest.low_reg == rl_src.high_reg) {
+      // Dest low aliases src high: the low-word op would clobber it before the high-word use.
+      int temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_dest.low_reg);
+      rl_src.high_reg = temp_reg;
+    }
+    NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg);
+
+    x86op = GetOpcode(op, rl_dest, rl_src, true);  // High-word opcode.
+    NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg);
+    FreeTemp(rl_src.low_reg);
+    FreeTemp(rl_src.high_reg);
+    return;
+  }
+
+  // RHS is in memory; it is addressed relative to the stack pointer.
+  DCHECK((rl_src.location == kLocDalvikFrame) ||
+         (rl_src.location == kLocCompilerTemp));
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_src.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);  // High-word opcode.
+  lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
 }
 
-void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
-  OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  rl_dest = UpdateLocWide(rl_dest);
+  if (rl_dest.location == kLocPhysReg) {
+    // Ensure we are in a register pair.
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+
+    rl_src = UpdateLocWide(rl_src);
+    GenLongRegOrMemOp(rl_result, rl_src, op);  // Source may be in registers or in memory.
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+
+  // Operate directly on memory: dest is addressed relative to the stack pointer.
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);  // Low-word opcode.
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);  // High-word opcode.
+  lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
 }
 
-void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) & (r2:r3)
-  OpRegReg(kOpAnd, r0, r2);  // r0 = r0 & r2
-  OpRegReg(kOpAnd, r1, r3);  // r1 = r1 & r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2, Instruction::Code op,
+                              bool is_commutative) {
+  // Is this really a 2 operand operation?
+  switch (op) {
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+      GenLongArith(rl_dest, rl_src2, op);  // rl_src1 is not passed on for the 2ADDR forms.
+      return;
+    default:
+      break;
+  }
+
+  if (rl_dest.location == kLocPhysReg) {
+    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
+
+    // We are about to clobber the LHS, so it needs to be a temp.
+    rl_result = ForceTempWide(rl_result);
+
+    // Perform the operation using the RHS.
+    rl_src2 = UpdateLocWide(rl_src2);
+    GenLongRegOrMemOp(rl_result, rl_src2, op);
+
+    // And now record that the result is in the temp.
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // Get one of the source operands into temporary register.
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  if (!(IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg))) {
+    if (is_commutative) {
+      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+      if (IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg)) {
+        // Only src2 is a temp pair: swap the operands so src1's registers are not clobbered.
+        GenLongRegOrMemOp(rl_src2, rl_src1, op);
+        StoreFinalValueWide(rl_dest, rl_src2);
+        return;
+      }
+    }
+    // Need LHS to be the temp.
+    rl_src1 = ForceTempWide(rl_src1);
+  }
+  GenLongRegOrMemOp(rl_src1, rl_src2, op);
+
+  StoreFinalValueWide(rl_dest, rl_src1);
 }
 
-void X86Mir2Lir::GenOrLong(RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) | (r2:r3)
-  OpRegReg(kOpOr, r0, r2);  // r0 = r0 | r2
-  OpRegReg(kOpOr, r1, r3);  // r1 = r1 | r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenXorLong(RegLocation rl_dest,
+void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
-  OpRegReg(kOpXor, r0, r2);  // r0 = r0 ^ r2
-  OpRegReg(kOpXor, r1, r3);  // r1 = r1 ^ r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);  // true = is_commutative for ADD.
+}
+
+void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);  // false: SUB operand order matters.
+}
+
+void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);  // true = is_commutative for AND.
+}
+
+void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);  // true = is_commutative for OR.
+}
+
+void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);  // true = is_commutative for XOR.
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src, r0, r1);
-  // Compute (r1:r0) = -(r1:r0)
-  OpRegReg(kOpNeg, r0, r0);  // r0 = -r0
-  OpRegImm(kOpAdc, r1, 0);   // r1 = r1 + CF
-  OpRegReg(kOpNeg, r1, r1);  // r1 = -r1
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = ForceTempWide(rl_src);  // Negated in place below, so it must be temps.
+  if (rl_dest.low_reg == rl_src.high_reg) {  // NOTE(review): rl_dest.location is not checked.
+    // The registers are the same, so we would clobber it before the use.
+    int temp_reg = AllocTemp();
+    OpRegCopy(temp_reg, rl_result.low_reg);  // NOTE(review): low word copied into new high reg?
+    rl_result.high_reg = temp_reg;
+  }
+  OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg);    // rLow = -rLow
+  OpRegImm(kOpAdc, rl_result.high_reg, 0);                   // rHigh = rHigh + CF
+  OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg);  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -748,8 +822,241 @@
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  switch (opcode) {  // Dispatch on opcode to a generator that folds the constant operand.
+    case Instruction::ADD_LONG:
+    case Instruction::AND_LONG:
+    case Instruction::OR_LONG:
+    case Instruction::XOR_LONG:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);  // Swapped so the constant comes last.
+      }
+      break;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);  // Operands are never swapped for SUB.
+      }
+      break;
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongImm(rl_dest, rl_src2, opcode);  // Two-operand form: rl_src1 is not passed.
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);  // Swapped so the constant comes last.
+      }
+      break;
+    default:
+      // Default - bail to non-const handler.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      break;
+  }
+}
+
+bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
+  switch (op) {  // True when applying op with this 32-bit immediate leaves the operand unchanged.
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      return value == -1;  // x & 0xFFFFFFFF == x
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      return value == 0;  // x | 0 == x and x ^ 0 == x
+    default:
+      return false;  // ADD/SUB land here; presumably kept so the high half sees the carry.
+  }
+}
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                                bool is_high_op) {  // x86 opcode for one 32-bit half of a long op.
+  bool rhs_in_mem = rhs.location != kLocPhysReg;
+  bool dest_in_mem = dest.location != kLocPhysReg;
+  DCHECK(!rhs_in_mem || !dest_in_mem);  // At most one operand may be outside a register.
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Adc32MR : kX86Add32MR;  // High half takes the Adc form.
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+      }
+      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;  // High half takes the Sbb form.
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+      }
+      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (dest_in_mem) {
+        return kX86And32MR;  // Bitwise ops ignore is_high_op: both halves use the same opcode.
+      }
+      return rhs_in_mem ? kX86And32RM : kX86And32RR;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Or32MR;
+      }
+      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Xor32MR;
+      }
+      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32RR;  // Placeholder return following the fatal log.
+  }
+}
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
+                                int32_t value) {  // x86 opcode for "loc <op>= value" on one half.
+  bool in_mem = loc.location != kLocPhysReg;
+  bool byte_imm = IS_SIMM8(value);  // Chooses between the *I8 and full *I opcode forms below.
+  DCHECK(in_mem || !IsFpReg(loc.low_reg));  // A register operand must not be an FP register.
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;  // High half takes the Adc form.
+        }
+        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+      }
+      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;  // High half takes the Sbb form.
+        }
+        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+      }
+      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (byte_imm) {
+        return in_mem ? kX86And32MI8 : kX86And32RI8;  // Bitwise ops ignore is_high_op.
+      }
+      return in_mem ? kX86And32MI : kX86And32RI;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
+      }
+      return in_mem ? kX86Or32MI : kX86Or32RI;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
+      }
+      return in_mem ? kX86Xor32MI : kX86Xor32RI;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32MI;  // Placeholder return following the fatal log.
+  }
+}
+
+void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  DCHECK(rl_src.is_const);  // Emits rl_dest <op>= constant(rl_src), one 32-bit half at a time.
+  int64_t val = mir_graph_->ConstantValueWide(rl_src);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+
+  // Can we just do this into memory? If so, operate on the kSp-relative slot directly.
+  if ((rl_dest.location == kLocDalvikFrame) ||
+      (rl_dest.location == kLocCompilerTemp)) {
+    int rBase = TargetReg(kSp);
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+
+    if (!IsNoOp(op, val_lo)) {  // Skip a half that the immediate would leave unchanged.
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);  // true = is_high_op.
+      LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi);
+      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, true /* is64bit */);
+    }
+    return;
+  }
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  DCHECK_EQ(rl_result.location, kLocPhysReg);  // Not in memory, so it has to be in registers.
+  DCHECK(!IsFpReg(rl_result.low_reg));
+
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);  // true = is_high_op.
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, Instruction::Code op) {
+  DCHECK(rl_src2.is_const);  // Emits rl_dest = rl_src1 <op> constant(rl_src2), half by half.
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+  rl_src1 = UpdateLocWide(rl_src1);
+
+  // Can we do this directly into the destination registers? Only if dest and src1 share them.
+  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
+      rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg &&
+      !IsFpReg(rl_dest.low_reg)) {
+    if (!IsNoOp(op, val_lo)) {  // Skip a half that the immediate would leave unchanged.
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      NewLIR2(x86op, rl_dest.low_reg, val_lo);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);  // true = is_high_op.
+      NewLIR2(x86op, rl_dest.high_reg, val_hi);
+    }
+    return;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  DCHECK_EQ(rl_src1.location, kLocPhysReg);
+
+  // We need the values to be in a temporary, since the operation overwrites its operand.
+  RegLocation rl_result = ForceTempWide(rl_src1);
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);  // true = is_high_op.
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_result);
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5c993c5..27cec8d2 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -679,31 +679,24 @@
   }
 
   DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
-    // Need a wide vector register.
-    low_reg = AllocTypedTemp(true, reg_class);
-    loc.low_reg = low_reg;
-    loc.high_reg = low_reg;  // Play nice with existing code.
-    loc.vec_len = kVectorLength8;
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
-    }
+  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+  new_regs = AllocTypedTempPair(loc.fp, reg_class);
+  loc.low_reg = new_regs & 0xff;
+  loc.high_reg = (new_regs >> 8) & 0xff;
+
+  if (loc.low_reg == loc.high_reg) {
     DCHECK(IsFpReg(loc.low_reg));
+    loc.vec_len = kVectorLength8;
   } else {
-    DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-    new_regs = AllocTypedTempPair(loc.fp, reg_class);
-    loc.low_reg = new_regs & 0xff;
-    loc.high_reg = (new_regs >> 8) & 0xff;
-
     MarkPair(loc.low_reg, loc.high_reg);
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
+  }
+  if (update) {
+    loc.location = kLocPhysReg;
+    MarkLive(loc.low_reg, loc.s_reg_low);
+    if (loc.low_reg != loc.high_reg) {
       MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
     }
-    DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
   }
   return loc;
 }
@@ -796,4 +789,23 @@
   // Just use the standard code to do the generation.
   Mir2Lir::GenConstWide(rl_dest, value);
 }
+
+// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
+void X86Mir2Lir::DumpRegLocation(RegLocation loc) {  // Logs the fields of loc in one INFO line.
+  LOG(INFO)  << "location: " << loc.location << ','
+             << (loc.wide ? " w" : "  ")  // Each flag prints its letter when set, else blanks.
+             << (loc.defined ? " D" : "  ")
+             << (loc.is_const ? " c" : "  ")
+             << (loc.fp ? " F" : "  ")
+             << (loc.core ? " C" : "  ")
+             << (loc.ref ? " r" : "  ")
+             << (loc.high_word ? " h" : "  ")
+             << (loc.home ? " H" : "  ")
+             << " vec_len: " << loc.vec_len
+             << ", low: " << static_cast<int>(loc.low_reg)  // Cast so it prints as a number.
+             << ", high: " << static_cast<int>(loc.high_reg)
+             << ", s_reg: " << loc.s_reg_low
+             << ", orig: " << loc.orig_sreg;
+}
+
 }  // namespace art