Support to generate inline long to FP bytecodes for x86

long-to-float and long-to-double are now generated inline instead of calling
a helper routine. The conversion is done by using x87.

Change-Id: I196e526afec1be212898baceca8527549c3655b6
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 321c6a7..6481589 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -295,7 +295,11 @@
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
   { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
-  { kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
+
+  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" },
+  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" },
+  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
+  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
   { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 22e36d5..70263d8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -452,6 +452,7 @@
      */
     LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
                            int offset, int check_value, LIR* target);
+
     /*
      * Can this operation be using core registers without temporaries?
      * @param rl_lhs Left hand operand.
@@ -460,6 +461,14 @@
      */
     bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
 
+    /**
+     * @brief Generates inline code for conversion of long to FP by using x87.
+     * @param rl_dest The destination of the FP.
+     * @param rl_src The source of the long.
+     * @param is_double 'true' if dealing with double, 'false' for float.
+     */
+    void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+
     /*
      * @brief Perform MIR analysis before compiling method.
      * @note Invokes Mir2LiR::Materialize after analysis.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 006fe76..4c2ecc0 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -130,6 +130,70 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
+  // Compute offsets to the source and destination VRs on stack
+  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
+  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
+
+  // Update the in-register state of source.
+  rl_src = UpdateLocWide(rl_src);
+
+  // If the source is in physical register, then put it in its location on stack.
+  if (rl_src.location == kLocPhysReg) {
+    RegisterInfo* lo_info = GetRegInfo(rl_src.low_reg);
+
+    if (lo_info != nullptr && lo_info->is_temp) {
+      // Calling FlushSpecificReg because it will only write back VR if it is dirty.
+      FlushSpecificReg(lo_info);
+    } else {
+      // It must have been register promoted if it is not a temp but is still in physical
+      // register. Since we need it to be in memory to convert, we place it there now.
+      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.low_reg, rl_src.high_reg);
+    }
+  }
+
+  // Push the source virtual register onto the x87 stack.
+  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp), src_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
+      true /* is_load */, true /* is64bit */);
+
+  // Now pop off x87 stack and store it in the destination VR's stack location.
+  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
+  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
+  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp), displacement);
+  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
+
+  /*
+   * The result is in a physical register if it was in a temp or was register
+   * promoted. For that reason it is enough to check if it is in physical
+   * register. If it is, then we must do all of the bookkeeping necessary to
+   * invalidate temp (if needed) and load in promoted register (if needed).
+   * If the result's location is in memory, then we do not need to do anything
+   * more since the fstp has already placed the correct value in memory.
+   */
+  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
+  if (rl_result.location == kLocPhysReg) {
+    /*
+     * We already know that the result is in a physical register but do not know if it is the
+     * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the
+     * correct register class.
+     */
+    if (is_double) {
+      rl_result = EvalLocWide(rl_dest, kFPReg, true);
+
+      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
+
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+
+      LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.low_reg);
+
+      StoreValue(rl_dest, rl_result);
+    }
+  }
+}
+
 void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src) {
   RegisterClass rcSrc = kFPReg;
@@ -198,11 +262,10 @@
       return;
     }
     case Instruction::LONG_TO_DOUBLE:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
+      GenLongToFP(rl_dest, rl_src, true /* is_double */);
       return;
     case Instruction::LONG_TO_FLOAT:
-      // TODO: inline by using memory as a 64-bit source. Be careful about promoted registers.
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src);
+      GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
       GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index c49f627..e75da0d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -356,7 +356,10 @@
   kX86PsrlqRI,                  // right shift of floating point registers
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
-  kX86FstpdM,                   // Store and pop top x87 fp stack
+  kX86Fild32M,                  // push 32-bit integer on x87 stack
+  kX86Fild64M,                  // push 64-bit integer on x87 stack
+  kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
+  kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
   kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
   Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1