ART: Implement X86 hard float (Quick/JNI/Baseline)

Use XMM0-XMM3 as parameter registers for float/double on X86.  X86_64
already uses XMM0-XMM7 for parameters.

Change the 'hidden' argument register from XMM0 to XMM7 to avoid a
conflict.

Add support for FPR save/restore in runtime/arch/x86.

Minimal support for Optimizing baseline compiler.

Bump the version in runtime/oat.h because this is an ABI change.

Change-Id: Ia6fe150e8488b9e582b0178c0dda65fc81d5a8ba
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 3f266fe..1f0dba5 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1537,8 +1537,12 @@
 
   uint32_t reg_offset = 1;
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    pushl(spill_regs.at(i).AsX86().AsCpuRegister());
+    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
+    DCHECK(spill.IsCpuRegister());
+    pushl(spill.AsCpuRegister());
+    gpr_count++;
 
     // DW_CFA_advance_loc
     DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
@@ -1552,7 +1556,7 @@
   }
 
   // return address then method on stack
-  int32_t adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
+  int32_t adjust = frame_size - (gpr_count * kFramePointerSize) -
                    sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
                    kFramePointerSize /*return address*/;
   addl(ESP, Immediate(-adjust));
@@ -1572,9 +1576,18 @@
   DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) +
-                 (i * kFramePointerSize)),
-         entry_spills.at(i).AsX86().AsCpuRegister());
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86().IsCpuRegister()) {
+      movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(spill.AsX86().IsXmmRegister());
+      if (spill.getSize() == 8) {
+        movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
+      }
+    }
   }
 }
 
@@ -1584,7 +1597,9 @@
   addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
                       sizeof(StackReference<mirror::ArtMethod>)));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    popl(spill_regs.at(i).AsX86().AsCpuRegister());
+    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
+    DCHECK(spill.IsCpuRegister());
+    popl(spill.AsCpuRegister());
   }
   ret();
 }