ART: Implement hard float for X86
Use XMM0-XMM3 as parameter registers for float/double on X86. X86_64
already uses XMM0-XMM7 for parameters.
Change the 'hidden' argument register from XMM0 to XMM7 so that it no
longer conflicts with XMM0, which is now the first float/double
argument register.
This change was requested to simplify the Optimizing compiler
implementation.
Change-Id: I89ba8ade99b9a8a5b1ad1ee5f5cbfd33d656bfaa
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index c7d83dd..b7fa2d2 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -39,22 +39,15 @@
}
protected:
Mir2Lir* m2l_;
- private:
size_t cur_core_reg_;
size_t cur_fp_reg_;
};
- class InToRegStorageX86Mapper : public InToRegStorageMapper {
+ class InToRegStorageX86Mapper : public InToRegStorageX86_64Mapper {
public:
- explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
+ explicit InToRegStorageX86Mapper(Mir2Lir* m2l)
+ : InToRegStorageX86_64Mapper(m2l) { }
virtual RegStorage GetNextReg(ShortyArg arg);
- virtual void Reset() OVERRIDE {
- cur_core_reg_ = 0;
- }
- protected:
- Mir2Lir* m2l_;
- private:
- size_t cur_core_reg_;
};
InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_;
@@ -118,9 +111,12 @@
if (cu_->target64) {
return As64BitReg(TargetReg32(symbolic_reg));
} else {
+ if (symbolic_reg >= kFArg0 && symbolic_reg <= kFArg3) {
+ // We want an XMM, not a pair.
+ return As64BitReg(TargetReg32(symbolic_reg));
+ }
// x86: construct a pair.
DCHECK((kArg0 <= symbolic_reg && symbolic_reg < kArg3) ||
- (kFArg0 <= symbolic_reg && symbolic_reg < kFArg3) ||
(kRet0 == symbolic_reg));
return RegStorage::MakeRegPair(TargetReg32(symbolic_reg),
TargetReg32(static_cast<SpecialTargetRegister>(symbolic_reg + 1)));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 142acbc..bfa24cc 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -177,10 +177,10 @@
RegStorage::InvalidReg(), // kArg5
RegStorage::InvalidReg(), // kArg6
RegStorage::InvalidReg(), // kArg7
- rs_rAX, // kFArg0
- rs_rCX, // kFArg1
- rs_rDX, // kFArg2
- rs_rBX, // kFArg3
+ rs_fr0, // kFArg0
+ rs_fr1, // kFArg1
+ rs_fr2, // kFArg2
+ rs_fr3, // kFArg3
RegStorage::InvalidReg(), // kFArg4
RegStorage::InvalidReg(), // kFArg5
RegStorage::InvalidReg(), // kFArg6
@@ -197,7 +197,7 @@
rs_rDX, // kRet1
rs_rAX, // kInvokeTgt
- rs_rAX, // kHiddenArg - used to hold the method index before copying to fr0.
- rs_fr0, // kHiddenFpArg
+ rs_rAX, // kHiddenArg - used to hold the method index before copying to fr7.
+ rs_fr7, // kHiddenFpArg
rs_rCX, // kCount
};
@@ -542,13 +542,13 @@
LockTemp(TargetReg32(kArg1));
LockTemp(TargetReg32(kArg2));
LockTemp(TargetReg32(kArg3));
+ LockTemp(TargetReg32(kFArg0));
+ LockTemp(TargetReg32(kFArg1));
+ LockTemp(TargetReg32(kFArg2));
+ LockTemp(TargetReg32(kFArg3));
if (cu_->target64) {
LockTemp(TargetReg32(kArg4));
LockTemp(TargetReg32(kArg5));
- LockTemp(TargetReg32(kFArg0));
- LockTemp(TargetReg32(kFArg1));
- LockTemp(TargetReg32(kFArg2));
- LockTemp(TargetReg32(kFArg3));
LockTemp(TargetReg32(kFArg4));
LockTemp(TargetReg32(kFArg5));
LockTemp(TargetReg32(kFArg6));
@@ -563,13 +563,13 @@
FreeTemp(TargetReg32(kArg2));
FreeTemp(TargetReg32(kArg3));
FreeTemp(TargetReg32(kHiddenArg));
+ FreeTemp(TargetReg32(kFArg0));
+ FreeTemp(TargetReg32(kFArg1));
+ FreeTemp(TargetReg32(kFArg2));
+ FreeTemp(TargetReg32(kFArg3));
if (cu_->target64) {
FreeTemp(TargetReg32(kArg4));
FreeTemp(TargetReg32(kArg5));
- FreeTemp(TargetReg32(kFArg0));
- FreeTemp(TargetReg32(kFArg1));
- FreeTemp(TargetReg32(kFArg2));
- FreeTemp(TargetReg32(kFArg3));
FreeTemp(TargetReg32(kFArg4));
FreeTemp(TargetReg32(kFArg5));
FreeTemp(TargetReg32(kFArg6));
@@ -2457,14 +2457,23 @@
RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
+ const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3};
+ const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
RegStorage result = RegStorage::InvalidReg();
- if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
- result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
- arg.IsRef() ? kRef : kNotWide);
- if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
- result = RegStorage::MakeRegPair(
- result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+ if (arg.IsFP()) {
+ if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+ return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
+ arg.IsWide() ? kWide : kNotWide);
+ }
+ } else {
+ if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+ arg.IsRef() ? kRef : kNotWide);
+ if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ result = RegStorage::MakeRegPair(
+ result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+ }
}
}
return result;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 3e0a852..b48c4ad 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -56,15 +56,15 @@
* x86-64/x32 gs: holds it.
*
* For floating point we don't support CPUs without SSE2 support (ie newer than PIII):
- * Native: x86 | x86-64 / x32 | ART x86 | ART x86-64
- * XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
- * XMM1: caller | caller, arg2 | caller, scratch | caller, arg2, scratch
- * XMM2: caller | caller, arg3 | caller, scratch | caller, arg3, scratch
- * XMM3: caller | caller, arg4 | caller, scratch | caller, arg4, scratch
- * XMM4: caller | caller, arg5 | caller, scratch | caller, arg5, scratch
- * XMM5: caller | caller, arg6 | caller, scratch | caller, arg6, scratch
- * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch
- * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch
+ * Native: x86 | x86-64 / x32 | ART x86 | ART x86-64
+ * XMM0: caller | caller, arg1 | caller, arg1, float return value | caller, arg1, float return value
+ * XMM1: caller | caller, arg2 | caller, arg2, scratch | caller, arg2, scratch
+ * XMM2: caller | caller, arg3 | caller, arg3, scratch | caller, arg3, scratch
+ * XMM3: caller | caller, arg4 | caller, arg4, scratch | caller, arg4, scratch
+ * XMM4: caller | caller, arg5 | caller, scratch | caller, arg5, scratch
+ * XMM5: caller | caller, arg6 | caller, scratch | caller, arg6, scratch
+ * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch
+ * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch
* --- x86-64/x32 registers
* XMM8 .. 11: caller save available as scratch registers for ART.
* XMM12 .. 15: callee save available as promoted registers for ART.