Revert "Revert "Better support for x86 XMM registers""
This reverts commit 8ff67e3338952c70ccf3b609559bf8cc0f379cfd.
Re-lands the original change with a fix applied to the loc.fp usage.
Change-Id: I1eb3005392544fcf30c595923ed25bcee2dc4859
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index e6621f3..816f2d0 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -173,6 +173,12 @@
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ RegLocation UpdateLocWide(RegLocation loc);
+ RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+ RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+ int AllocTempDouble();
+ void ResetDefLocWide(RegLocation rl);
+
private:
void EmitPrefix(const X86EncodingMap* entry);
void EmitOpcode(const X86EncodingMap* entry);
@@ -222,6 +228,8 @@
void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
int64_t val, ConditionCode ccode);
+ void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
+ void GenConstWide(RegLocation rl_dest, int64_t value);
};
} // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 11ccd4b..01479a9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -155,9 +155,11 @@
// TODO: Prevent this from happening in the code. The result is often
// unused or could have been loaded more easily from memory.
NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
+ dest_hi = AllocTempDouble();
NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
NewLIR2(kX86PsllqRI, dest_hi, 32);
NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
+ FreeTemp(dest_hi);
}
} else {
if (src_fp) {
@@ -525,7 +527,7 @@
// Compute (r1:r0) = (r1:r0) + (r2:r3)
OpRegReg(kOpAdd, r0, r2); // r0 = r0 + r2
OpRegReg(kOpAdc, r1, r3); // r1 = r1 + r3 + CF
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -541,7 +543,7 @@
// Compute (r1:r0) = (r1:r0) + (r2:r3)
OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2
OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -557,7 +559,7 @@
// Compute (r1:r0) = (r1:r0) & (r2:r3)
OpRegReg(kOpAnd, r0, r2); // r0 = r0 & r2
OpRegReg(kOpAnd, r1, r3); // r1 = r1 & r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -573,7 +575,7 @@
// Compute (r1:r0) = (r1:r0) | (r2:r3)
OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2
OpRegReg(kOpOr, r1, r3); // r1 = r1 | r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -589,7 +591,7 @@
// Compute (r1:r0) = (r1:r0) ^ (r2:r3)
OpRegReg(kOpXor, r0, r2); // r0 = r0 ^ r2
OpRegReg(kOpXor, r1, r3); // r1 = r1 ^ r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -602,7 +604,7 @@
OpRegReg(kOpNeg, r0, r0); // r0 = -r0
OpRegImm(kOpAdc, r1, 0); // r1 = r1 + CF
OpRegReg(kOpNeg, r1, r1); // r1 = -r1
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index b281063..5c993c5 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -416,7 +416,7 @@
if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
low_reg = AllocTempDouble();
- high_reg = low_reg + 1;
+ high_reg = low_reg; // only one allocated!
res = (low_reg & 0xff) | ((high_reg & 0xff) << 8);
return res;
}
@@ -546,4 +546,254 @@
return X86Mir2Lir::EncodingMap[opcode].fmt;
}
+/*
+ * Return a copy of the wide location 'loc' updated with its current in-register status.
+ * If the value lives in live temps that can be reused (a single FP register, or a
+ * matching low/high pair), reflect that fact by marking it kLocPhysReg. No code
+ * is generated. If the live registers cannot be reused, clobber them and any pair partner.
+ */
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::UpdateLocWide(RegLocation loc) {
+ DCHECK(loc.wide);
+ DCHECK(CheckCorePoolSanity());
+ if (loc.location != kLocPhysReg) {
+ DCHECK((loc.location == kLocDalvikFrame) ||
+ (loc.location == kLocCompilerTemp));
+ // Are the dalvik regs already live in physical registers?
+ RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
+
+ // Handle FP registers specially on x86: the whole wide value sits in one FP register.
+ if (info_lo && IsFpReg(info_lo->reg)) {
+ bool match = true;
+
+ // The FP register is only reusable if it is not recorded as half of a register pair.
+ match = match && (info_lo->pair == 0);
+
+ if (match) {
+ // We can reuse; update the register usage info.
+ loc.low_reg = info_lo->reg;
+ loc.high_reg = info_lo->reg; // Same register for both halves; play nice with existing code.
+ loc.location = kLocPhysReg;
+ loc.vec_len = kVectorLength8;  // wide value held in a single vector register
+ DCHECK(IsFpReg(loc.low_reg));
+ return loc;
+ }
+ // We can't easily reuse; clobber and free any overlaps.
+ if (info_lo) {
+ Clobber(info_lo->reg);
+ FreeTemp(info_lo->reg);
+ if (info_lo->pair)
+ Clobber(info_lo->partner);
+ }
+ } else {
+ // Low half is not live in an FP register: look for a live low/high register pair.
+ RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg);
+ bool match = true;
+ match = match && (info_lo != NULL);
+ match = match && (info_hi != NULL);
+ // Are they both core or both FP?
+ match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg));
+ // If a pair of FP singles, are they aligned? NOTE(review): info_lo is never FP in this branch, so this looks unreachable - confirm.
+ if (match && IsFpReg(info_lo->reg)) {
+ match &= ((info_lo->reg & 0x1) == 0);
+ match &= ((info_hi->reg - info_lo->reg) == 1);
+ }
+ // If previously used as a pair, is it the same pair?
+ if (match && (info_lo->pair || info_hi->pair)) {
+ match = (info_lo->pair == info_hi->pair);
+ match &= ((info_lo->reg == info_hi->partner) &&
+ (info_hi->reg == info_lo->partner));
+ }
+ if (match) {
+ // Can reuse - update the register usage info
+ loc.low_reg = info_lo->reg;
+ loc.high_reg = info_hi->reg;
+ loc.location = kLocPhysReg;
+ MarkPair(loc.low_reg, loc.high_reg);
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ return loc;
+ }
+ // Can't easily reuse - clobber and free any overlaps
+ if (info_lo) {
+ Clobber(info_lo->reg);
+ FreeTemp(info_lo->reg);
+ if (info_lo->pair)
+ Clobber(info_lo->partner);
+ }
+ if (info_hi) {
+ Clobber(info_hi->reg);
+ FreeTemp(info_hi->reg);
+ if (info_hi->pair)
+ Clobber(info_hi->partner);
+ }
+ }
+ }
+ return loc;  // unchanged if it was already kLocPhysReg or nothing could be reused
+}
+
+// Put wide 'loc' in reg_class registers. TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) {
+ DCHECK(loc.wide);
+ int32_t new_regs;
+ int32_t low_reg;
+ int32_t high_reg;
+
+ loc = UpdateLocWide(loc);
+
+ /* If it is already in a register, we can assume proper form. Is it the right reg class? */
+ if (loc.location == kLocPhysReg) {
+ DCHECK_EQ(IsFpReg(loc.low_reg), loc.IsVectorScalar());  // FP residency and vector-scalar marking must agree
+ if (!RegClassMatches(reg_class, loc.low_reg)) {
+ /* It is the wrong register class. Reallocate and copy. */
+ if (!IsFpReg(loc.low_reg)) {
+ // We want this in an FP reg, and it is in core registers.
+ DCHECK(reg_class != kCoreReg);
+ // Allocate this into any FP reg, and mark it with the right size.
+ low_reg = AllocTypedTemp(true, reg_class);
+ OpVectorRegCopyWide(low_reg, loc.low_reg, loc.high_reg);
+ CopyRegInfo(low_reg, loc.low_reg);
+ Clobber(loc.low_reg);
+ Clobber(loc.high_reg);
+ loc.low_reg = low_reg;
+ loc.high_reg = low_reg; // Same register for both halves; play nice with existing code.
+ loc.vec_len = kVectorLength8;
+ } else {
+ // The value is in an FP register, and we want it in a pair of core registers.
+ DCHECK_EQ(reg_class, kCoreReg);
+ DCHECK_EQ(loc.low_reg, loc.high_reg);
+ new_regs = AllocTypedTempPair(false, kCoreReg); // Force to core registers.
+ low_reg = new_regs & 0xff;  // low byte encodes the low register
+ high_reg = (new_regs >> 8) & 0xff;  // next byte encodes the high register
+ DCHECK_NE(low_reg, high_reg);
+ OpRegCopyWide(low_reg, high_reg, loc.low_reg, loc.high_reg);
+ CopyRegInfo(low_reg, loc.low_reg);
+ CopyRegInfo(high_reg, loc.high_reg);
+ Clobber(loc.low_reg);
+ Clobber(loc.high_reg);
+ loc.low_reg = low_reg;
+ loc.high_reg = high_reg;
+ MarkPair(loc.low_reg, loc.high_reg);
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ }
+ }
+ return loc;
+ }
+
+ DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
+ // Need a wide vector register. NOTE(review): loc is not kLocPhysReg here, so loc.low_reg may be stale - confirm this test.
+ low_reg = AllocTypedTemp(true, reg_class);
+ loc.low_reg = low_reg;
+ loc.high_reg = low_reg; // Same register for both halves; play nice with existing code.
+ loc.vec_len = kVectorLength8;
+ if (update) {  // record the new register as the value's live home
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ }
+ DCHECK(IsFpReg(loc.low_reg));
+ } else {
+ DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+ new_regs = AllocTypedTempPair(loc.fp, reg_class);
+ loc.low_reg = new_regs & 0xff;  // low byte encodes the low register
+ loc.high_reg = (new_regs >> 8) & 0xff;  // next byte encodes the high register
+
+ MarkPair(loc.low_reg, loc.high_reg);
+ if (update) {  // record both halves as live in the new pair
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+ }
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ }
+ return loc;
+}
+
+// Put 'loc' in a reg_class register. TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
+ int new_reg;
+
+ if (loc.wide)
+ return EvalLocWide(loc, reg_class, update);  // wide values are handled separately
+
+ loc = UpdateLoc(loc);
+
+ if (loc.location == kLocPhysReg) {
+ if (!RegClassMatches(reg_class, loc.low_reg)) {
+ /* Wrong register class. Realloc, copy and transfer ownership. */
+ new_reg = AllocTypedTemp(loc.fp, reg_class);
+ OpRegCopy(new_reg, loc.low_reg);
+ CopyRegInfo(new_reg, loc.low_reg);
+ Clobber(loc.low_reg);
+ loc.low_reg = new_reg;
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+ loc.vec_len = kVectorLength4;  // non-wide value now held in an FP register
+ }
+ return loc;
+ }
+
+ DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+
+ new_reg = AllocTypedTemp(loc.fp, reg_class);  // not in a register yet: allocate one
+ loc.low_reg = new_reg;
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+ loc.vec_len = kVectorLength4;  // non-wide value held in an FP register
+
+ if (update) {  // record the new register as the value's live home
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ }
+ return loc;
+}
+
+int X86Mir2Lir::AllocTempDouble() {
+ // A double does not need a register pair here, so a single float temp is handed back.
+ return AllocTempFloat();
+}
+
+// Reset the def info of the register(s) holding wide 'rl'. TODO: Reunify with common code after 'pair mess' has been fixed
+void X86Mir2Lir::ResetDefLocWide(RegLocation rl) {
+ DCHECK(rl.wide);
+ RegisterInfo* p_low = IsTemp(rl.low_reg);
+ if (IsFpReg(rl.low_reg)) {
+ // We are using only the low register, so high_reg is not consulted.
+ if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {  // skipped when kSuppressLoads is disabled
+ NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+ }
+ ResetDef(rl.low_reg);
+ } else {
+ RegisterInfo* p_high = IsTemp(rl.high_reg);
+ if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+ DCHECK(p_low->pair);
+ NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+ }
+ if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+ DCHECK(p_high->pair);  // high half is only sanity-checked; NullifyRange is called for the low half only
+ }
+ ResetDef(rl.low_reg);
+ ResetDef(rl.high_reg);
+ }
+}
+
+void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
+ // Materialize 64-bit 'value' in rl_dest. Can we do this directly to memory (rl_dest not live in registers)?
+ rl_dest = UpdateLocWide(rl_dest);
+ if ((rl_dest.location == kLocDalvikFrame) ||
+ (rl_dest.location == kLocCompilerTemp)) {
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
+ int rBase = TargetReg(kSp);  // stores are addressed relative to the kSp target register
+ int displacement = SRegOffset(rl_dest.s_reg_low);
+
+ LIR * store = NewLIR3(kX86Mov32MI, rBase, displacement + LOWORD_OFFSET, val_lo);  // low 32 bits
+ AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+ false /* is_load */, true /* is64bit */);
+ store = NewLIR3(kX86Mov32MI, rBase, displacement + HIWORD_OFFSET, val_hi);  // high 32 bits
+ AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
+ false /* is_load */, true /* is64bit */);
+ return;
+ }
+
+ // Already in registers: just use the standard code to do the generation.
+ Mir2Lir::GenConstWide(rl_dest, value);
+}
} // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index f683aff..91c39fa 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -334,6 +334,7 @@
LIR *res;
if (X86_FPREG(r_dest_lo)) {
DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi
+ DCHECK_EQ(r_dest_lo, r_dest_hi);
if (value == 0) {
return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
} else {
@@ -343,9 +344,11 @@
res = LoadConstantNoClobber(r_dest_lo, val_lo);
}
if (val_hi != 0) {
+ r_dest_hi = AllocTempDouble();
LoadConstantNoClobber(r_dest_hi, val_hi);
NewLIR2(kX86PsllqRI, r_dest_hi, 32);
NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+ FreeTemp(r_dest_hi);
}
}
} else {
@@ -370,12 +373,6 @@
is64bit = true;
if (X86_FPREG(r_dest)) {
opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
- if (X86_SINGLEREG(r_dest)) {
- DCHECK(X86_FPREG(r_dest_hi));
- DCHECK_EQ(r_dest, (r_dest_hi - 1));
- r_dest = S2d(r_dest, r_dest_hi);
- }
- r_dest_hi = r_dest + 1;
} else {
pair = true;
opcode = is_array ? kX86Mov32RA : kX86Mov32RM;
@@ -488,12 +485,6 @@
is64bit = true;
if (X86_FPREG(r_src)) {
opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
- if (X86_SINGLEREG(r_src)) {
- DCHECK(X86_FPREG(r_src_hi));
- DCHECK_EQ(r_src, (r_src_hi - 1));
- r_src = S2d(r_src, r_src_hi);
- }
- r_src_hi = r_src + 1;
} else {
pair = true;
opcode = is_array ? kX86Mov32AR : kX86Mov32MR;
@@ -573,4 +564,17 @@
r_src_lo, r_src_hi, kLong, INVALID_SREG);
}
+/*
+ * Copy a long value held in a pair of core registers (low_reg, high_reg)
+ * into the single XMM register fp_reg, using one temporary FP register.
+ */
+void X86Mir2Lir::OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg) {
+ NewLIR2(kX86MovdxrRR, fp_reg, low_reg);  // fp_reg <- low word
+ int tmp_reg = AllocTempDouble();
+ NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);  // tmp_reg <- high word
+ NewLIR2(kX86PsllqRI, tmp_reg, 32);  // shift the high word up by 32 bits
+ NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);  // merge the high word into fp_reg
+ FreeTemp(tmp_reg);
+}
+
} // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f38a16d..1488f5d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -128,11 +128,11 @@
#define X86_FP_REG_MASK 0xF
// RegisterLocation templates return values (rAX, rAX/rDX or XMM0).
-// location, wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg, s_reg_low
-#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, fr0, fr1, INVALID_SREG, INVALID_SREG}
+// location, wide, defined, const, fp, core, ref, high_word, home, vec_len, low_reg, high_reg, s_reg_low, orig_sreg
+#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, rDX, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, kVectorLength4, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, kVectorLength8, fr0, fr0, INVALID_SREG, INVALID_SREG}
enum X86ResourceEncodingPos {
kX86GPReg0 = 0,