Fixes to enable TrackLiveTemps optimization on x86.
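- Created new kRegRegStore instruction class for Movdrx. It follows the
store form of the reg-reg modrm encoding: the LIR operands are still
(destination, source), but they are swapped when emitted so that the
source register is encoded first and the destination second.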
- Reverted neg_float and neg_double implementation to prevent confusion
of register types when optimizations are performed.
- Swapped order of loads for wide values to prevent base pointer from
being clobbered when the base pointer equals the low destination reg.
- Implemented opRegCopyWide for general purpose reg source to floating
point reg destination and vice versa.
- Added more opcode coverage to x86 disassembler.
Change-Id: I4e58eec91742cc51333003fa5a678ba5b23eb575
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index 7bd5c52..63e4cc3 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -280,10 +280,13 @@
EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E, REG_DEF0),
EXT_0F_ENCODING_MAP(Divss, 0xF3, 0x5E, REG_DEF0),
+ { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
{ kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0),
- EXT_0F_ENCODING_MAP(Movdrx, 0x66, 0x7E, REG_DEF0),
+ { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
+ { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
+ { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
{ kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" },
{ kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
@@ -375,6 +378,8 @@
return computeSize(entry, lir->operands[0], false);
case kRegReg:
return computeSize(entry, 0, false);
+ case kRegRegStore:
+ return computeSize(entry, 0, false);
case kRegMem: { // lir operands - 0: reg, 1: base, 2: disp
int base = lir->operands[1];
return computeSize(entry, lir->operands[2], false) + (base == rSP ? 1 : 0);
@@ -800,6 +805,9 @@
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
}
+ if (FPREG(reg)) {
+ reg = reg & FP_REG_MASK;
+ }
uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
cUnit->codeBuffer.push_back(modrm);
}
@@ -1307,6 +1315,9 @@
case kRegReg: // lir operands - 0: reg1, 1: reg2
emitRegReg(cUnit, entry, lir->operands[0], lir->operands[1]);
break;
+ case kRegRegStore: // lir operands - 0: reg2, 1: reg1
+ emitRegReg(cUnit, entry, lir->operands[1], lir->operands[0]);
+ break;
case kRegRegImm:
emitRegRegImm(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2]);
break;
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index c00b5fc..f2488d0 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -21,6 +21,7 @@
RegLocation rlSrc2) {
X86OpCode op = kX86Nop;
RegLocation rlResult;
+ int tempReg;
/*
* Don't attempt to optimize register usage since these opcodes call out to
@@ -44,12 +45,13 @@
op = kX86MulssRR;
break;
case Instruction::NEG_FLOAT:
- // TODO: Make this nicer. Subtracting the source from 0 doesn't work in
- // the 0 case, and using FCHS is difficult with register promotion. This
- // code treats the value as a CoreReg to make it easy to manipulate.
- rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
- rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, rlSrc1.lowReg, 0x80000000);
+ // TODO: Make this an XorpsRM where the memory location holds 0x80000000
+ rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+ tempReg = oatAllocTemp(cUnit);
+ loadConstant(cUnit, tempReg, 0x80000000);
+ newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
+ newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
storeValue(cUnit, rlDest, rlResult);
return false;
case Instruction::REM_FLOAT_2ADDR:
@@ -81,6 +83,7 @@
RegLocation rlSrc2) {
X86OpCode op = kX86Nop;
RegLocation rlResult;
+ int tempReg;
switch (opcode) {
case Instruction::ADD_DOUBLE_2ADDR:
@@ -100,13 +103,14 @@
op = kX86MulsdRR;
break;
case Instruction::NEG_DOUBLE:
- // TODO: Make this nicer. Subtracting the source from 0 doesn't work in
- // the 0 case, and using FCHS is difficult with register promotion. This
- // code treats the value as a CoreReg to make it easy to manipulate.
- rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
- rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc1.highReg, 0x80000000);
- opRegCopy(cUnit, rlResult.lowReg, rlSrc1.lowReg);
+ // TODO: Make this an XorpdRM where the memory location holds 0x8000000000000000
+ rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+ tempReg = oatAllocTemp(cUnit);
+ loadConstant(cUnit, tempReg, 0x80000000);
+ newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
+ newLIR2(cUnit, kX86PsllqRI, rlResult.lowReg, 32);
+ newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
storeValueWide(cUnit, rlDest, rlResult);
return false;
case Instruction::REM_DOUBLE_2ADDR:
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index d60d9de..9721038 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -475,9 +475,15 @@
if (!pair) {
load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
} else {
- load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
- load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
- displacement + HIWORD_OFFSET);
+ if (rBase == rDest) {
+ load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
+ displacement + HIWORD_OFFSET);
+ load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
+ } else {
+ load = newLIR3(cUnit, opcode, rDest, rBase, displacement + LOWORD_OFFSET);
+ load2 = newLIR3(cUnit, opcode, rDestHi, rBase,
+ displacement + HIWORD_OFFSET);
+ }
}
if (rBase == rSP) {
annotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0))
@@ -492,10 +498,17 @@
load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
displacement + LOWORD_OFFSET);
} else {
- load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
- displacement + LOWORD_OFFSET);
- load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
- displacement + HIWORD_OFFSET);
+ if (rBase == rDest) {
+ load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
+ displacement + HIWORD_OFFSET);
+ load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
+ displacement + LOWORD_OFFSET);
+ } else {
+ load = newLIR5(cUnit, opcode, rDest, rBase, rIndex, scale,
+ displacement + LOWORD_OFFSET);
+ load2 = newLIR5(cUnit, opcode, rDestHi, rBase, rIndex, scale,
+ displacement + HIWORD_OFFSET);
+ }
}
}
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index b0b6ba8..adad05b 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -352,13 +352,18 @@
if (srcFP) {
opRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi));
} else {
- UNIMPLEMENTED(WARNING);
- newLIR0(cUnit, kX86Bkpt);
+ // TODO: Prevent this from happening in the code. The result is often
+ // unused or could have been loaded more easily from memory.
+ newLIR2(cUnit, kX86MovdxrRR, destLo, srcLo);
+ newLIR2(cUnit, kX86MovdxrRR, destHi, srcHi);
+ newLIR2(cUnit, kX86PsllqRI, destHi, 32);
+ newLIR2(cUnit, kX86OrpsRR, destLo, destHi);
}
} else {
if (srcFP) {
- UNIMPLEMENTED(WARNING);
- newLIR0(cUnit, kX86Bkpt);
+ newLIR2(cUnit, kX86MovdrxRR, destLo, srcLo);
+ newLIR2(cUnit, kX86PsrlqRI, srcLo, 32);
+ newLIR2(cUnit, kX86MovdrxRR, destHi, srcLo);
} else {
// Handle overlap
if (srcHi == destLo) {
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 4c44118..c229844 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -110,7 +110,7 @@
/* Offset to distingish FP regs */
#define FP_REG_OFFSET 32
/* Offset to distinguish DP FP regs */
-#define FP_DOUBLE (FP_REG_OFFSET + 32)
+#define FP_DOUBLE (FP_REG_OFFSET + 16)
/* Offset to distingish the extra regs */
#define EXTRA_REG_OFFSET (FP_DOUBLE + 16)
/* Reg types */
@@ -433,9 +433,10 @@
Binary0fOpCode(kX86Subss), // float subtract
Binary0fOpCode(kX86Divsd), // double divide
Binary0fOpCode(kX86Divss), // float divide
- kX86PsllqRI, // shift of floating point registers
+ kX86PsrlqRI, // right shift of floating point registers
+ kX86PsllqRI, // left shift of floating point registers
Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
- Binary0fOpCode(kX86Movdrx), // move into reg from xmm
+ kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,// move into reg from xmm
kX86Set8R, kX86Set8M, kX86Set8A,// set byte depending on condition operand
kX86Mfence, // memory barrier
Binary0fOpCode(kX86Imul16), // 16bit multiply
@@ -470,6 +471,7 @@
kReg, kMem, kArray, // R, M and A instruction kinds.
kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds.
kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds.
+ kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load.
kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds.
kMovRegImm, // Shorter form move RI.