Numerous fixes to enable PromoteRegs, though it's still broken.
- Fixed ThrowNullPointerFromCode launchpad to load the array length
directly into the necessary arg reg without clobbering the array
pointer, since that value may be live afterwards.
- Made genArrayPut use a temporary reg for byte stores when the source reg
is >= 4, since x86 has no byte form for those registers (see the
byte-register sketch below).
- Fixed the order in which core regs are spilled and unspilled (see the
spill-slot sketch below).
- Correctly emit instructions when base == rBP and disp == 0; rBP as a base
requires an explicit displacement (see the ModRM sketch below).
- Added checks to the compiler to ensure that byte opcodes aren't used on
registers that can't be byte-accessed.
- Fixed generation of a number of ops that use byte opcodes, including
floating-point comparison, int-to-byte, and and-int/lit16.
- Added rBP, rSI, and rDI to the spill registers for the x86 JNI compiler.
- Various fixes and additions to the x86 disassembler.
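
Background for the byte-opcode items above (a minimal sketch, not code from
this change; only the register numbering is assumed to match X86LIR.h):
without a REX prefix, only encodings 0-3 (EAX/ECX/EDX/EBX) have low-byte
forms (AL/CL/DL/BL); a byte-size operand with encoding 4-7 names AH/CH/DH/BH
instead, so the low byte of ESP/EBP/ESI/EDI cannot be addressed. That is why
byte opcodes are restricted to reg < 4 and why int-to-byte falls back to
shifts:

  // Sketch only; assumes 32-bit x86 without REX and an arithmetic right shift.
  #include <cstdint>

  static bool ByteAddressable(int reg) {
    return reg < 4;  // 0-3 = EAX/ECX/EDX/EBX, the only regs with low-byte forms
  }

  // Mirrors the kOp2Byte fallback: sign-extend the low byte without ever
  // naming a byte register (mov; sal 24; sar 24).
  static int32_t SignExtendByteViaShifts(int32_t src) {
    uint32_t shifted = static_cast<uint32_t>(src) << 24;  // sal dest, 24
    return static_cast<int32_t>(shifted) >> 24;           // sar dest, 24
  }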
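
Why base == rBP with disp == 0 needs an extra byte (a sketch of the encoding
rule, not code from this change): in ModRM, mod == 00 with r/m == 101 (EBP)
means "disp32, no base register", so a plain [EBP] access must be encoded as
mod == 01 with an explicit zero disp8, one byte longer than the usual disp-0
form. This is what the computeSize/modrmForDisp/emitDisp changes account for:

  // Sizing sketch; register numbers (rSP == 4, rBP == 5) assumed from X86LIR.h.
  static int ExtraAddressingBytes(int base, int disp) {
    int extra = 0;
    if (base == 4 /* rSP */) {
      extra += 1;  // rSP as a base always needs a SIB byte
    }
    if (base == 5 /* rBP */ && disp == 0) {
      extra += 1;  // [EBP] must still carry a disp8 of 0
    }
    return extra;
  }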
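
The spill/unspill fix makes the prologue and epilogue walk the same ascending
slot offsets. Abstracted into a single walker for illustration (a sketch, not
code from this change; the frameSize/numCoreSpills semantics are assumed to
match the new ArchFactory.cc code):

  #include <cstdint>

  static void WalkCoreSpillSlots(uint32_t mask, int frameSize, int numCoreSpills,
                                 void (*visit)(int reg, int offset)) {
    int offset = frameSize - 4 * numCoreSpills;
    for (int reg = 0; mask != 0; mask >>= 1, reg++) {
      if (mask & 1) {
        visit(reg, offset);  // storeWordDisp in the prologue, loadWordDisp in the epilogue
        offset += 4;
      }
    }
  }
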
Change-Id: I365fe7dec5cc64d181248fd58e90789f100b45e7
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index 1620044..001a93d 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -128,11 +128,11 @@
}
// Spill mask not including fake return address register
uint32_t mask = cUnit->coreSpillMask & ~(1 << rRET);
- int offset = cUnit->frameSize - 4;
+ int offset = cUnit->frameSize - (4 * cUnit->numCoreSpills);
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- offset -= 4;
storeWordDisp(cUnit, rSP, offset, reg);
+ offset += 4;
}
}
}
@@ -143,11 +143,11 @@
}
// Spill mask not including fake return address register
uint32_t mask = cUnit->coreSpillMask & ~(1 << rRET);
- int offset = cUnit->frameSize - 4;
+ int offset = cUnit->frameSize - (4 * cUnit->numCoreSpills);
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
- offset -= 4;
loadWordDisp(cUnit, rSP, offset, reg);
+ offset += 4;
}
}
}
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index 63e4cc3..a245660 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -362,15 +362,19 @@
return computeSize(entry, 0, false);
case kMem: { // lir operands - 0: base, 1: disp
int base = lir->operands[0];
- // SP requires a special extra SIB byte
- return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+ int disp = lir->operands[1];
+ // SP requires a special extra SIB byte. BP requires explicit disp,
+ // so add a byte for disp 0 which would normally be omitted.
+ return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
}
case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp
return computeSize(entry, lir->operands[3], true);
case kMemReg: { // lir operands - 0: base, 1: disp, 2: reg
int base = lir->operands[0];
- // SP requires a special extra SIB byte
- return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+ int disp = lir->operands[1];
+ // SP requires a special extra SIB byte. BP requires explicit disp,
+ // so add a byte for disp 0 which would normally be omitted.
+ return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
}
case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
return computeSize(entry, lir->operands[3], true);
@@ -382,10 +386,17 @@
return computeSize(entry, 0, false);
case kRegMem: { // lir operands - 0: reg, 1: base, 2: disp
int base = lir->operands[1];
- return computeSize(entry, lir->operands[2], false) + (base == rSP ? 1 : 0);
+ int disp = lir->operands[2];
+ // SP requires a special extra SIB byte. BP requires explicit disp,
+ // so add a byte for disp 0 which would normally be omitted.
+ return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
}
- case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
- return computeSize(entry, lir->operands[4], true);
+ case kRegArray: { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
+ int base = lir->operands[1];
+ int disp = lir->operands[4];
+ // BP requires explicit disp, so add a byte for disp 0 which would normally be omitted.
+ return computeSize(entry, disp, true) + ((base == rBP && disp == 0) ? 1 : 0);
+ }
case kRegThread: // lir operands - 0: reg, 1: disp
return computeSize(entry, 0x12345678, false); // displacement size is always 32bit
case kRegImm: { // lir operands - 0: reg, 1: immediate
@@ -487,8 +498,9 @@
return 0;
}
-static uint8_t modrmForDisp(int disp) {
- if (disp == 0) {
+static uint8_t modrmForDisp(int base, int disp) {
+ // BP requires an explicit disp, so do not omit it in the 0 case
+ if (disp == 0 && base != rBP) {
return 0;
} else if (IS_SIMM8(disp)) {
return 1;
@@ -497,8 +509,9 @@
}
}
-static void emitDisp(CompilationUnit* cUnit, int disp) {
- if (disp == 0) {
+static void emitDisp(CompilationUnit* cUnit, int base, int disp) {
+ // BP requires an explicit disp, so do not omit it in the 0 case
+ if (disp == 0 && base != rBP) {
return;
} else if (IS_SIMM8(disp)) {
cUnit->codeBuffer.push_back(disp & 0xFF);
@@ -534,6 +547,10 @@
if (FPREG(reg)) {
reg = reg & FP_REG_MASK;
}
+ if (reg >= 4) {
+ DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+ << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+ }
DCHECK_LT(reg, 8);
uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
cUnit->codeBuffer.push_back(modrm);
@@ -555,9 +572,9 @@
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
DCHECK_LT(entry->skeleton.modrm_opcode, 8);
DCHECK_LT(base, 8);
- uint8_t modrm = (modrmForDisp(disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
+ uint8_t modrm = (modrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
cUnit->codeBuffer.push_back(modrm);
- emitDisp(cUnit, disp);
+ emitDisp(cUnit, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
}
@@ -587,15 +604,19 @@
if (FPREG(reg)) {
reg = reg & FP_REG_MASK;
}
+ if (reg >= 4) {
+ DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+ << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+ }
DCHECK_LT(reg, 8);
DCHECK_LT(base, 8);
- uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | base;
+ uint8_t modrm = (modrmForDisp(base, disp) << 6) | (reg << 3) | base;
cUnit->codeBuffer.push_back(modrm);
if (base == rSP) {
// Special SIB for SP base
cUnit->codeBuffer.push_back(0 << 6 | (rSP << 3) | rSP);
}
- emitDisp(cUnit, disp);
+ emitDisp(cUnit, base, disp);
DCHECK_EQ(0, entry->skeleton.modrm_opcode);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -633,14 +654,14 @@
reg = reg & FP_REG_MASK;
}
DCHECK_LT(reg, 8);
- uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | rSP;
+ uint8_t modrm = (modrmForDisp(base, disp) << 6) | (reg << 3) | rSP;
cUnit->codeBuffer.push_back(modrm);
DCHECK_LT(scale, 4);
DCHECK_LT(index, 8);
DCHECK_LT(base, 8);
uint8_t sib = (scale << 6) | (index << 3) | base;
cUnit->codeBuffer.push_back(sib);
- emitDisp(cUnit, disp);
+ emitDisp(cUnit, base, disp);
DCHECK_EQ(0, entry->skeleton.modrm_opcode);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -674,6 +695,10 @@
if (FPREG(reg)) {
reg = reg & FP_REG_MASK;
}
+ if (reg >= 4) {
+ DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+ << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+ }
DCHECK_LT(reg, 8);
uint8_t modrm = (0 << 6) | (reg << 3) | rBP;
cUnit->codeBuffer.push_back(modrm);
@@ -923,6 +948,10 @@
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
}
+ if (reg >= 4) {
+ DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+ << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+ }
DCHECK_LT(reg, 8);
uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
cUnit->codeBuffer.push_back(modrm);
@@ -1037,13 +1066,13 @@
DCHECK_EQ(0, entry->skeleton.extra_opcode1);
DCHECK_EQ(0, entry->skeleton.extra_opcode2);
}
- uint8_t modrm = (modrmForDisp(disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
+ uint8_t modrm = (modrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
cUnit->codeBuffer.push_back(modrm);
if (base == rSP) {
// Special SIB for SP base
cUnit->codeBuffer.push_back(0 << 6 | (rSP << 3) | rSP);
}
- emitDisp(cUnit, disp);
+ emitDisp(cUnit, base, disp);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
DCHECK_EQ(0, entry->skeleton.immediate_bytes);
}
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index f2488d0..8cd32b4 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -264,7 +264,20 @@
if (unorderedGt) {
branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
}
- newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
+ // If the result reg can't be byte accessed, use a jump and move instead of a set.
+ if (rlResult.lowReg >= 4) {
+ LIR* branch2 = NULL;
+ if (unorderedGt) {
+ branch2 = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+ newLIR2(cUnit, kX86Mov32RI, rlResult.lowReg, 0x0);
+ } else {
+ branch2 = newLIR2(cUnit, kX86Jcc8, 0, kX86CondBe);
+ newLIR2(cUnit, kX86Mov32RI, rlResult.lowReg, 0x1);
+ }
+ branch2->target = newLIR0(cUnit, kPseudoTargetLabel);
+ } else {
+ newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
+ }
newLIR2(cUnit, kX86Sbb32RI, rlResult.lowReg, 0);
if (unorderedGt) {
branch->target = newLIR0(cUnit, kPseudoTargetLabel);
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index 9721038..f77a793 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -198,7 +198,16 @@
case kOpAnd: opcode = kX86And32RR; break;
case kOpOr: opcode = kX86Or32RR; break;
case kOpXor: opcode = kX86Xor32RR; break;
- case kOp2Byte: opcode = kX86Movsx8RR; break;
+ case kOp2Byte:
+ // Use shifts instead of a byte operand if the source can't be byte accessed.
+ if (rSrc2 >= 4) {
+ newLIR2(cUnit, kX86Mov32RR, rDestSrc1, rSrc2);
+ newLIR2(cUnit, kX86Sal32RI, rDestSrc1, 24);
+ return newLIR2(cUnit, kX86Sar32RI, rDestSrc1, 24);
+ } else {
+ opcode = kX86Movsx8RR;
+ }
+ break;
case kOp2Short: opcode = kX86Movsx16RR; break;
case kOp2Char: opcode = kX86Movzx16RR; break;
case kOpMul: opcode = kX86Imul32RR; break;
@@ -228,7 +237,7 @@
case kOp2Char: opcode = kX86Movzx16RM; break;
case kOpMul:
default:
- LOG(FATAL) << "Bad case in opRegReg " << op;
+ LOG(FATAL) << "Bad case in opRegMem " << op;
break;
}
return newLIR3(cUnit, opcode, rDest, rBase, offset);
@@ -290,7 +299,7 @@
X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
return newLIR3(cUnit, opcode, rDest, rSrc, value);
} else if (op == kOpAnd) {
- if (value == 0xFF) {
+ if (value == 0xFF && rDest < 4) {
return newLIR2(cUnit, kX86Movzx8RR, rDest, rSrc);
} else if (value == 0xFFFF) {
return newLIR2(cUnit, kX86Movzx16RR, rDest, rSrc);
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index c229844..5bf4dd9 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -199,6 +199,9 @@
rSI = r6,
r7 = 7,
rDI = r7,
+#ifndef TARGET_REX_SUPPORT
+ rRET = 8, // fake return address register for core spill mask
+#else
r8 = 8,
r9 = 9,
r10 = 10,
@@ -208,6 +211,7 @@
r14 = 14,
r15 = 15,
rRET = 16, // fake return address register for core spill mask
+#endif
fr0 = 0 + FP_REG_OFFSET,
fr1 = 1 + FP_REG_OFFSET,
fr2 = 2 + FP_REG_OFFSET,