More x86 fixes for floating point operations.
- Fixed encoding for cvtss2sd and cvtsd2ss (they were swapped).
- Implemented neg-float and neg-double in assembly.
- Fixed float-to-int and double-to-int to return 0 if given NaN.
- Created helpers for rem-float and rem-double.
Change-Id: I456fcc1252a63dbb96c8280562e601f4efa3a5df
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 24cd7d3..e6b47d2 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,8 +44,12 @@
op = kX86MulssRR;
break;
case Instruction::NEG_FLOAT:
- UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000]
- // fall-through
+ rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+ newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
+ newLIR2(cUnit, kX86SubssRR, rlResult.lowReg, rlSrc1.lowReg);
+ storeValue(cUnit, rlDest, rlResult);
+ return false;
case Instruction::REM_FLOAT_2ADDR:
case Instruction::REM_FLOAT: {
return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -91,6 +95,12 @@
op = kX86MulsdRR;
break;
case Instruction::NEG_DOUBLE:
+ rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+ rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+ newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg);
+ newLIR2(cUnit, kX86SubsdRR, rlResult.lowReg, rlSrc1.lowReg);
+ storeValueWide(cUnit, rlDest, rlResult);
+ return false;
case Instruction::REM_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE: {
return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2);
@@ -124,9 +134,7 @@
RegLocation rlDest;
X86OpCode op = kX86Nop;
int srcReg;
- int tempReg;
RegLocation rlResult;
- LIR* branch = NULL;
switch (opcode) {
case Instruction::INT_TO_FLOAT:
longSrc = false;
@@ -152,40 +160,52 @@
rcSrc = kCoreReg;
op = kX86Cvtsi2sdRR;
break;
- case Instruction::FLOAT_TO_INT:
+ case Instruction::FLOAT_TO_INT: {
rlSrc = oatGetSrc(cUnit, mir, 0);
rlSrc = loadValue(cUnit, rlSrc, kFPReg);
srcReg = rlSrc.lowReg;
rlDest = oatGetDest(cUnit, mir, 0);
oatClobberSReg(cUnit, rlDest.sRegLow);
rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- tempReg = oatAllocTempFloat(cUnit);
+ int tempReg = oatAllocTempFloat(cUnit);
loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
newLIR2(cUnit, kX86Cvtsi2ssRR, tempReg, rlResult.lowReg);
newLIR2(cUnit, kX86ComissRR, srcReg, tempReg);
- branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
- newLIR2(cUnit, kX86Cvtss2siRR, rlResult.lowReg, srcReg);
- branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+ LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+ LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+ newLIR2(cUnit, kX86Cvttss2siRR, rlResult.lowReg, srcReg);
+ LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+ branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+ newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+ branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+ branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
storeValue(cUnit, rlDest, rlResult);
return false;
- case Instruction::DOUBLE_TO_INT:
+ }
+ case Instruction::DOUBLE_TO_INT: {
rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
srcReg = rlSrc.lowReg;
rlDest = oatGetDest(cUnit, mir, 0);
oatClobberSReg(cUnit, rlDest.sRegLow);
rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- tempReg = oatAllocTempDouble(cUnit);
+ int tempReg = oatAllocTempDouble(cUnit);
loadConstant(cUnit, rlResult.lowReg, 0x7fffffff);
newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg);
newLIR2(cUnit, kX86ComisdRR, srcReg, tempReg);
- branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
- newLIR2(cUnit, kX86Cvtsd2siRR, rlResult.lowReg, srcReg);
- branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+ LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+ LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP);
+ newLIR2(cUnit, kX86Cvttsd2siRR, rlResult.lowReg, srcReg);
+ LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0);
+ branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel);
+ newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg);
+ branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel);
+ branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel);
storeValue(cUnit, rlDest, rlResult);
return false;
+ }
case Instruction::LONG_TO_DOUBLE:
case Instruction::LONG_TO_FLOAT:
// These can be implemented inline by using memory as a 64-bit source.