ART: Ensure FP GET/PUT doesn't use Core register
Routine void org.jbox2d.collision.AABB.combine(
org.jbox2d.collision.AABB, org.jbox2d.collision.AABB)
in the icyrocks application generated code for an iget of a FP field
that was loaded into a Core register, and then into an XMM register.
This was caused by the Dex code:
0x0030: iget v2, v2, F org.jbox2d.common.Vec2.x // field@3747
I traced this to GenIGet using a reg_class of kAnyReg, and EvalLoc
finding that v2 was available in EDX. Since kAnyReg is compatible
with EDX, The iget loaded the FP value into EDX, and then into an XMM
register for subsequent use.
Fix: Pass kSingle/kDouble into IGET/IPUT/SGET/SPUT/AGET/APUT when the
source/destination is FP. Change X86Mir2Lir::RegClassForFieldLoadStore
to return kFPReg for those cases. This causes EvalLoc to return an XMM
register, and the load is done right to the XMM register.
Change-Id: Ifbcc9e4d80bc6da8ea4ebf7e6cebaaf672a2766e
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index bd88091..3618941 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -608,13 +608,13 @@
}
case Instruction::AGET_WIDE:
- GenArrayGet(opt_flags, k64, rl_src[0], rl_src[1], rl_dest, 3);
+ GenArrayGet(opt_flags, rl_dest.fp ? kDouble : k64, rl_src[0], rl_src[1], rl_dest, 3);
break;
case Instruction::AGET_OBJECT:
GenArrayGet(opt_flags, kReference, rl_src[0], rl_src[1], rl_dest, 2);
break;
case Instruction::AGET:
- GenArrayGet(opt_flags, k32, rl_src[0], rl_src[1], rl_dest, 2);
+ GenArrayGet(opt_flags, rl_dest.fp ? kSingle : k32, rl_src[0], rl_src[1], rl_dest, 2);
break;
case Instruction::AGET_BOOLEAN:
GenArrayGet(opt_flags, kUnsignedByte, rl_src[0], rl_src[1], rl_dest, 0);
@@ -629,10 +629,10 @@
GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
break;
case Instruction::APUT_WIDE:
- GenArrayPut(opt_flags, k64, rl_src[1], rl_src[2], rl_src[0], 3, false);
+ GenArrayPut(opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[1], rl_src[2], rl_src[0], 3, false);
break;
case Instruction::APUT:
- GenArrayPut(opt_flags, k32, rl_src[1], rl_src[2], rl_src[0], 2, false);
+ GenArrayPut(opt_flags, rl_src[0].fp ? kSingle : k32, rl_src[1], rl_src[2], rl_src[0], 2, false);
break;
case Instruction::APUT_OBJECT: {
bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]);
@@ -666,11 +666,19 @@
case Instruction::IGET_WIDE:
// kPrimLong and kPrimDouble share the same entrypoints.
- GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]);
+ if (rl_dest.fp) {
+ GenIGet(mir, opt_flags, kDouble, Primitive::kPrimDouble, rl_dest, rl_src[0]);
+ } else {
+ GenIGet(mir, opt_flags, k64, Primitive::kPrimLong, rl_dest, rl_src[0]);
+ }
break;
case Instruction::IGET:
- GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]);
+ if (rl_dest.fp) {
+ GenIGet(mir, opt_flags, kSingle, Primitive::kPrimFloat, rl_dest, rl_src[0]);
+ } else {
+ GenIGet(mir, opt_flags, k32, Primitive::kPrimInt, rl_dest, rl_src[0]);
+ }
break;
case Instruction::IGET_CHAR:
@@ -690,7 +698,7 @@
break;
case Instruction::IPUT_WIDE:
- GenIPut(mir, opt_flags, k64, rl_src[0], rl_src[1]);
+ GenIPut(mir, opt_flags, rl_src[0].fp ? kDouble : k64, rl_src[0], rl_src[1]);
break;
case Instruction::IPUT_OBJECT:
@@ -698,7 +706,7 @@
break;
case Instruction::IPUT:
- GenIPut(mir, opt_flags, k32, rl_src[0], rl_src[1]);
+ GenIPut(mir, opt_flags, rl_src[0].fp ? kSingle : k32, rl_src[0], rl_src[1]);
break;
case Instruction::IPUT_BYTE:
@@ -719,7 +727,7 @@
break;
case Instruction::SGET:
- GenSget(mir, rl_dest, k32, Primitive::kPrimInt);
+ GenSget(mir, rl_dest, rl_dest.fp ? kSingle : k32, Primitive::kPrimInt);
break;
case Instruction::SGET_CHAR:
@@ -740,7 +748,7 @@
case Instruction::SGET_WIDE:
// kPrimLong and kPrimDouble share the same entrypoints.
- GenSget(mir, rl_dest, k64, Primitive::kPrimLong);
+ GenSget(mir, rl_dest, rl_dest.fp ? kDouble : k64, Primitive::kPrimDouble);
break;
case Instruction::SPUT_OBJECT:
@@ -748,7 +756,7 @@
break;
case Instruction::SPUT:
- GenSput(mir, rl_src[0], k32);
+ GenSput(mir, rl_src[0], rl_src[0].fp ? kSingle : k32);
break;
case Instruction::SPUT_BYTE:
@@ -766,7 +774,7 @@
case Instruction::SPUT_WIDE:
- GenSput(mir, rl_src[0], k64);
+ GenSput(mir, rl_src[0], rl_src[0].fp ? kDouble : k64);
break;
case Instruction::INVOKE_STATIC_RANGE:
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 85ab92b..a79f299 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -2319,7 +2319,7 @@
*/
void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_dest, int scale) {
- RegisterClass reg_class = RegClassBySize(size);
+ RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
int len_offset = mirror::Array::LengthOffset().Int32Value();
RegLocation rl_result;
rl_array = LoadValue(rl_array, kRefReg);
@@ -2368,7 +2368,7 @@
*/
void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
- RegisterClass reg_class = RegClassBySize(size);
+ RegisterClass reg_class = RegClassForFieldLoadStore(size, false);
int len_offset = mirror::Array::LengthOffset().Int32Value();
int data_offset;
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5f6cdda..5d14869 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -797,6 +797,12 @@
}
RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+ // Prefer XMM registers. Fixes a problem with iget/iput to a FP when cached temporary
+ // with same VR is a Core register.
+ if (size == kSingle || size == kDouble) {
+ return kFPReg;
+ }
+
// X86_64 can handle any size.
if (cu_->target64) {
return RegClassBySize(size);