AArch64: Enable Inlining.
This patch fixes the remaining issues with inlining for ARM64:
  - add an Arm64-specific GenInlinedAbsDouble() that clears the sign bit
    of the double with a single UBFM;
  - load the 64-bit address/offset arguments of the inlined Peek, Poke
    and CAS intrinsics as wide values instead of narrowing them;
  - handle references (64-bit registers holding compressed 32-bit values)
    in the inlined CAS and in the indexed/displacement load and store
    helpers;
  - sign-extend 32-bit index registers before they are used in
    register-offset loads and stores.
Change-Id: I2d85b7c4f3fb2b667bf6029fbc271ab954378889
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
Signed-off-by: Matteo Franchin <matteo.franchin@arm.com>
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index b070c8a..294b9ea 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -177,6 +177,7 @@
RegLocation rl_src2);
void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
bool GenInlinedReverseBits(CallInfo* info, OpSize size);
+ bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 0f9de5b..6594c4b 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -323,6 +323,16 @@
StoreValueWide(rl_dest, rl_result);
}
+bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+ RegLocation rl_src = info->args[0];
+ rl_src = LoadValueWide(rl_src, kCoreReg);
+ RegLocation rl_dest = InlineTargetWide(info);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+ StoreValueWide(rl_dest, rl_result);
+ return true;
+}
+
bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
RegLocation rl_src = info->args[0];
RegLocation rl_dest = InlineTargetWide(info); // double place for result
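
A side note on the new GenInlinedAbsDouble() above: UBFM xd, xn, #0, #62
copies bits 0..62 of the raw double and clears bit 63, i.e. it strips the
IEEE-754 sign bit. A minimal standalone C++ sketch of the same trick, for
illustration only (not part of the patch):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Clear the sign bit of the 64-bit representation; the single UBFM
    // emitted by GenInlinedAbsDouble() has the same effect in one instruction.
    double AbsViaBitClear(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= ~(UINT64_C(1) << 63);
      std::memcpy(&x, &bits, sizeof(x));
      return x;
    }

    int main() {
      assert(AbsViaBitClear(-3.5) == std::fabs(-3.5));
      assert(AbsViaBitClear(2.25) == 2.25);
      return 0;
    }
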
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index bab5499..86dddae 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -451,9 +451,8 @@
bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
RegLocation rl_src_address = info->args[0]; // long address
- rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ?
- RegLocation rl_dest = InlineTarget(info);
- RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg
+ RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
+ RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
@@ -468,9 +467,8 @@
bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
RegLocation rl_src_address = info->args[0]; // long address
- rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1]
RegLocation rl_src_value = info->args[2]; // [size] value
- RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg
+ RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
RegLocation rl_value;
if (size == k64) {
@@ -497,11 +495,9 @@
bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
DCHECK_EQ(cu_->instruction_set, kArm64);
- ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0);
// Unused - RegLocation rl_src_unsafe = info->args[0];
RegLocation rl_src_obj = info->args[1]; // Object - known non-null
RegLocation rl_src_offset = info->args[2]; // long low
- rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this
RegLocation rl_src_expected = info->args[4]; // int, long or Object
// If is_long, high half is in info->args[5]
RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
@@ -510,7 +506,7 @@
// Load Object and offset
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
- RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg);
+ RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
RegLocation rl_new_value;
RegLocation rl_expected;
@@ -542,28 +538,38 @@
// result = tmp != 0;
RegStorage r_tmp;
+ RegStorage r_tmp_stored;
+ RegStorage rl_new_value_stored = rl_new_value.reg;
+ ArmOpcode wide = UNWIDE(0);
if (is_long) {
- r_tmp = AllocTempWide();
+ r_tmp_stored = r_tmp = AllocTempWide();
+ wide = WIDE(0);
} else if (is_object) {
+ // References use 64-bit registers, but are stored as compressed 32-bit values.
+ // This means r_tmp_stored != r_tmp.
r_tmp = AllocTempRef();
+ r_tmp_stored = As32BitReg(r_tmp);
+ rl_new_value_stored = As32BitReg(rl_new_value_stored);
} else {
- r_tmp = AllocTemp();
+ r_tmp_stored = r_tmp = AllocTemp();
}
+ RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
LIR* loop = NewLIR0(kPseudoTargetLabel);
- NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg());
+ NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
LIR* early_exit = OpCondBranch(kCondNe, NULL);
-
- NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg());
- NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT);
+ NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
+ NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
OpCondBranch(kCondNe, loop);
+ LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
+ early_exit->target = exit_loop;
+
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
- early_exit->target = exit;
+ NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
FreeTemp(r_tmp); // Now unneeded.
FreeTemp(r_ptr); // Now unneeded.
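
The LDAXR/STLXR sequence above is a standard load-exclusive/store-exclusive
compare-and-swap loop whose 0/1 result is materialized by CSINC. As a rough
C++ analogue of the semantics the inlined code provides (illustration only,
not ART code):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Returns true iff *addr held 'expected' and was replaced by 'new_value',
    // mirroring the boolean the generated CSINC produces.
    bool CompareAndSwapLong(std::atomic<int64_t>* addr,
                            int64_t expected, int64_t new_value) {
      return addr->compare_exchange_strong(expected, new_value);
    }

    int main() {
      std::atomic<int64_t> v{42};
      assert(CompareAndSwapLong(&v, 42, 7) && v.load() == 7);
      assert(!CompareAndSwapLong(&v, 42, 9) && v.load() == 7);
      return 0;
    }
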
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index aaee91b..eb08404 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -893,7 +893,14 @@
int expected_scale = 0;
ArmOpcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
- r_index = Check64BitReg(r_index);
+
+ // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+ // register offset load (rather than doing the sign extension in a separate instruction).
+ if (r_index.Is32Bit()) {
+ // Assemble: ``sxtw xN, wN''.
+ r_index = As64BitReg(r_index);
+ NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+ }
if (r_dest.IsFloat()) {
if (r_dest.IsDouble()) {
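
On the sign extension of r_index above: SBFM xN, xN, #0, #31 is the SXTW
alias, so a 32-bit index is widened with its sign before it is scaled and
added to the 64-bit base. A small C++ sketch of why that matters (for
illustration only; the helper and its name are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Compute base + (sign-extended index << scale), as the generated
    // "sxtw xN, wN" followed by the scaled-register LDR/STR does.
    uint64_t IndexedAddress(uint64_t base, int32_t index, int scale) {
      int64_t wide_index = static_cast<int64_t>(index);  // sxtw
      return base + (static_cast<uint64_t>(wide_index) << scale);
    }

    int main() {
      // A negative index must move the address below the base, not wrap
      // around as a huge unsigned 32-bit offset.
      assert(IndexedAddress(0x1000, -4, 2) == 0x1000 - 16);
      return 0;
    }
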
@@ -920,9 +927,11 @@
opcode = WIDE(kA64Ldr4rXxG);
expected_scale = 3;
break;
+ case kReference:
+ // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below.
+ r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest;
case kSingle:
case k32:
- case kReference:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldr4rXxG;
expected_scale = 2;
@@ -973,7 +982,14 @@
int expected_scale = 0;
ArmOpcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
- r_index = Check64BitReg(r_index);
+
+ // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+ // register offset store (rather than doing the sign extension in a separate instruction).
+ if (r_index.Is32Bit()) {
+ // Assemble: ``sxtw xN, wN''.
+ r_index = As64BitReg(r_index);
+ NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+ }
if (r_src.IsFloat()) {
if (r_src.IsDouble()) {
@@ -1000,9 +1016,11 @@
opcode = WIDE(kA64Str4rXxG);
expected_scale = 3;
break;
+ case kReference:
+ // TODO(Arm64): r_src must be 64-bit below. Remove the hack below.
+ r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-through.
- case kReference:
r_src = Check32BitReg(r_src);
opcode = kA64Str4rXxG;
expected_scale = 2;
@@ -1066,9 +1084,11 @@
alt_opcode = WIDE(kA64Ldur3rXd);
}
break;
+ case kReference:
+ // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below.
+ r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-through.
- case kReference:
r_dest = Check32BitReg(r_dest);
scale = 2;
if (r_dest.IsFloat()) {
@@ -1165,9 +1185,11 @@
alt_opcode = FWIDE(kA64Stur3rXd);
}
break;
+ case kReference:
+ // TODO(Arm64): r_src must be 64-bit below. Remove the hack below.
+ r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-through.
- case kReference:
r_src = Check32BitReg(r_src);
scale = 2;
if (r_src.IsFloat()) {
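
A note on the recurring kReference cases above: as the comment in the CAS
change says, references live in 64-bit registers but are stored as compressed
32-bit values, so the memory access itself must use the 32-bit register view
(hence the As32BitReg() hack pending the TODOs). A hypothetical, simplified
C++ model of that convention (the names and the low-4GiB-heap assumption are
mine, not ART's):

    #include <cassert>
    #include <cstdint>

    // Assume the managed heap fits in the low 4 GiB, so a reference in
    // memory is a 32-bit value even though it is manipulated in a 64-bit
    // register once loaded.
    using CompressedRef = uint32_t;

    uint64_t LoadReference(const CompressedRef* heap_slot) {
      // A 32-bit load ("ldr wN, [...]") zero-extends into the 64-bit
      // register, which is what the backend relies on when it narrows
      // r_dest/r_src to their 32-bit views for kReference.
      return static_cast<uint64_t>(*heap_slot);
    }

    void StoreReference(CompressedRef* heap_slot, uint64_t ref) {
      assert(ref <= UINT32_MAX);  // must fit the compressed 32-bit slot
      *heap_slot = static_cast<CompressedRef>(ref);  // "str wN, [...]"
    }

    int main() {
      CompressedRef slot = 0;
      StoreReference(&slot, 0x12345678u);
      assert(LoadReference(&slot) == 0x12345678u);
      return 0;
    }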