Delegate long-to-float type conversions to the runtime on ARM.
On ARM, translate long-to-float type conversions (from both
Quick and Optimizing) as calls to the runtime routine
art_l2f, instead of generating ad hoc code, so as to improve
the precision of the conversions.
Bug: 20413424
Change-Id: I8c414ee1c6f4ff1f32ee78f75734cfd3cf579f71
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 94fc474..1a5c108 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -191,30 +191,8 @@
GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
return;
case Instruction::LONG_TO_FLOAT: {
- rl_src = LoadValueWide(rl_src, kFPReg);
- RegisterInfo* info = GetRegInfo(rl_src.reg);
- RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
- DCHECK(src_low.Valid());
- RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
- DCHECK(src_high.Valid());
- rl_result = EvalLoc(rl_dest, kFPReg, true);
- // Allocate temp registers.
- RegStorage high_val = AllocTempDouble();
- RegStorage low_val = AllocTempDouble();
- RegStorage const_val = AllocTempDouble();
- // Long to double.
- NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
- NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
- LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
- NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
- // Double to float.
- NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
- // Free temp registers.
- FreeTemp(high_val);
- FreeTemp(low_val);
- FreeTemp(const_val);
- // Store result.
- StoreValue(rl_dest, rl_result);
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>(); // float -> kFPReg
+ GenConversionCall(kQuickL2f, rl_dest, rl_src, kFPReg);
return;
}
case Instruction::DOUBLE_TO_LONG:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 987a6c4..1c68654 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1456,11 +1456,12 @@
Primitive::Type input_type = conversion->GetInputType();
DCHECK_NE(result_type, input_type);
- // The float-to-long and double-to-long type conversions rely on a
- // call to the runtime.
+ // The float-to-long, double-to-long and long-to-float type conversions
+ // rely on a call to the runtime.
LocationSummary::CallKind call_kind =
- ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
- && result_type == Primitive::kPrimLong)
+ (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
+ && result_type == Primitive::kPrimLong)
+ || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
? LocationSummary::kCall
: LocationSummary::kNoCall;
LocationSummary* locations =
@@ -1603,15 +1604,14 @@
locations->SetOut(Location::RequiresFpuRegister());
break;
- case Primitive::kPrimLong:
+ case Primitive::kPrimLong: {
// Processing a Dex `long-to-float' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+ locations->SetOut(Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
break;
+ }
case Primitive::kPrimDouble:
// Processing a Dex `double-to-float' instruction.
@@ -1820,47 +1820,13 @@
break;
}
- case Primitive::kPrimLong: {
+ case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- Register low = in.AsRegisterPairLow<Register>();
- Register high = in.AsRegisterPairHigh<Register>();
- SRegister output = out.AsFpuRegister<SRegister>();
- Register constant_low = locations->GetTemp(0).AsRegister<Register>();
- Register constant_high = locations->GetTemp(1).AsRegister<Register>();
- SRegister temp1_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>();
- DRegister temp1_d = FromLowSToD(temp1_s);
- SRegister temp2_s = locations->GetTemp(3).AsFpuRegisterPairLow<SRegister>();
- DRegister temp2_d = FromLowSToD(temp2_s);
-
- // Operations use doubles for precision reasons (each 32-bit
- // half of a long fits in the 53-bit mantissa of a double,
- // but not in the 24-bit mantissa of a float). This is
- // especially important for the low bits. The result is
- // eventually converted to float.
-
- // temp1_d = int-to-double(high)
- __ vmovsr(temp1_s, high);
- __ vcvtdi(temp1_d, temp1_s);
- // Using vmovd to load the `k2Pow32EncodingForDouble` constant
- // as an immediate value into `temp2_d` does not work, as
- // this instruction only transfers 8 significant bits of its
- // immediate operand. Instead, use two 32-bit core
- // registers to load `k2Pow32EncodingForDouble` into
- // `temp2_d`.
- __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble));
- __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble));
- __ vmovdrr(temp2_d, constant_low, constant_high);
- // temp1_d = temp1_d * 2^32
- __ vmuld(temp1_d, temp1_d, temp2_d);
- // temp2_d = unsigned-to-double(low)
- __ vmovsr(temp2_s, low);
- __ vcvtdu(temp2_d, temp2_s);
- // temp1_d = temp1_d + temp2_d
- __ vaddd(temp1_d, temp1_d, temp2_d);
- // output = double-to-float(temp1_d);
- __ vcvtsd(output, temp1_d);
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pL2f),
+ conversion,
+ conversion->GetDexPc(),
+ nullptr);
break;
- }
case Primitive::kPrimDouble:
// Processing a Dex `double-to-float' instruction.
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index cafc868..2f2654d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -127,11 +127,13 @@
qpoints->pFmodf = fmodf;
qpoints->pD2l = art_d2l;
qpoints->pF2l = art_f2l;
+ qpoints->pL2f = art_l2f;
} else {
qpoints->pFmod = art_quick_fmod;
qpoints->pFmodf = art_quick_fmodf;
qpoints->pD2l = art_quick_d2l;
qpoints->pF2l = art_quick_f2l;
+ qpoints->pL2f = art_quick_l2f;
}
// Intrinsics
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 064f5a6..2e7f34e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1468,3 +1468,18 @@
vmov r0, s0
b art_f2l
END art_quick_f2l
+
+ /* float art_quick_l2f(int64_t l): stub that forwards to the runtime routine float art_l2f(int64_t l) */
+ .extern art_l2f
+ENTRY art_quick_l2f
+ push {lr} /* save the return address; the bl below overwrites lr */
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset lr, 0
+ sub sp, #4 /* 4 bytes of padding; NOTE(review): presumably keeps sp 8-byte aligned across the call - confirm */
+ .cfi_adjust_cfa_offset 4
+ bl art_l2f /* argument registers are passed through untouched */
+ vmov s0, r0 /* copy r0 into s0; NOTE(review): assumes art_l2f returns the float bits in r0 and callers read s0 - confirm */
+ add sp, #4 /* drop the padding */
+ .cfi_adjust_cfa_offset -4
+ pop {pc} /* return by popping the saved lr into pc */
+END art_quick_l2f
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index c7aaa20..a64e818 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -80,6 +80,7 @@
// Math entrypoints.
extern "C" int64_t art_quick_d2l(double);
extern "C" int64_t art_quick_f2l(float);
+extern "C" float art_quick_l2f(int64_t);
extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
extern "C" int64_t art_quick_lmod(int64_t, int64_t);
extern "C" int64_t art_quick_lmul(int64_t, int64_t);
diff --git a/test/477-long-to-float-conversion-precision/info.txt b/test/477-long-to-float-conversion-precision/info.txt
index d9d41d7..1e07cf3 100644
--- a/test/477-long-to-float-conversion-precision/info.txt
+++ b/test/477-long-to-float-conversion-precision/info.txt
@@ -1 +1 @@
-Tests for type conversions precision.
+Regression test for type conversion precision.
diff --git a/test/477-long-to-float-conversion-precision/src/Main.java b/test/477-long-to-float-conversion-precision/src/Main.java
index bc17053..cd97039 100644
--- a/test/477-long-to-float-conversion-precision/src/Main.java
+++ b/test/477-long-to-float-conversion-precision/src/Main.java
@@ -30,9 +30,8 @@
}
private static void longToFloat() {
- // The result for this test case is slightly less accurate on ARM,
- // due to the implementation of long-to-float type conversions for
- // this architecture (both in Quick and Optimizing).
+ // The result for this test case used to be slightly less accurate
+ // on ARM (both in Quick and Optimizing).
assertFloatEquals(Float.intBitsToFloat(-555858671), $opt$LongToFloat(-8008112895877447681L));
}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index d224f43..fa13fe5 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -387,29 +387,6 @@
TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
-# Known broken tests for Quick's and Optimizing's ARM back ends.
-TEST_ART_BROKEN_ARM_RUN_TESTS := 477-long-to-float-conversion-precision # b/20413424
-
-ifeq ($(TARGET_ARCH),arm)
- ifneq (,$(filter 32,$(ALL_ADDRESS_SIZES)))
- ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
- $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
- $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_ARM_RUN_TESTS),32)
- endif
-endif
-
-ifdef TARGET_2ND_ARCH
- ifeq ($(TARGET_2ND_ARCH),arm)
- ifneq (,$(filter 32,$(ALL_ADDRESS_SIZES)))
- ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
- $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
- $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_ARM_RUN_TESTS),32)
- endif
- endif
-endif
-
-TEST_ART_BROKEN_ARM_RUN_TESTS :=
-
# Known broken tests for the arm64 optimizing compiler backend.
TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=