Delegate long-to-float type conversions to the runtime on ARM.

On ARM, translate long-to-float type conversions (from both Quick and Optimizing) as calls to the runtime routine art_l2f, instead of generating ad hoc code, so as to improve the precision of the conversions.

Bug: 20413424
Change-Id: I8c414ee1c6f4ff1f32ee78f75734cfd3cf579f71

commit: 5b3ee56cd63ee9e3c70c0412d044b81ab9c94513 [log] [tgz]
author: Roland Levillain <rpl@google.com> Tue Apr 14 16:02:41 2015 +0100
committer: Roland Levillain <rpl@google.com> Tue Jun 02 17:37:40 2015 +0100
tree: b57f056c5e4ad18ba7c89034bbbb51b90820e648
parent: 122addc3962207badd364b00a75a58639e76b10c [diff]
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 94fc474..1a5c108 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc

@@ -191,30 +191,8 @@
       GenConversionCall(kQuickF2l, rl_dest, rl_src, kCoreReg);
       return;
     case Instruction::LONG_TO_FLOAT: {
-      rl_src = LoadValueWide(rl_src, kFPReg);
-      RegisterInfo* info = GetRegInfo(rl_src.reg);
-      RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg();
-      DCHECK(src_low.Valid());
-      RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg();
-      DCHECK(src_high.Valid());
-      rl_result = EvalLoc(rl_dest, kFPReg, true);
-      // Allocate temp registers.
-      RegStorage high_val = AllocTempDouble();
-      RegStorage low_val = AllocTempDouble();
-      RegStorage const_val = AllocTempDouble();
-      // Long to double.
-      NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
-      NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
-      LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
-      NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
-      // Double to float.
-      NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
-      // Free temp registers.
-      FreeTemp(high_val);
-      FreeTemp(low_val);
-      FreeTemp(const_val);
-      // Store result.
-      StoreValue(rl_dest, rl_result);
+      CheckEntrypointTypes<kQuickL2f, float, int64_t>();  // float -> kFPReg
+      GenConversionCall(kQuickL2f, rl_dest, rl_src, kFPReg);
       return;
     }
     case Instruction::DOUBLE_TO_LONG:

diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 987a6c4..1c68654 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc

@@ -1456,11 +1456,12 @@
   Primitive::Type input_type = conversion->GetInputType();
   DCHECK_NE(result_type, input_type);
 
-  // The float-to-long and double-to-long type conversions rely on a
-  // call to the runtime.
+  // The float-to-long, double-to-long and long-to-float type conversions
+  // rely on a call to the runtime.
   LocationSummary::CallKind call_kind =
-      ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
-       && result_type == Primitive::kPrimLong)
+      (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
+        && result_type == Primitive::kPrimLong)
+       || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat))
       ? LocationSummary::kCall
       : LocationSummary::kNoCall;
   LocationSummary* locations =
@@ -1603,15 +1604,14 @@
           locations->SetOut(Location::RequiresFpuRegister());
           break;
 
-        case Primitive::kPrimLong:
+        case Primitive::kPrimLong: {
           // Processing a Dex `long-to-float' instruction.
-          locations->SetInAt(0, Location::RequiresRegister());
-          locations->SetOut(Location::RequiresFpuRegister());
-          locations->AddTemp(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
-          locations->AddTemp(Location::RequiresFpuRegister());
+          InvokeRuntimeCallingConvention calling_convention;
+          locations->SetInAt(0, Location::RegisterPairLocation(
+              calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+          locations->SetOut(Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
           break;
+        }
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
@@ -1820,47 +1820,13 @@
           break;
         }
 
-        case Primitive::kPrimLong: {
+        case Primitive::kPrimLong:
           // Processing a Dex `long-to-float' instruction.
-          Register low = in.AsRegisterPairLow<Register>();
-          Register high = in.AsRegisterPairHigh<Register>();
-          SRegister output = out.AsFpuRegister<SRegister>();
-          Register constant_low = locations->GetTemp(0).AsRegister<Register>();
-          Register constant_high = locations->GetTemp(1).AsRegister<Register>();
-          SRegister temp1_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp1_d = FromLowSToD(temp1_s);
-          SRegister temp2_s = locations->GetTemp(3).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp2_d = FromLowSToD(temp2_s);
-
-          // Operations use doubles for precision reasons (each 32-bit
-          // half of a long fits in the 53-bit mantissa of a double,
-          // but not in the 24-bit mantissa of a float).  This is
-          // especially important for the low bits.  The result is
-          // eventually converted to float.
-
-          // temp1_d = int-to-double(high)
-          __ vmovsr(temp1_s, high);
-          __ vcvtdi(temp1_d, temp1_s);
-          // Using vmovd to load the `k2Pow32EncodingForDouble` constant
-          // as an immediate value into `temp2_d` does not work, as
-          // this instruction only transfers 8 significant bits of its
-          // immediate operand.  Instead, use two 32-bit core
-          // registers to load `k2Pow32EncodingForDouble` into
-          // `temp2_d`.
-          __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble));
-          __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble));
-          __ vmovdrr(temp2_d, constant_low, constant_high);
-          // temp1_d = temp1_d * 2^32
-          __ vmuld(temp1_d, temp1_d, temp2_d);
-          // temp2_d = unsigned-to-double(low)
-          __ vmovsr(temp2_s, low);
-          __ vcvtdu(temp2_d, temp2_s);
-          // temp1_d = temp1_d + temp2_d
-          __ vaddd(temp1_d, temp1_d, temp2_d);
-          // output = double-to-float(temp1_d);
-          __ vcvtsd(output, temp1_d);
+          codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pL2f),
+                                  conversion,
+                                  conversion->GetDexPc(),
+                                  nullptr);
           break;
-        }
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.

diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index cafc868..2f2654d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc

@@ -127,11 +127,13 @@
     qpoints->pFmodf = fmodf;
     qpoints->pD2l = art_d2l;
     qpoints->pF2l = art_f2l;
+    qpoints->pL2f = art_l2f;
   } else {
     qpoints->pFmod = art_quick_fmod;
     qpoints->pFmodf = art_quick_fmodf;
     qpoints->pD2l = art_quick_d2l;
     qpoints->pF2l = art_quick_f2l;
+    qpoints->pL2f = art_quick_l2f;
   }
 
   // Intrinsics

diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 064f5a6..2e7f34e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S

@@ -1468,3 +1468,18 @@
     vmov  r0, s0
     b     art_f2l
 END art_quick_f2l
+
+    /* float art_l2f(int64_t l) */
+    .extern art_l2f
+ENTRY art_quick_l2f
+    push  {lr}
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset lr, 0
+    sub   sp, #4
+    .cfi_adjust_cfa_offset 4
+    bl    art_l2f
+    vmov  s0, r0
+    add   sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop   {pc}
+END art_quick_l2f

diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index c7aaa20..a64e818 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h

@@ -80,6 +80,7 @@
 // Math entrypoints.
 extern "C" int64_t art_quick_d2l(double);
 extern "C" int64_t art_quick_f2l(float);
+extern "C" float art_quick_l2f (int64_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
 extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);

diff --git a/test/477-long-to-float-conversion-precision/info.txt b/test/477-long-to-float-conversion-precision/info.txt
index d9d41d7..1e07cf3 100644
--- a/test/477-long-to-float-conversion-precision/info.txt
+++ b/test/477-long-to-float-conversion-precision/info.txt

@@ -1 +1 @@
-Tests for type conversions precision.
+Regression test for type conversion precision.

diff --git a/test/477-long-to-float-conversion-precision/src/Main.java b/test/477-long-to-float-conversion-precision/src/Main.java
index bc17053..cd97039 100644
--- a/test/477-long-to-float-conversion-precision/src/Main.java
+++ b/test/477-long-to-float-conversion-precision/src/Main.java

@@ -30,9 +30,8 @@
   }
 
   private static void longToFloat() {
-    // The result for this test case is slightly less accurate on ARM,
-    // due to the implementation of long-to-float type conversions for
-    // this architecture (both in Quick and Optimizing).
+    // The result for this test case used to be slightly less accurate
+    // on ARM (both in Quick and Optimizing).
     assertFloatEquals(Float.intBitsToFloat(-555858671), $opt$LongToFloat(-8008112895877447681L));
   }
 

diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index d224f43..fa13fe5 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk

@@ -387,29 +387,6 @@
 
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
-# Known broken tests for Quick's and Optimizing's ARM back ends.
-TEST_ART_BROKEN_ARM_RUN_TESTS := 477-long-to-float-conversion-precision  # b/20413424
-
-ifeq ($(TARGET_ARCH),arm)
-  ifneq (,$(filter 32,$(ALL_ADDRESS_SIZES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_ARM_RUN_TESTS),32)
-  endif
-endif
-
-ifdef TARGET_2ND_ARCH
-  ifeq ($(TARGET_2ND_ARCH),arm)
-    ifneq (,$(filter 32,$(ALL_ADDRESS_SIZES)))
-      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-          $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-          $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_ARM_RUN_TESTS),32)
-    endif
-  endif
-endif
-
-TEST_ART_BROKEN_ARM_RUN_TESTS :=
-
 # Known broken tests for the arm64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
commit	5b3ee56cd63ee9e3c70c0412d044b81ab9c94513	[log] [tgz]
author	Roland Levillain <rpl@google.com>	Tue Apr 14 16:02:41 2015 +0100
committer	Roland Levillain <rpl@google.com>	Tue Jun 02 17:37:40 2015 +0100
tree	b57f056c5e4ad18ba7c89034bbbb51b90820e648
parent	122addc3962207badd364b00a75a58639e76b10c [diff]