Unify ART's various implementations of bit_cast.

ART had several implementations of art::bit_cast:

1. one in runtime/base/casts.h, declared as:

   template <class Dest, class Source>
   inline Dest bit_cast(const Source& source);

2. another one in runtime/utils.h, declared as:

   template<typename U, typename V>
   static inline V bit_cast(U in);

3. and a third local version, in runtime/memory_region.h,
   similar to the previous one:

   template<typename Source, typename Destination>
   static Destination MemoryRegion::local_bit_cast(Source in);

This CL removes versions 2. and 3. and changes their callers
to use 1. instead.  That version was chosen over the others
as:
- it was the oldest one in the code base; and
- its syntax was closer to the standard C++ cast operators,
  as it supports the following use:

    bit_cast<Destination>(source)

  since `Source' can be deduced from `source'.

Change-Id: I7334fd5d55bf0b8a0c52cb33cfbae6894ff83633
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5eb97d8..8351e22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1184,7 +1184,7 @@
       gpr_index_--;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushGpr(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushGpr(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushGpr(val);
       }
@@ -1192,7 +1192,7 @@
       stack_entries_++;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushStack(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushStack(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushStack(val);
       }
@@ -1252,16 +1252,16 @@
 
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(bit_cast<float, uint32_t>(val));
+      AdvanceInt(bit_cast<uint32_t, float>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiFPRegistersWidened) {
-            PushFpr8(bit_cast<double, uint64_t>(val));
+            PushFpr8(bit_cast<uint64_t, double>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(bit_cast<float, uint64_t>(val));
+            PushFpr8(static_cast<uint64_t>(bit_cast<uint32_t, float>(val)));
           }
         } else {
           PushFpr4(val);
@@ -1272,9 +1272,9 @@
           // Need to widen before storing: Note the "double" in the template instantiation.
           // Note: We need to jump through those hoops to make the compiler happy.
           DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
-          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint64_t, double>(val)));
         } else {
-          PushStack(bit_cast<float, uintptr_t>(val));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint32_t, float>(val)));
         }
         fpr_index_ = 0;
       }
@@ -1908,8 +1908,8 @@
       case 'F': {
         if (kRuntimeISA == kX86) {
           // Convert back the result to float.
-          double d = bit_cast<uint64_t, double>(result_f);
-          return bit_cast<float, uint32_t>(static_cast<float>(d));
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
           return result_f;
         }
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index fbbc863..98dfdbd 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -460,7 +460,7 @@
 static void UnstartedDoubleDoubleToRawLongBits(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   double in = shadow_frame->GetVRegDouble(arg_offset);
-  result->SetJ(bit_cast<int64_t>(in));
+  result->SetJ(bit_cast<int64_t, double>(in));
 }
 
 static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 939a1a9..f867f6a 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
@@ -60,23 +61,6 @@
     *ComputeInternalPointer<T>(offset) = value;
   }
 
-  // TODO: Local hack to prevent name clashes between two conflicting
-  // implementations of bit_cast:
-  // - art::bit_cast<Destination, Source> runtime/base/casts.h, and
-  // - art::bit_cast<Source, Destination> from runtime/utils.h.
-  // Remove this when these routines have been merged.
-  template<typename Source, typename Destination>
-  static Destination local_bit_cast(Source in) {
-    static_assert(sizeof(Source) <= sizeof(Destination),
-                  "Size of Source not <= size of Destination");
-    union {
-      Source u;
-      Destination v;
-    } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` does not need to be word-aligned.
   template<typename T> T LoadUnaligned(uintptr_t offset) const {
@@ -88,7 +72,7 @@
       equivalent_unsigned_integer_value +=
           *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte);
     }
-    return local_bit_cast<U, T>(equivalent_unsigned_integer_value);
+    return bit_cast<T, U>(equivalent_unsigned_integer_value);
   }
 
   // Store `value` (of type `T`) at `offset`.  The memory address
@@ -96,7 +80,7 @@
   template<typename T> void StoreUnaligned(uintptr_t offset, T value) const {
     // Equivalent unsigned integer type corresponding to T.
     typedef typename UnsignedIntegerType<sizeof(T)>::type U;
-    U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value);
+    U equivalent_unsigned_integer_value = bit_cast<U, T>(value);
     // Write the value byte by byte in a little-endian fashion.
     for (size_t i = 0; i < sizeof(U); ++i) {
       *ComputeInternalPointer<uint8_t>(offset + i) =
diff --git a/runtime/utils.h b/runtime/utils.h
index 9a9f51a..e20412e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -311,19 +311,6 @@
   return (ch < ' ' || ch > '~');
 }
 
-// Interpret the bit pattern of input (type U) as type V. Requires the size
-// of V >= size of U (compile-time checked).
-template<typename U, typename V>
-static inline V bit_cast(U in) {
-  static_assert(sizeof(U) <= sizeof(V), "Size of U not <= size of V");
-  union {
-    U u;
-    V v;
-  } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.