ART: Clean up arm64 kNumberOfXRegisters usage.

Avoid undefined behavior for arm64 stemming from 1u << 32 in
loops with upper bound kNumberOfXRegisters.

Create iterators for enumerating bits in an integer either
from high to low or from low to high and use them for
<arch>Context::FillCalleeSaves() on all architectures.

Refactor runtime/utils.{h,cc} by moving all bit-fiddling
functions to runtime/base/bit_utils.{h,cc} (together with
the new bit iterators) and all time-related functions to
runtime/base/time_utils.{h,cc}. Improve test coverage and
fix some corner cases for the bit-fiddling functions.

Bug: 13925192

(cherry picked from commit 80afd02024d20e60b197d3adfbb43cc303cf29e0)

Change-Id: I905257a21de90b5860ebe1e39563758f721eab82
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f56e446..13775fe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -493,11 +493,6 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
-static uint32_t LeastSignificantBit(uint32_t mask) {
-  // ffs starts at 1.
-  return ffs(mask) - 1;
-}
-
 void CodeGeneratorARM::ComputeSpillMask() {
   core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
   // Save one extra register for baseline. Note that on thumb2, there is no easy
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 43fe374..9e18f11 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -22,6 +22,7 @@
 #include "invoke_type.h"
 #include "nodes.h"
 #include "quick/inline_method_analyser.h"
+#include "utils.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 41adc72..2bad682 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -18,6 +18,7 @@
 
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
+#include "base/bit_utils.h"
 #include "utils/growable_array.h"
 #include "scoped_thread_state_change.h"
 
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e89417d..9ede910 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -20,6 +20,7 @@
 #include "base/value_object.h"
 #include "utils/growable_array.h"
 #include "locations.h"
+#include "primitive.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index dc9c708..97bd777 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
 #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
 
+#include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "primitive.h"
 #include "utils/growable_array.h"