Some optimizations for the array alloc path.

- Force Array::Alloc() to be inlined.
- Simplify the array size overflow check.
- Turn fill_usable into a template parameter.
- Remove a branch in Array::DataOffset() and avoid
  Primitive::ComponentSize(), which has a switch, in the array alloc
  path.
- Strength reductions in the array size computation by using component
  size shifts instead of component sizes. Store component size shift
  in the upper 16 bits of primitive_type field.
- Speedup: ~4% (3435->3284) in MemAllocTest on N4.

Bug: 9986565

Change-Id: I4b142ffac4ab8b5b915836f1660a949d6442344c
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 7af88d6..521d7e7 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -33,13 +33,12 @@
   // The size of a java.lang.Class representing an array.
   static uint32_t ClassSize();
 
-  // Allocates an array with the given properties, if fill_usable is true the array will be of at
+  // Allocates an array with the given properties, if kFillUsable is true the array will be of at
   // least component_count size, however, if there's usable space at the end of the allocation the
   // array will fill it.
-  template <bool kIsInstrumented>
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                      size_t component_size, gc::AllocatorType allocator_type,
-                      bool fill_usable = false)
+  template <bool kIsInstrumented, bool kFillUsable = false>
+  ALWAYS_INLINE static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                                    size_t component_size_shift, gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Handle<Class> element_class,
@@ -66,12 +65,11 @@
   }
 
   static MemberOffset DataOffset(size_t component_size) {
-    if (component_size != sizeof(int64_t)) {
-      return OFFSET_OF_OBJECT_MEMBER(Array, first_element_);
-    } else {
-      // Align longs and doubles.
-      return MemberOffset(OFFSETOF_MEMBER(Array, first_element_) + 4);
-    }
+    DCHECK(IsPowerOfTwo(component_size)) << component_size;
+    size_t data_offset = RoundUp(OFFSETOF_MEMBER(Array, first_element_), component_size);
+    DCHECK_EQ(RoundUp(data_offset, component_size), data_offset)
+        << "Array data offset isn't aligned with component size";
+    return MemberOffset(data_offset);
   }
 
   void* GetRawData(size_t component_size, int32_t index)