More allocation code optimizations.

- Inline Class::AllocObject() and Array::Alloc().
- Inline some short Mutex functions and add LIKELY/UNLIKELY to some
  Mutex functions.
- This change improves the Ritz MemAllocTest by ~6% on Nexus 4 and
  ~10% on host.

Bug: 9986565
Change-Id: I1606c74ddb21676cbc1de1a40e9b076fc23eaea4
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 7e8365e..c0cfee2 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -41,6 +41,54 @@
 }
 #endif  // ART_USE_FUTEXES
 
+#if defined(__APPLE__)
+
+// These overlays work on Mac OS 10.6 but haven't been tested on older releases.
+struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
+  long padding0;  // NOLINT(runtime/int) exact match to darwin type
+  int padding1;
+  uint32_t padding2;
+  int16_t padding3;
+  int16_t padding4;
+  uint32_t padding5;
+  pthread_t darwin_pthread_mutex_owner;  // Read by Mutex::GetExclusiveOwnerTid().
+  // ...remaining fields omitted; only the owner above is read.
+};
+
+struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
+  long padding0;  // NOLINT(runtime/int) exact match to darwin type
+  pthread_mutex_t padding1;
+  int padding2;
+  pthread_cond_t padding3;
+  pthread_cond_t padding4;
+  int padding5;
+  int padding6;
+  pthread_t darwin_pthread_rwlock_owner;  // Read by ReaderWriterMutex::GetExclusiveOwnerTid().
+  // ...remaining fields omitted; only the owner above is read.
+};
+
+#endif  // __APPLE__
+
+#if defined(__GLIBC__)
+
+struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
+  int32_t padding0[2];
+  int owner;  // Read by Mutex::GetExclusiveOwnerTid().
+  // ...remaining fields omitted; only the owner above is read.
+};
+
+struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
+#ifdef __LP64__
+  int32_t padding0[6];
+#else
+  int32_t padding0[7];
+#endif
+  int writer;  // Read by ReaderWriterMutex::GetExclusiveOwnerTid().
+  // ...remaining fields omitted; only the writer above is read.
+};
+
+#endif  // __GLIBC__
+
 class ScopedContentionRecorder {
  public:
   ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
@@ -185,6 +233,84 @@
 #endif
 }
 
+inline bool Mutex::IsExclusiveHeld(const Thread* self) const {
+  DCHECK(self == NULL || self == Thread::Current());
+  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
+  if (kDebugLocking) {
+    // Sanity check: if we think we hold the lock, it must be among our held mutexes.
+    if (result && self != NULL && level_ != kMonitorLock && !gAborting) {
+      CHECK_EQ(self->GetHeldMutex(level_), this);
+    }
+  }
+  return result;
+}
+
+inline uint64_t Mutex::GetExclusiveOwnerTid() const {
+#if ART_USE_FUTEXES
+  return exclusive_owner_;
+#elif defined(__BIONIC__)
+  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);  // Bits 16-31 of the mutex value.
+#elif defined(__GLIBC__)
+  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
+#elif defined(__APPLE__)
+  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
+  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
+  // 0 means unowned and -1 means PTHREAD_MTX_TID_SWITCHING; both are reported as no owner (0).
+  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
+  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
+    return 0;
+  }
+  uint64_t tid;
+  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
+  return tid;
+#else
+#error unsupported C library
+#endif
+}
+
+inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
+  DCHECK(self == NULL || self == Thread::Current());
+  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
+  if (kDebugLocking) {
+    // Sanity check: if the lock says we own it, the Thread's held-mutex record must agree.
+    if (self != NULL && result)  {
+      CHECK_EQ(self->GetHeldMutex(level_), this);
+    }
+  }
+  return result;
+}
+
+inline uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
+#if ART_USE_FUTEXES
+  int32_t state = state_;  // Snapshot state_; the checks below use this copy.
+  if (state == 0) {
+    return 0;  // No owner.
+  } else if (state > 0) {
+    return -1;  // Held shared; -1 converts to the maximum uint64_t value.
+  } else {
+    return exclusive_owner_;  // Negative state: report the recorded exclusive owner.
+  }
+#else
+#if defined(__BIONIC__)
+  return rwlock_.writerThreadId;
+#elif defined(__GLIBC__)
+  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
+#elif defined(__APPLE__)
+  const darwin_pthread_rwlock_t*
+      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
+  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
+  if (owner == (pthread_t)0) {
+    return 0;  // No owner.
+  }
+  uint64_t tid;
+  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
+  return tid;
+#else
+#error unsupported C library
+#endif
+#endif
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_MUTEX_INL_H_
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b99e7c9..b048bbb 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -31,54 +31,6 @@
 
 namespace art {
 
-#if defined(__APPLE__)
-
-// This works on Mac OS 10.6 but hasn't been tested on older releases.
-struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  int padding1;
-  uint32_t padding2;
-  int16_t padding3;
-  int16_t padding4;
-  uint32_t padding5;
-  pthread_t darwin_pthread_mutex_owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
-  long padding0;  // NOLINT(runtime/int) exact match to darwin type
-  pthread_mutex_t padding1;
-  int padding2;
-  pthread_cond_t padding3;
-  pthread_cond_t padding4;
-  int padding5;
-  int padding6;
-  pthread_t darwin_pthread_rwlock_owner;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __APPLE__
-
-#if defined(__GLIBC__)
-
-struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
-  int32_t padding0[2];
-  int owner;
-  // ...other stuff we don't care about.
-};
-
-struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
-#ifdef __LP64__
-  int32_t padding0[6];
-#else
-  int32_t padding0[7];
-#endif
-  int writer;
-  // ...other stuff we don't care about.
-};
-
-#endif  // __GLIBC__
-
 #if ART_USE_FUTEXES
 static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
   const int32_t one_sec = 1000 * 1000 * 1000;  // one second in nanoseconds.
@@ -346,7 +298,7 @@
     bool done = false;
     do {
       int32_t cur_state = state_;
-      if (cur_state == 0) {
+      if (LIKELY(cur_state == 0)) {
         // Change state from 0 to 1.
         done = android_atomic_acquire_cas(0, 1, &state_) == 0;
       } else {
@@ -432,14 +384,14 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == 1) {
+    if (LIKELY(cur_state == 1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state to 0.
       done = android_atomic_release_cas(cur_state, 0, &state_) == 0;
-      if (done) {  // Spurious fail?
+      if (LIKELY(done)) {  // Spurious fail?
         // Wake a contender
-        if (num_contenders_ > 0) {
+        if (UNLIKELY(num_contenders_ > 0)) {
           futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
         }
       }
@@ -461,41 +413,6 @@
   }
 }
 
-bool Mutex::IsExclusiveHeld(const Thread* self) const {
-  DCHECK(self == NULL || self == Thread::Current());
-  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
-  if (kDebugLocking) {
-    // Sanity debug check that if we think it is locked we have it in our held mutexes.
-    if (result && self != NULL && level_ != kMonitorLock && !gAborting) {
-      CHECK_EQ(self->GetHeldMutex(level_), this);
-    }
-  }
-  return result;
-}
-
-uint64_t Mutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
-  return exclusive_owner_;
-#elif defined(__BIONIC__)
-  return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
-#elif defined(__APPLE__)
-  const darwin_pthread_mutex_t* dpmutex = reinterpret_cast<const darwin_pthread_mutex_t*>(&mutex_);
-  pthread_t owner = dpmutex->darwin_pthread_mutex_owner;
-  // 0 for unowned, -1 for PTHREAD_MTX_TID_SWITCHING
-  // TODO: should we make darwin_pthread_mutex_owner volatile and recheck until not -1?
-  if ((owner == (pthread_t)0) || (owner == (pthread_t)-1)) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
-}
-
 void Mutex::Dump(std::ostream& os) const {
   os << (recursive_ ? "recursive " : "non-recursive ")
       << name_
@@ -549,7 +466,7 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == 0) {
+    if (LIKELY(cur_state == 0)) {
       // Change state from 0 to -1.
       done = android_atomic_acquire_cas(0, -1, &state_) == 0;
     } else {
@@ -583,14 +500,14 @@
   bool done = false;
   do {
     int32_t cur_state = state_;
-    if (cur_state == -1) {
+    if (LIKELY(cur_state == -1)) {
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state from -1 to 0.
       done = android_atomic_release_cas(-1, 0, &state_) == 0;
-      if (done) {  // cmpxchg may fail due to noise?
+      if (LIKELY(done)) {  // cmpxchg may fail due to noise?
         // Wake any waiters.
-        if (num_pending_readers_ > 0 || num_pending_writers_ > 0) {
+        if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_ > 0)) {
           futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
         }
       }
@@ -687,18 +604,6 @@
   return true;
 }
 
-bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
-  DCHECK(self == NULL || self == Thread::Current());
-  bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
-  if (kDebugLocking) {
-    // Sanity that if the pthread thinks we own the lock the Thread agrees.
-    if (self != NULL && result)  {
-      CHECK_EQ(self->GetHeldMutex(level_), this);
-    }
-  }
-  return result;
-}
-
 bool ReaderWriterMutex::IsSharedHeld(const Thread* self) const {
   DCHECK(self == NULL || self == Thread::Current());
   bool result;
@@ -710,37 +615,6 @@
   return result;
 }
 
-uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
-#if ART_USE_FUTEXES
-  int32_t state = state_;
-  if (state == 0) {
-    return 0;  // No owner.
-  } else if (state > 0) {
-    return -1;  // Shared.
-  } else {
-    return exclusive_owner_;
-  }
-#else
-#if defined(__BIONIC__)
-  return rwlock_.writerThreadId;
-#elif defined(__GLIBC__)
-  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
-#elif defined(__APPLE__)
-  const darwin_pthread_rwlock_t*
-      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
-  pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
-  if (owner == (pthread_t)0) {
-    return 0;
-  }
-  uint64_t tid;
-  CHECK_PTHREAD_CALL(pthread_threadid_np, (owner, &tid), __FUNCTION__);  // Requires Mac OS 10.6
-  return tid;
-#else
-#error unsupported C library
-#endif
-#endif
-}
-
 void ReaderWriterMutex::Dump(std::ostream& os) const {
   os << name_
       << " level=" << static_cast<int>(level_)
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index a174c0a..c6d028e 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -17,6 +17,7 @@
 #include "large_object_space.h"
 
 #include "base/logging.h"
+#include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "UniquePtr.h"
 #include "image.h"
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index bd2890c..b6c6cb4 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -17,6 +17,7 @@
 #include "common_test.h"
 
 #include "indirect_reference_table.h"
+#include "mirror/object-inl.h"
 
 namespace art {
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index eb73c7d..c7b370f 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -20,6 +20,8 @@
 #include "array.h"
 
 #include "class.h"
+#include "thread.h"
+#include "utils.h"
 
 namespace art {
 namespace mirror {
@@ -33,6 +35,39 @@
   return header_size + data_size;
 }
 
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           size_t component_size) {
+  DCHECK(array_class != NULL);
+  DCHECK_GE(component_count, 0);
+  DCHECK(array_class->IsArrayClass());
+
+  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
+  size_t data_size = component_count * component_size;
+  size_t size = header_size + data_size;
+
+  // Detect overflow of data_size or size; on overflow throw OutOfMemoryError and return NULL.
+  size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size);
+  if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) {
+    self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
+                                             PrettyDescriptor(array_class).c_str(),
+                                             component_count).c_str());
+    return NULL;
+  }
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Array* array = down_cast<Array*>(heap->AllocObject(self, array_class, size));
+  if (LIKELY(array != NULL)) {  // The length is only recorded on a non-NULL result.
+    DCHECK(array->IsArrayInstance());
+    array->SetLength(component_count);
+  }
+  return array;  // NULL if heap->AllocObject() returned NULL.
+}
+
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
+  DCHECK(array_class->IsArrayClass());  // Checked before querying GetComponentSize().
+  return Alloc(self, array_class, component_count, array_class->GetComponentSize());
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 88cd309..020085d 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -32,39 +32,6 @@
 namespace art {
 namespace mirror {
 
-Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                    size_t component_size) {
-  DCHECK(array_class != NULL);
-  DCHECK_GE(component_count, 0);
-  DCHECK(array_class->IsArrayClass());
-
-  size_t header_size = sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
-  size_t data_size = component_count * component_size;
-  size_t size = header_size + data_size;
-
-  // Check for overflow and throw OutOfMemoryError if this was an unreasonable request.
-  size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size);
-  if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) {
-    self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
-                                             PrettyDescriptor(array_class).c_str(),
-                                             component_count).c_str());
-    return NULL;
-  }
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObject(self, array_class, size));
-  if (array != NULL) {
-    DCHECK(array->IsArrayInstance());
-    array->SetLength(component_count);
-  }
-  return array;
-}
-
-Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
-  DCHECK(array_class->IsArrayClass());
-  return Alloc(self, array_class, component_count, array_class->GetComponentSize());
-}
-
 // Create a multi-dimensional array of Objects or primitive types.
 //
 // We have to generate the names for X[], X[][], X[][][], and so on.  The
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 1e11387..438ce81 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -342,6 +342,15 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, name_), name, false);
 }
 
+inline Object* Class::AllocObject(Thread* self) {
+  DCHECK(!IsArrayClass()) << PrettyClass(this);  // Not for arrays; see Array::Alloc().
+  DCHECK(IsInstantiable()) << PrettyClass(this);
+  // TODO: decide whether we want this check. It currently fails during bootstrap.
+  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
+  DCHECK_GE(this->object_size_, sizeof(Object));  // object_size_ is the size requested below.
+  return Runtime::Current()->GetHeap()->AllocObject(self, this, this->object_size_);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 5e8b827..328c67d 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -118,15 +118,6 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache, false);
 }
 
-Object* Class::AllocObject(Thread* self) {
-  DCHECK(!IsArrayClass()) << PrettyClass(this);
-  DCHECK(IsInstantiable()) << PrettyClass(this);
-  // TODO: decide whether we want this check. It currently fails during bootstrap.
-  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
-  DCHECK_GE(this->object_size_, sizeof(Object));
-  return Runtime::Current()->GetHeap()->AllocObject(self, this, this->object_size_);
-}
-
 void Class::SetClassSize(size_t new_class_size) {
   DCHECK_GE(new_class_size, GetClassSize()) << " class=" << PrettyTypeOf(this);
   SetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), new_class_size, false);
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index a505ed0..9d76c6b 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -17,6 +17,7 @@
 #include "stack_trace_element.h"
 
 #include "class.h"
+#include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
 #include "string.h"
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index f8a0e53..7d968c7 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -17,6 +17,7 @@
 #include "string.h"
 
 #include "array.h"
+#include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
@@ -285,4 +286,3 @@
 
 }  // namespace mirror
 }  // namespace art
-
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 671924a..aba81fe 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -21,6 +21,7 @@
 #include <unistd.h>
 
 #include "base/mutex.h"
+#include "base/mutex-inl.h"
 #include "base/timing_logger.h"
 #include "debugger.h"
 #include "thread.h"