X86_64: Add allocation entrypoint switching for CC is_marking

Only X86_64 done so far. Use normal TLAB allocators if GC is not
marking.

Allocation speed goes up by ~8% based on perf sampling.

Without change:
1.19%: art_quick_alloc_object_region_tlab

With change:
0.63%: art_quick_alloc_object_tlab
0.47%: art_quick_alloc_object_region_tlab

Bug: 31018974
Bug: 12687968

Test: test-art-host-run-test

Change-Id: I4c4d9eb229d4ad2f41b856ba5c2958a5eb3b7ffa
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0c671d2..3a8e29b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -854,6 +854,10 @@
         allocator_type != kAllocatorTypeRegionTLAB;
   }
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
+    if (kUseReadBarrier) {
+      // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up.
+      return true;
+    }
     return
         allocator_type != kAllocatorTypeBumpPointer &&
         allocator_type != kAllocatorTypeTLAB;
@@ -923,11 +927,20 @@
                                               size_t* bytes_tl_bulk_allocated)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::Object* AllocWithNewTLAB(Thread* self,
+                                   size_t alloc_size,
+                                   bool grow,
+                                   size_t* bytes_allocated,
+                                   size_t* usable_size,
+                                   size_t* bytes_tl_bulk_allocated)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template <bool kGrow>
-  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+                                               size_t alloc_size,
+                                               bool grow);
 
   // Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
   void RunFinalization(JNIEnv* env, uint64_t timeout);