X86_64: Add allocation entrypoint switching for CC is_marking

Only X86_64 done so far. Use normal TLAB allocators if GC is not
marking.

Allocation speed goes up by ~8% based on perf sampling.

Without change:
1.19%: art_quick_alloc_object_region_tlab

With change:
0.63%: art_quick_alloc_object_tlab
0.47%: art_quick_alloc_object_region_tlab

Bug: 31018974
Bug: 12687968

Test: test-art-host-run-test

Change-Id: I4c4d9eb229d4ad2f41b856ba5c2958a5eb3b7ffa
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 515fcbf..4a7e819 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -258,7 +258,7 @@
   entry_points_instrumented = instrumented;
 }
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
 #if !defined(__APPLE__) || !defined(__LP64__)
   switch (entry_points_allocator) {
     case gc::kAllocatorTypeDlMalloc: {
@@ -286,7 +286,12 @@
     }
     case gc::kAllocatorTypeRegionTLAB: {
       CHECK(kMovingCollector);
-      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      if (is_marking) {
+        SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      } else {
+        // Not marking means we need no read barriers and can just use the normal TLAB case.
+        SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
+      }
       return;
     }
     default:
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index 14a8e04..bd1e295 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,7 +23,9 @@
 
 namespace art {
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+// is_marking is only used for CC, if the GC is marking the allocation entrypoint is the marking
+// one.
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
 
 // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
 // starting it doesn't hold the mutator lock until after it has been added to the thread list.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index df23f94..78dad94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;