Revert CC-related changes.

Revert: "X86_64: Add allocation entrypoint switching for CC is_marking"
Revert: "Fix mips build in InitEntryPoints"
Revert: "Fix mac build in ResetQuickAllocEntryPoints"

Test: test-art-target-run-test
Change-Id: If38d44edf8c5def5c4d8c9419e4af0cd8d3be724
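
This revert removes the concurrent-copying (CC) "is_marking" allocation entrypoint switching: with the reverted change in place, threads using the RegionTLAB allocator ran the cheaper plain TLAB entrypoints while the GC was not marking and were switched to the read-barrier RegionTLAB entrypoints once marking started. A minimal C++ sketch of the switching being removed, with illustrative names (the real logic is the is_marking branch deleted from quick_alloc_entrypoints.cc below):

    // Hedged sketch of the behaviour this patch reverts (illustrative names, not
    // the actual ART code). While the concurrent-copying GC is marking, threads
    // must allocate through the read-barrier RegionTLAB entrypoints; while it is
    // not marking, the cheaper plain TLAB entrypoints suffice.
    #include <cstdio>

    struct QuickEntryPointsSketch {
      void (*pAllocObject)();
    };

    void AllocObjectRegionTlabSketch() { std::puts("region_tlab alloc (read barrier)"); }
    void AllocObjectTlabSketch()       { std::puts("tlab alloc (no read barrier)"); }

    void ResetAllocEntryPointsSketch(QuickEntryPointsSketch* qpoints, bool is_marking) {
      qpoints->pAllocObject = is_marking ? AllocObjectRegionTlabSketch : AllocObjectTlabSketch;
    }

    int main() {
      QuickEntryPointsSketch qpoints{};
      ResetAllocEntryPointsSketch(&qpoints, /*is_marking=*/true);
      qpoints.pAllocObject();  // -> region_tlab alloc (read barrier)
    }
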
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 5c56923..6a442a5 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -71,7 +71,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false);
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index db2fdca..fa86bf4 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -107,28 +107,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 .endm
 
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
-.endm
-
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
-GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
-.endm
-
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -208,6 +187,20 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
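
The assembly macros above stamp out one art_quick_alloc_* symbol per (allocator suffix, operation) pair; the revert folds the plain TLAB family back into the unconditional list and drops the separate FOR_TLAB_ALLOCATOR / FOR_NON_TLAB_ALLOCATORS grouping. The same stamp-a-family-per-suffix idea, reduced to a hedged standalone C++ sketch with illustrative macro and function names only:

    // Hedged sketch: one macro invocation per allocator suffix generates the whole
    // family of allocation routines, mirroring the GENERATE_ALLOC_ENTRYPOINTS_*
    // assembly macros above (names here are illustrative, not ART's).
    #include <cstdio>

    #define DEFINE_ALLOC_FAMILY(suffix)                                    \
      void AllocObject##suffix() { std::puts("AllocObject" #suffix); }     \
      void AllocArray##suffix() { std::puts("AllocArray" #suffix); }

    DEFINE_ALLOC_FAMILY(Tlab)        // -> AllocObjectTlab, AllocArrayTlab
    DEFINE_ALLOC_FAMILY(RegionTlab)  // -> AllocObjectRegionTlab, AllocArrayRegionTlab

    int main() {
      AllocObjectTlab();
      AllocArrayRegionTlab();
      return 0;
    }
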
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6fbc954..fb405fa 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1085,12 +1085,15 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called
-// for CC if the GC is not marking.
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
     // EBX, EDX: free.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     PUSH esi
     PUSH edi
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
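
The hunk above restores a hard guard in the hand-written x86 TLAB entrypoint: in a USE_READ_BARRIER build this entrypoint must never be installed (the read-barrier RegionTLAB entrypoints are used instead), so it traps immediately via two int3 breakpoints rather than allocating without a read barrier. A hedged C++ analogue of the guard, assuming the same preprocessor flag:

    // Hedged C++ analogue of the restored int3 guard (the real guard is the pair
    // of int3 instructions in art_quick_alloc_object_tlab above).
    inline void AssertNotReachableInReadBarrierBuild() {
    #if defined(USE_READ_BARRIER)
      __builtin_trap();  // fail fast, like int3, if this path is ever taken in a CC build
    #endif
    }
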
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f8066e4..860b77e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -18,13 +18,6 @@
 
 #include "arch/quick_alloc_entrypoints.S"
 
-MACRO0(ASSERT_USE_READ_BARRIER)
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
-END_MACRO
-
 MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
     // Create space for ART FP callee-saved registers
     subq MACRO_LITERAL(4 * 8), %rsp
@@ -979,10 +972,8 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
-
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 // Comment out allocators that have x86_64 specific asm.
-// Region TLAB:
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
@@ -995,19 +986,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-// Normal TLAB:
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
@@ -1184,11 +1162,16 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be
-// called with CC if the GC is not active.
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
+#if defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
@@ -1198,69 +1181,29 @@
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
-// called with CC if the GC is not active.
-DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
-    // RDI: mirror::Class* klass, RSI: ArtMethod*
-    // RDX, RCX, R8, R9: free. RAX: return val.
-    movq %rdi, %rdx
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
-    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_object_resolved_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
-// May be called with CC if the GC is not active.
-DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
-    // RDI: mirror::Class* klass, RSI: ArtMethod*
-    // RDX, RCX, R8, R9: free. RAX: return val.
-    movq %rdi, %rdx
-    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
-.Lart_quick_alloc_object_initialized_tlab_slow_path:
-    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
-END_FUNCTION art_quick_alloc_object_initialized_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB).
-DEFINE_FUNCTION art_quick_alloc_array_tlab
-    // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
-    // RCX: klass, R8, R9: free. RAX: return val.
-    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
-    movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
-    testl %ecx, %ecx
-    jz .Lart_quick_alloc_array_tlab_slow_path
-    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_tlab_slow_path
-.Lart_quick_alloc_array_tlab_slow_path:
-    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeTLAB
-END_FUNCTION art_quick_alloc_array_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB).
-DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab
-    // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
-    // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
-    movq %rdi, %rcx
-    // Already resolved, no null check.
-    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path
-.Lart_quick_alloc_array_resolved_tlab_slow_path:
-    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_array_resolved_tlab
-
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
 DEFINE_FUNCTION art_quick_alloc_array_region_tlab
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: klass, R8, R9: free. RAX: return val.
-    ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
     movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
     // Null check so that we can load the lock word.
     testl %ecx, %ecx
     jz .Lart_quick_alloc_array_region_tlab_slow_path
-    // Since we have allocation entrypoint switching, we know the GC is marking.
-    // Check the mark bit, if it is 0, do the read barrier mark.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path
+
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1283,11 +1226,33 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
-    ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     movq %rdi, %rcx
-    // Caller is responsible for read barrier.
     // Already resolved, no null check.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jnz .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    // Outgoing argument set up
+    movq %rcx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rcx
+    POP rdx
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_resolved_region_tlab_slow_path:
     ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
 END_FUNCTION art_quick_alloc_array_resolved_region_tlab
@@ -1297,19 +1262,24 @@
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
     // Null check so that we can load the lock word.
     testl %edx, %edx
     jz .Lart_quick_alloc_object_region_tlab_slow_path
-    // Since we have allocation entrypoint switching, we know the GC is marking.
-    // Check the mark bit, if it is 0, do the read barrier mark.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
-    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+    // Test if the GC is marking.
+    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
-    // Use resolved one since we already did the null check.
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    // Check the mark bit, if it is 1 avoid the read barrier.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1332,7 +1302,10 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     // No read barrier since the caller is responsible for that.
     movq %rdi, %rdx
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
@@ -1345,7 +1318,10 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-    ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
     movq %rdi, %rdx
     // No read barrier since the caller is responsible for that.
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
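
Across the x86_64 region-TLAB entrypoints, the revert replaces the unconditional mark-bit test (valid only when entrypoint switching guarantees the GC is marking) with the older two-step check: read the thread-local is-GC-marking flag through %gs, and only if marking, test the class lock word's mark bit and call artReadBarrierMark when it is clear. A hedged C++ rendering of that restored control flow (illustrative names and types; the real implementation is the hand-written assembly above):

    // Hedged C++ rendering of the restored fast-path control flow (illustrative
    // names and types; the real code is the assembly above).
    struct ClassSketch {
      bool mark_bit;  // stand-in for the mark bit in the object lock word
    };

    thread_local bool is_gc_marking_sketch = false;  // stand-in for Thread::tls32_.is_gc_marking

    ClassSketch* ReadBarrierMarkSketch(ClassSketch* klass) {
      klass->mark_bit = true;  // stand-in for artReadBarrierMark: return the marked/to-space ref
      return klass;
    }

    ClassSketch* LoadClassForAllocSketch(ClassSketch* klass) {
      if (klass == nullptr) {
        return nullptr;                        // the real code jumps to the slow path here
      }
      if (is_gc_marking_sketch) {              // only pay for the read barrier while marking
        if (!klass->mark_bit) {
          klass = ReadBarrierMarkSketch(klass);
        }
      }
      return klass;                            // proceed to the TLAB bump allocation
    }
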
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 82bb8e5..397655a 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -292,7 +292,7 @@
   entry_points_instrumented = instrumented;
 }
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
 #if !defined(__APPLE__) || !defined(__LP64__)
   switch (entry_points_allocator) {
     case gc::kAllocatorTypeDlMalloc: {
@@ -320,12 +320,7 @@
     }
     case gc::kAllocatorTypeRegionTLAB: {
       CHECK(kMovingCollector);
-      if (is_marking) {
-        SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
-      } else {
-        // Not marking means we need no read barriers and can just use the normal TLAB case.
-        SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
-      }
+      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
       return;
     }
     default:
@@ -333,7 +328,6 @@
   }
 #else
   UNUSED(qpoints);
-  UNUSED(is_marking);
 #endif
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index bd1e295..14a8e04 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,9 +23,7 @@
 
 namespace art {
 
-// is_marking is only used for CC, if the GC is marking the allocation entrypoint is the marking
-// one.
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
 // starting it doesn't hold the mutator lock until after it has been added to the thread list.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 78dad94..df23f94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
+  ResetQuickAllocEntryPoints(qpoints);
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 54f2210..97129e8 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -247,7 +247,7 @@
   if (allocator_type != kAllocatorTypeTLAB &&
       allocator_type != kAllocatorTypeRegionTLAB &&
       allocator_type != kAllocatorTypeRosAlloc &&
-      UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
+      UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
   mirror::Object* ret;
@@ -267,9 +267,8 @@
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind or asan, we should be using the instrumented path.
         size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
-                                               max_bytes_tl_bulk_allocated,
-                                               kGrow))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
+                                                      max_bytes_tl_bulk_allocated))) {
           return nullptr;
         }
         ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
@@ -278,18 +277,14 @@
         DCHECK(!is_running_on_memory_tool_);
         size_t max_bytes_tl_bulk_allocated =
             rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
-                                               max_bytes_tl_bulk_allocated,
-                                               kGrow))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
+                                                      max_bytes_tl_bulk_allocated))) {
           return nullptr;
         }
         if (!kInstrumented) {
           DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
         }
-        ret = rosalloc_space_->AllocNonvirtual(self,
-                                               alloc_size,
-                                               bytes_allocated,
-                                               usable_size,
+        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
@@ -297,34 +292,22 @@
     case kAllocatorTypeDlMalloc: {
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind, we should be using the instrumented path.
-        ret = dlmalloc_space_->Alloc(self,
-                                     alloc_size,
-                                     bytes_allocated,
-                                     usable_size,
+        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                      bytes_tl_bulk_allocated);
       } else {
         DCHECK(!is_running_on_memory_tool_);
-        ret = dlmalloc_space_->AllocNonvirtual(self,
-                                               alloc_size,
-                                               bytes_allocated,
-                                               usable_size,
+        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
     }
     case kAllocatorTypeNonMoving: {
-      ret = non_moving_space_->Alloc(self,
-                                     alloc_size,
-                                     bytes_allocated,
-                                     usable_size,
+      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                      bytes_tl_bulk_allocated);
       break;
     }
     case kAllocatorTypeLOS: {
-      ret = large_object_space_->Alloc(self,
-                                       alloc_size,
-                                       bytes_allocated,
-                                       usable_size,
+      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
                                        bytes_tl_bulk_allocated);
       // Note that the bump pointer spaces aren't necessarily next to
       // the other continuous spaces like the non-moving alloc space or
@@ -332,38 +315,80 @@
       DCHECK(ret == nullptr || large_object_space_->Contains(ret));
       break;
     }
-    case kAllocatorTypeRegion: {
-      DCHECK(region_space_ != nullptr);
-      alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
-      ret = region_space_->AllocNonvirtual<false>(alloc_size,
-                                                  bytes_allocated,
-                                                  usable_size,
-                                                  bytes_tl_bulk_allocated);
-      break;
-    }
-    case kAllocatorTypeTLAB:
-      FALLTHROUGH_INTENDED;
-    case kAllocatorTypeRegionTLAB: {
-      DCHECK_ALIGNED(alloc_size, kObjectAlignment);
-      static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
-                    "mismatched alignments");
-      static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
-                    "mismatched alignments");
+    case kAllocatorTypeTLAB: {
+      DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
       if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        // kAllocatorTypeTLAB may be the allocator for region space TLAB if the GC is not marking,
-        // that is why the allocator is not passed down.
-        return AllocWithNewTLAB(self,
-                                alloc_size,
-                                kGrow,
-                                bytes_allocated,
-                                usable_size,
-                                bytes_tl_bulk_allocated);
+        const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
+          return nullptr;
+        }
+        // Try allocating a new thread local buffer, if the allocation fails the space must be
+        // full so return null.
+        if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+          return nullptr;
+        }
+        *bytes_tl_bulk_allocated = new_tlab_size;
+      } else {
+        *bytes_tl_bulk_allocated = 0;
       }
       // The allocation can't fail.
       ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
       *bytes_allocated = alloc_size;
-      *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
+      *usable_size = alloc_size;
+      break;
+    }
+    case kAllocatorTypeRegion: {
+      DCHECK(region_space_ != nullptr);
+      alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
+      ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+                                                  bytes_tl_bulk_allocated);
+      break;
+    }
+    case kAllocatorTypeRegionTLAB: {
+      DCHECK(region_space_ != nullptr);
+      DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+      if (UNLIKELY(self->TlabSize() < alloc_size)) {
+        if (space::RegionSpace::kRegionSize >= alloc_size) {
+          // Non-large. Check OOME for a tlab.
+          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
+            // Try to allocate a tlab.
+            if (!region_space_->AllocNewTlab(self)) {
+              // Failed to allocate a tlab. Try non-tlab.
+              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+                                                          bytes_tl_bulk_allocated);
+              return ret;
+            }
+            *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+            // Fall-through.
+          } else {
+            // Check OOME for a non-tlab allocation.
+            if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
+              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+                                                          bytes_tl_bulk_allocated);
+              return ret;
+            } else {
+              // Neither tlab or non-tlab works. Give up.
+              return nullptr;
+            }
+          }
+        } else {
+          // Large. Check OOME.
+          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+            ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+                                                        bytes_tl_bulk_allocated);
+            return ret;
+          } else {
+            return nullptr;
+          }
+        }
+      } else {
+        *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
+      }
+      // The allocation can't fail.
+      ret = self->AllocTlab(alloc_size);
+      DCHECK(ret != nullptr);
+      *bytes_allocated = alloc_size;
       *usable_size = alloc_size;
       break;
     }
@@ -383,16 +408,15 @@
   return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
 }
 
-inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
-                                            size_t alloc_size,
-                                            bool grow) {
+template <bool kGrow>
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
   size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
     }
     if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
-      if (!grow) {
+      if (!kGrow) {
         return true;
       }
       // TODO: Grow for allocation is racy, fix it.
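
The heap-inl.h hunks also restore IsOutOfMemoryOnAllocation to a template on kGrow instead of taking a runtime bool, so each call site instantiates a copy with the unused branch folded away at compile time. A minimal standalone sketch of that pattern, with simplified plain-size_t fields instead of the real Heap atomics:

    // Minimal sketch of the template<bool kGrow> pattern restored above
    // (simplified: plain size_t arguments instead of the Heap members).
    #include <cstddef>

    template <bool kGrow>
    bool IsOutOfMemorySketch(std::size_t num_bytes_allocated, std::size_t alloc_size,
                             std::size_t max_allowed_footprint, std::size_t growth_limit) {
      const std::size_t new_footprint = num_bytes_allocated + alloc_size;
      if (new_footprint > max_allowed_footprint) {
        if (new_footprint > growth_limit) {
          return true;                 // over the hard limit: always out of memory
        }
        if (!kGrow) {                  // compile-time constant: branch removed per instantiation
          return true;                 // caller did not allow growing the footprint
        }
        // kGrow == true: the real code grows max_allowed_footprint here (racy, see TODO above).
      }
      return false;
    }

    // Usage mirrors the restored call sites:
    //   IsOutOfMemorySketch<false>(allocated, alloc_size, footprint, limit);
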
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ae9741f..f0e619d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1819,7 +1819,7 @@
           break;
         }
         // Try to transition the heap if the allocation failure was due to the space being full.
-        if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) {
+        if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) {
           // If we aren't out of memory then the OOM was probably from the non moving space being
           // full. Attempt to disable compaction and turn the main space into a non moving space.
           DisableMovingGc();
@@ -4225,72 +4225,5 @@
   gc_pause_listener_.StoreRelaxed(nullptr);
 }
 
-mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
-                                       size_t alloc_size,
-                                       bool grow,
-                                       size_t* bytes_allocated,
-                                       size_t* usable_size,
-                                       size_t* bytes_tl_bulk_allocated) {
-  const AllocatorType allocator_type = GetCurrentAllocator();
-  if (allocator_type == kAllocatorTypeTLAB) {
-    DCHECK(bump_pointer_space_ != nullptr);
-    const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
-    if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
-      return nullptr;
-    }
-    // Try allocating a new thread local buffer, if the allocation fails the space must be
-    // full so return null.
-    if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
-      return nullptr;
-    }
-    *bytes_tl_bulk_allocated = new_tlab_size;
-  } else {
-    DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
-    DCHECK(region_space_ != nullptr);
-    if (space::RegionSpace::kRegionSize >= alloc_size) {
-      // Non-large. Check OOME for a tlab.
-      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
-                                            space::RegionSpace::kRegionSize,
-                                            grow))) {
-        // Try to allocate a tlab.
-        if (!region_space_->AllocNewTlab(self)) {
-          // Failed to allocate a tlab. Try non-tlab.
-          return region_space_->AllocNonvirtual<false>(alloc_size,
-                                                       bytes_allocated,
-                                                       usable_size,
-                                                       bytes_tl_bulk_allocated);
-        }
-        *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
-        // Fall-through to using the TLAB below.
-      } else {
-        // Check OOME for a non-tlab allocation.
-        if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
-          return region_space_->AllocNonvirtual<false>(alloc_size,
-                                                       bytes_allocated,
-                                                       usable_size,
-                                                       bytes_tl_bulk_allocated);
-        }
-        // Neither tlab or non-tlab works. Give up.
-        return nullptr;
-      }
-    } else {
-      // Large. Check OOME.
-      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
-        return region_space_->AllocNonvirtual<false>(alloc_size,
-                                                     bytes_allocated,
-                                                     usable_size,
-                                                     bytes_tl_bulk_allocated);
-      }
-      return nullptr;
-    }
-  }
-  // Refilled TLAB, return.
-  mirror::Object* ret = self->AllocTlab(alloc_size);
-  DCHECK(ret != nullptr);
-  *bytes_allocated = alloc_size;
-  *usable_size = alloc_size;
-  return ret;
-}
-
 }  // namespace gc
 }  // namespace art
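
With AllocWithNewTLAB deleted here, the region-TLAB refill decision lives back inline in Heap::TryToAllocate (see the heap-inl.h hunk above): allocations no larger than a region try to carve a new region-sized TLAB and otherwise fall back to a direct region-space allocation, while larger requests always go straight to the region space, with an out-of-memory check guarding each step. A hedged sketch of just that decision, with illustrative names rather than the real Heap/RegionSpace API:

    // Hedged sketch of the region-TLAB refill decision inlined back into
    // heap-inl.h (illustrative; the real code uses Heap/RegionSpace members).
    #include <cstddef>

    enum class RefillDecision { kRefillTlab, kAllocDirectly, kOutOfMemory };

    RefillDecision DecideRegionTlabRefill(std::size_t alloc_size, std::size_t region_size,
                                          bool tlab_would_oom, bool direct_would_oom) {
      if (alloc_size > region_size) {
        // Large allocation: never served from a TLAB.
        return direct_would_oom ? RefillDecision::kOutOfMemory : RefillDecision::kAllocDirectly;
      }
      if (!tlab_would_oom) {
        // Room for a whole new region-sized TLAB; the real code falls back to a
        // direct allocation only if RegionSpace::AllocNewTlab() itself fails.
        return RefillDecision::kRefillTlab;
      }
      // A new TLAB would push the heap over its footprint; try a direct allocation.
      return direct_would_oom ? RefillDecision::kOutOfMemory : RefillDecision::kAllocDirectly;
    }
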
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 3a8e29b..0c671d2 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -854,10 +854,6 @@
         allocator_type != kAllocatorTypeRegionTLAB;
   }
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
-    if (kUseReadBarrier) {
-      // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up.
-      return true;
-    }
     return
         allocator_type != kAllocatorTypeBumpPointer &&
         allocator_type != kAllocatorTypeTLAB;
@@ -927,20 +923,11 @@
                                               size_t* bytes_tl_bulk_allocated)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::Object* AllocWithNewTLAB(Thread* self,
-                                   size_t alloc_size,
-                                   bool grow,
-                                   size_t* bytes_allocated,
-                                   size_t* usable_size,
-                                   size_t* bytes_tl_bulk_allocated)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
-                                               size_t alloc_size,
-                                               bool grow);
+  template <bool kGrow>
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
 
   // Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
   void RunFinalization(JNIEnv* env, uint64_t timeout);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 870d1ae..d4c322e 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -630,7 +630,7 @@
 }
 
 static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
-  thread->ResetQuickAllocEntryPointsForThread(kUseReadBarrier && thread->GetIsGcMarking());
+  thread->ResetQuickAllocEntryPointsForThread();
 }
 
 void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c92e38b..65c8681 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -122,27 +122,21 @@
   CHECK(kUseReadBarrier);
   tls32_.is_gc_marking = is_marking;
   UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
-  if (kRuntimeISA == kX86_64) {
-    // Entrypoint switching is only implemented for X86_64.
-    ResetQuickAllocEntryPointsForThread(is_marking);
-  }
 }
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
   uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints));
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
+      sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
   InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
 }
 
-void Thread::ResetQuickAllocEntryPointsForThread(bool is_marking) {
-  // Entrypoint switching is currnetly only faster for X86_64 since other archs don't have TLAB
-  // fast path for non region space entrypoints.
-  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints, is_marking);
+void Thread::ResetQuickAllocEntryPointsForThread() {
+  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
 }
 
 class DeoptimizationContextRecord {
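
InitTlsEntryPoints above seeds every slot of the JNI and quick entrypoint tables with UnimplementedEntryPoint before InitEntryPoints wires in the real functions, by walking the contiguous structs as raw words. A minimal standalone sketch of that placeholder-fill pattern (generic table and sentinel, not the actual Thread members):

    // Minimal sketch of the placeholder-fill pattern used by InitTlsEntryPoints
    // above: treat a contiguous table of function pointers as raw words and seed
    // every slot with a sentinel so an unwired entrypoint faults loudly.
    #include <cstddef>
    #include <cstdint>

    void FillWithSentinel(void* table, std::size_t size_in_bytes, std::uintptr_t sentinel) {
      auto* begin = reinterpret_cast<std::uintptr_t*>(table);
      auto* end = reinterpret_cast<std::uintptr_t*>(
          reinterpret_cast<std::uint8_t*>(table) + size_in_bytes);
      for (std::uintptr_t* it = begin; it != end; ++it) {
        *it = sentinel;
      }
    }
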
diff --git a/runtime/thread.h b/runtime/thread.h
index 35226f2..97093a6 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1007,7 +1007,7 @@
     tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
   }
 
-  void ResetQuickAllocEntryPointsForThread(bool is_marking);
+  void ResetQuickAllocEntryPointsForThread();
 
   // Returns the remaining space in the TLAB.
   size_t TlabSize() const;