Revert CC-related changes.
Revert: "X86_64: Add allocation entrypoint switching for CC is_marking"
Revert: "Fix mips build in InitEntryPoints"
Revert: "Fix mac build in ResetQuickAllocEntryPoints"
Test: test-art-target-run-test
Change-Id: If38d44edf8c5def5c4d8c9419e4af0cd8d3be724
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 5c56923..6a442a5 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -71,7 +71,7 @@
jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
// Alloc
- ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false);
+ ResetQuickAllocEntryPoints(qpoints);
// Cast
qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index db2fdca..fa86bf4 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -107,28 +107,7 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
.endm
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
-.endm
-
.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
-GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
-.endm
-
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -208,6 +187,20 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6fbc954..fb405fa 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1085,12 +1085,15 @@
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
END_MACRO
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called
-// for CC if the GC is not marking.
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
DEFINE_FUNCTION art_quick_alloc_object_tlab
// Fast path tlab allocation.
// EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
// EBX, EDX: free.
+#if defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
PUSH esi
PUSH edi
movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array
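The int3 guard restored above makes the plain TLAB entrypoint trap immediately in a read-barrier (CC) build, where it must never be reached now that the is_marking entrypoint switching is gone. A rough C++ analogue of the guard, for illustration only (not ART code):

    // Illustration only: the assembly above expresses this with two int3
    // breakpoint instructions at the top of the entrypoint.
    #if defined(USE_READ_BARRIER)
      __builtin_trap();  // plain TLAB allocation must be unreachable under CC
    #endif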
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f8066e4..860b77e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -18,13 +18,6 @@
#include "arch/quick_alloc_entrypoints.S"
-MACRO0(ASSERT_USE_READ_BARRIER)
-#if !defined(USE_READ_BARRIER)
- int3
- int3
-#endif
-END_MACRO
-
MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
// Create space for ART FP callee-saved registers
subq MACRO_LITERAL(4 * 8), %rsp
@@ -979,10 +972,8 @@
END_MACRO
// Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
-
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
// Comment out allocators that have x86_64 specific asm.
-// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
@@ -995,19 +986,6 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-// Normal TLAB:
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
DEFINE_FUNCTION art_quick_alloc_object_rosalloc
@@ -1184,11 +1162,16 @@
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
END_MACRO
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be
-// called with CC if the GC is not active.
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
DEFINE_FUNCTION art_quick_alloc_object_tlab
+ // Fast path tlab allocation.
// RDI: uint32_t type_idx, RSI: ArtMethod*
// RDX, RCX, R8, R9: free. RAX: return val.
+#if defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
+ // Might need a special macro since rsi and edx are 32b/64b mismatched.
movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array
// Might need to break down into multiple instructions to get the base address in a register.
// Load the class
@@ -1198,69 +1181,29 @@
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
END_FUNCTION art_quick_alloc_object_tlab
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
-// called with CC if the GC is not active.
-DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
- // RDI: mirror::Class* klass, RSI: ArtMethod*
- // RDX, RCX, R8, R9: free. RAX: return val.
- movq %rdi, %rdx
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
- ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_object_resolved_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
-// May be called with CC if the GC is not active.
-DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
- // RDI: mirror::Class* klass, RSI: ArtMethod*
- // RDX, RCX, R8, R9: free. RAX: return val.
- movq %rdi, %rdx
- ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
-.Lart_quick_alloc_object_initialized_tlab_slow_path:
- ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
-END_FUNCTION art_quick_alloc_object_initialized_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB).
-DEFINE_FUNCTION art_quick_alloc_array_tlab
- // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
- // RCX: klass, R8, R9: free. RAX: return val.
- movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx // Load dex cache resolved types array
- movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx // Load the class
- testl %ecx, %ecx
- jz .Lart_quick_alloc_array_tlab_slow_path
- ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_tlab_slow_path
-.Lart_quick_alloc_array_tlab_slow_path:
- ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeTLAB
-END_FUNCTION art_quick_alloc_array_tlab
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB).
-DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab
- // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
- // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
- movq %rdi, %rcx
- // Already resolved, no null check.
- ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path
-.Lart_quick_alloc_array_resolved_tlab_slow_path:
- ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_array_resolved_tlab
-
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_array_region_tlab
// Fast path region tlab allocation.
// RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
// RCX: klass, R8, R9: free. RAX: return val.
- ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx // Load dex cache resolved types array
movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx // Load the class
// Null check so that we can load the lock word.
testl %ecx, %ecx
jz .Lart_quick_alloc_array_region_tlab_slow_path
- // Since we have allocation entrypoint switching, we know the GC is marking.
- // Check the mark bit, if it is 0, do the read barrier mark.
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
- jz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path
+
+ cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+ jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
+.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
+ // Check the mark bit; if it is 1, return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+ jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
// The read barrier slow path. Mark the class.
PUSH rdi
@@ -1283,11 +1226,33 @@
// Fast path region tlab allocation.
// RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
// RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
- ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
movq %rdi, %rcx
- // Caller is responsible for read barrier.
// Already resolved, no null check.
+ cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+ jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
+ // Check the mark bit; if it is 1, return.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+ jnz .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
+ // The read barrier slow path. Mark the class.
+ PUSH rdi
+ PUSH rsi
+ PUSH rdx
+ // Outgoing argument set up
+ movq %rcx, %rdi // Pass the class as the first param.
+ call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj)
+ movq %rax, %rcx
+ POP rdx
+ POP rsi
+ POP rdi
+ jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
.Lart_quick_alloc_array_resolved_region_tlab_slow_path:
ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_array_resolved_region_tlab
@@ -1297,19 +1262,24 @@
// Fast path region tlab allocation.
// RDI: uint32_t type_idx, RSI: ArtMethod*
// RDX, RCX, R8, R9: free. RAX: return val.
- ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array
movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx // Load the class
// Null check so that we can load the lock word.
testl %edx, %edx
jz .Lart_quick_alloc_object_region_tlab_slow_path
- // Since we have allocation entrypoint switching, we know the GC is marking.
- // Check the mark bit, if it is 0, do the read barrier mark.
- testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
- jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+ // Test if the GC is marking.
+ cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+ jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
- // Use resolved one since we already did the null check.
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+ ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+ // Check the mark bit; if it is 1, avoid the read barrier.
+ testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
// The read barrier slow path. Mark the class.
PUSH rdi
@@ -1332,7 +1302,10 @@
// Fast path region tlab allocation.
// RDI: mirror::Class* klass, RSI: ArtMethod*
// RDX, RCX, R8, R9: free. RAX: return val.
- ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
// No read barrier since the caller is responsible for that.
movq %rdi, %rdx
ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
@@ -1345,7 +1318,10 @@
// Fast path region tlab allocation.
// RDI: mirror::Class* klass, RSI: ArtMethod*
// RDX, RCX, R8, R9: free. RAX: return val.
- ASSERT_USE_READ_BARRIER
+#if !defined(USE_READ_BARRIER)
+ int3
+ int3
+#endif
movq %rdi, %rdx
// No read barrier since the caller is responsible for that.
ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
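The region-TLAB entrypoints above share one restored control flow: test the thread-local is-GC-marking flag, and only while marking check the class's lock-word mark bit and take the artReadBarrierMark slow path. A simplified C++ sketch of that flow (MarkBitSet is a hypothetical helper standing in for the lock-word test; the real implementation is the hand-written assembly above):

    // Simplified sketch of the restored class-load read barrier, not ART code.
    mirror::Class* ClassLoadReadBarrier(Thread* self, mirror::Class* klass) {
      if (self->GetIsGcMarking()) {      // cmpl $0, %gs:THREAD_IS_GC_MARKING_OFFSET
        if (!MarkBitSet(klass)) {        // testl LOCK_WORD_MARK_BIT_MASK_SHIFTED, lock word
          // Slow path: mark the class and continue with the returned reference.
          klass = down_cast<mirror::Class*>(artReadBarrierMark(klass));
        }
      }
      return klass;  // GC not marking, or mark bit already set.
    }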
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 82bb8e5..397655a 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -292,7 +292,7 @@
entry_points_instrumented = instrumented;
}
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
#if !defined(__APPLE__) || !defined(__LP64__)
switch (entry_points_allocator) {
case gc::kAllocatorTypeDlMalloc: {
@@ -320,12 +320,7 @@
}
case gc::kAllocatorTypeRegionTLAB: {
CHECK(kMovingCollector);
- if (is_marking) {
- SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
- } else {
- // Not marking means we need no read barriers and can just use the normal TLAB case.
- SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
- }
+ SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
return;
}
default:
@@ -333,7 +328,6 @@
}
#else
UNUSED(qpoints);
- UNUSED(is_marking);
#endif
UNIMPLEMENTED(FATAL);
UNREACHABLE();
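With the is_marking parameter gone, ResetQuickAllocEntryPoints selects an entrypoint set from the allocator type alone; the kAllocatorTypeRegionTLAB case above condenses to:

    // Condensed from the function above: the region TLAB allocator now always
    // installs the region_tlab entrypoints, with no is_marking special case.
    case gc::kAllocatorTypeRegionTLAB: {
      CHECK(kMovingCollector);
      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
      return;
    }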
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index bd1e295..14a8e04 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,9 +23,7 @@
namespace art {
-// is_marking is only used for CC, if the GC is marking the allocation entrypoint is the marking
-// one.
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
// Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
// starting it doesn't hold the mutator lock until after it has been added to the thread list.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 78dad94..df23f94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
// Alloc
- ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
+ ResetQuickAllocEntryPoints(qpoints);
// DexCache
qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 54f2210..97129e8 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -247,7 +247,7 @@
if (allocator_type != kAllocatorTypeTLAB &&
allocator_type != kAllocatorTypeRegionTLAB &&
allocator_type != kAllocatorTypeRosAlloc &&
- UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
+ UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
return nullptr;
}
mirror::Object* ret;
@@ -267,9 +267,8 @@
if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
// If running on valgrind or asan, we should be using the instrumented path.
size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
- if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
- max_bytes_tl_bulk_allocated,
- kGrow))) {
+ if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
+ max_bytes_tl_bulk_allocated))) {
return nullptr;
}
ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
@@ -278,18 +277,14 @@
DCHECK(!is_running_on_memory_tool_);
size_t max_bytes_tl_bulk_allocated =
rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
- if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
- max_bytes_tl_bulk_allocated,
- kGrow))) {
+ if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
+ max_bytes_tl_bulk_allocated))) {
return nullptr;
}
if (!kInstrumented) {
DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
}
- ret = rosalloc_space_->AllocNonvirtual(self,
- alloc_size,
- bytes_allocated,
- usable_size,
+ ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
break;
@@ -297,34 +292,22 @@
case kAllocatorTypeDlMalloc: {
if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
// If running on valgrind, we should be using the instrumented path.
- ret = dlmalloc_space_->Alloc(self,
- alloc_size,
- bytes_allocated,
- usable_size,
+ ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
} else {
DCHECK(!is_running_on_memory_tool_);
- ret = dlmalloc_space_->AllocNonvirtual(self,
- alloc_size,
- bytes_allocated,
- usable_size,
+ ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
}
break;
}
case kAllocatorTypeNonMoving: {
- ret = non_moving_space_->Alloc(self,
- alloc_size,
- bytes_allocated,
- usable_size,
+ ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
break;
}
case kAllocatorTypeLOS: {
- ret = large_object_space_->Alloc(self,
- alloc_size,
- bytes_allocated,
- usable_size,
+ ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
bytes_tl_bulk_allocated);
// Note that the bump pointer spaces aren't necessarily next to
// the other continuous spaces like the non-moving alloc space or
@@ -332,38 +315,80 @@
DCHECK(ret == nullptr || large_object_space_->Contains(ret));
break;
}
- case kAllocatorTypeRegion: {
- DCHECK(region_space_ != nullptr);
- alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
- ret = region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
- break;
- }
- case kAllocatorTypeTLAB:
- FALLTHROUGH_INTENDED;
- case kAllocatorTypeRegionTLAB: {
- DCHECK_ALIGNED(alloc_size, kObjectAlignment);
- static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
- "mismatched alignments");
- static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
- "mismatched alignments");
+ case kAllocatorTypeTLAB: {
+ DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
if (UNLIKELY(self->TlabSize() < alloc_size)) {
- // kAllocatorTypeTLAB may be the allocator for region space TLAB if the GC is not marking,
- // that is why the allocator is not passed down.
- return AllocWithNewTLAB(self,
- alloc_size,
- kGrow,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+ if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
+ return nullptr;
+ }
+ // Try allocating a new thread local buffer; if the allocation fails, the space must be
+ // full so return null.
+ if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+ return nullptr;
+ }
+ *bytes_tl_bulk_allocated = new_tlab_size;
+ } else {
+ *bytes_tl_bulk_allocated = 0;
}
// The allocation can't fail.
ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
- *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer.
+ *usable_size = alloc_size;
+ break;
+ }
+ case kAllocatorTypeRegion: {
+ DCHECK(region_space_ != nullptr);
+ alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated);
+ break;
+ }
+ case kAllocatorTypeRegionTLAB: {
+ DCHECK(region_space_ != nullptr);
+ DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+ if (UNLIKELY(self->TlabSize() < alloc_size)) {
+ if (space::RegionSpace::kRegionSize >= alloc_size) {
+ // Non-large. Check OOME for a tlab.
+ if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
+ // Try to allocate a tlab.
+ if (!region_space_->AllocNewTlab(self)) {
+ // Failed to allocate a tlab. Try non-tlab.
+ ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated);
+ return ret;
+ }
+ *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+ // Fall-through.
+ } else {
+ // Check OOME for a non-tlab allocation.
+ if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
+ ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated);
+ return ret;
+ } else {
+ // Neither tlab nor non-tlab works. Give up.
+ return nullptr;
+ }
+ }
+ } else {
+ // Large. Check OOME.
+ if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+ ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+ bytes_tl_bulk_allocated);
+ return ret;
+ } else {
+ return nullptr;
+ }
+ }
+ } else {
+ *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer.
+ }
+ // The allocation can't fail.
+ ret = self->AllocTlab(alloc_size);
+ DCHECK(ret != nullptr);
+ *bytes_allocated = alloc_size;
*usable_size = alloc_size;
break;
}
@@ -383,16 +408,15 @@
return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
}
-inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
- size_t alloc_size,
- bool grow) {
+template <bool kGrow>
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
if (UNLIKELY(new_footprint > growth_limit_)) {
return true;
}
if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
- if (!grow) {
+ if (!kGrow) {
return true;
}
// TODO: Grow for allocation is racy, fix it.
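The grow flag likewise moves back from a runtime bool parameter to the compile-time template parameter kGrow, so each instantiation of IsOutOfMemoryOnAllocation folds the `if (!kGrow)` branch away. A minimal sketch of the pattern, with hypothetical names rather than the ART signature:

    // Minimal sketch, hypothetical names: a compile-time flag instead of a
    // runtime parameter lets the compiler delete the branch per instantiation.
    template <bool kGrow>
    inline bool WouldBeOutOfMemory(size_t new_footprint, size_t max_footprint,
                                   size_t growth_limit) {
      if (new_footprint > max_footprint) {
        if (new_footprint > growth_limit) return true;
        if (!kGrow) return true;  // constant-folded out when kGrow is true
        // ... otherwise the caller is allowed to grow toward growth_limit ...
      }
      return false;
    }
    // Call sites pick the instantiation statically, e.g.
    //   WouldBeOutOfMemory<false>(footprint, max_allowed, growth_limit);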
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ae9741f..f0e619d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1819,7 +1819,7 @@
break;
}
// Try to transition the heap if the allocation failure was due to the space being full.
- if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) {
+ if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) {
// If we aren't out of memory then the OOM was probably from the non moving space being
// full. Attempt to disable compaction and turn the main space into a non moving space.
DisableMovingGc();
@@ -4225,72 +4225,5 @@
gc_pause_listener_.StoreRelaxed(nullptr);
}
-mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
- size_t alloc_size,
- bool grow,
- size_t* bytes_allocated,
- size_t* usable_size,
- size_t* bytes_tl_bulk_allocated) {
- const AllocatorType allocator_type = GetCurrentAllocator();
- if (allocator_type == kAllocatorTypeTLAB) {
- DCHECK(bump_pointer_space_ != nullptr);
- const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
- if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
- return nullptr;
- }
- // Try allocating a new thread local buffer, if the allocation fails the space must be
- // full so return null.
- if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
- return nullptr;
- }
- *bytes_tl_bulk_allocated = new_tlab_size;
- } else {
- DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
- DCHECK(region_space_ != nullptr);
- if (space::RegionSpace::kRegionSize >= alloc_size) {
- // Non-large. Check OOME for a tlab.
- if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
- space::RegionSpace::kRegionSize,
- grow))) {
- // Try to allocate a tlab.
- if (!region_space_->AllocNewTlab(self)) {
- // Failed to allocate a tlab. Try non-tlab.
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
- }
- *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
- // Fall-through to using the TLAB below.
- } else {
- // Check OOME for a non-tlab allocation.
- if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
- }
- // Neither tlab or non-tlab works. Give up.
- return nullptr;
- }
- } else {
- // Large. Check OOME.
- if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
- }
- return nullptr;
- }
- }
- // Refilled TLAB, return.
- mirror::Object* ret = self->AllocTlab(alloc_size);
- DCHECK(ret != nullptr);
- *bytes_allocated = alloc_size;
- *usable_size = alloc_size;
- return ret;
-}
-
} // namespace gc
} // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 3a8e29b..0c671d2 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -854,10 +854,6 @@
allocator_type != kAllocatorTypeRegionTLAB;
}
static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
- if (kUseReadBarrier) {
- // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up.
- return true;
- }
return
allocator_type != kAllocatorTypeBumpPointer &&
allocator_type != kAllocatorTypeTLAB;
@@ -927,20 +923,11 @@
size_t* bytes_tl_bulk_allocated)
REQUIRES_SHARED(Locks::mutator_lock_);
- mirror::Object* AllocWithNewTLAB(Thread* self,
- size_t alloc_size,
- bool grow,
- size_t* bytes_allocated,
- size_t* usable_size,
- size_t* bytes_tl_bulk_allocated)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
REQUIRES_SHARED(Locks::mutator_lock_);
- ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
- size_t alloc_size,
- bool grow);
+ template <bool kGrow>
+ ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
// Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
void RunFinalization(JNIEnv* env, uint64_t timeout);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 870d1ae..d4c322e 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -630,7 +630,7 @@
}
static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
- thread->ResetQuickAllocEntryPointsForThread(kUseReadBarrier && thread->GetIsGcMarking());
+ thread->ResetQuickAllocEntryPointsForThread();
}
void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c92e38b..65c8681 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -122,27 +122,21 @@
CHECK(kUseReadBarrier);
tls32_.is_gc_marking = is_marking;
UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
- if (kRuntimeISA == kX86_64) {
- // Entrypoint switching is only implemented for X86_64.
- ResetQuickAllocEntryPointsForThread(is_marking);
- }
}
void Thread::InitTlsEntryPoints() {
// Insert a placeholder so we can easily tell if we call an unimplemented entry point.
uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
- uintptr_t* end = reinterpret_cast<uintptr_t*>(
- reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints));
+ uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
+ sizeof(tlsPtr_.quick_entrypoints));
for (uintptr_t* it = begin; it != end; ++it) {
*it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
}
InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
}
-void Thread::ResetQuickAllocEntryPointsForThread(bool is_marking) {
- // Entrypoint switching is currnetly only faster for X86_64 since other archs don't have TLAB
- // fast path for non region space entrypoints.
- ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints, is_marking);
+void Thread::ResetQuickAllocEntryPointsForThread() {
+ ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
}
class DeoptimizationContextRecord {
diff --git a/runtime/thread.h b/runtime/thread.h
index 35226f2..97093a6 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1007,7 +1007,7 @@
tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
}
- void ResetQuickAllocEntryPointsForThread(bool is_marking);
+ void ResetQuickAllocEntryPointsForThread();
// Returns the remaining space in the TLAB.
size_t TlabSize() const;