X86_64: Add allocation entrypoint switching for CC is_marking
Only X86_64 is done so far. Use the normal TLAB allocators if the GC is not
marking.
Allocation speed goes up by ~8% based on perf sampling: total time spent in
the allocation entrypoints drops from 1.19% to 1.10% of samples.
Without the change:
  1.19%: art_quick_alloc_object_region_tlab
With the change:
  0.63%: art_quick_alloc_object_tlab
  0.47%: art_quick_alloc_object_region_tlab
Bug: 31018974
Bug: 12687968
Test: test-art-host-run-test
Change-Id: I4c4d9eb229d4ad2f41b856ba5c2958a5eb3b7ffa
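
For readers outside ART, here is a minimal standalone sketch of the switching
mechanism this change implements. It assumes nothing beyond standard C++; the
struct and function names mirror the patch, but the bodies are illustrative
stubs, not ART's real allocators.

    #include <cstddef>
    #include <cstdio>

    // Per-thread table of allocation entrypoints (heavily simplified).
    struct QuickEntryPoints {
      void* (*pAllocObject)(std::size_t bytes);
    };

    // Plain TLAB allocation: no read-barrier bookkeeping.
    static void* AllocObjectTlab(std::size_t bytes) {
      std::printf("tlab alloc of %zu bytes\n", bytes);
      return nullptr;  // illustrative stub
    }

    // Region-TLAB allocation: the variant that cooperates with the
    // concurrent copying (CC) collector while it is marking.
    static void* AllocObjectRegionTlab(std::size_t bytes) {
      std::printf("region tlab alloc of %zu bytes\n", bytes);
      return nullptr;  // illustrative stub
    }

    // Pick the entrypoint set from the GC's marking state, as the patched
    // ResetQuickAllocEntryPoints does for kAllocatorTypeRegionTLAB.
    void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
      qpoints->pAllocObject =
          is_marking ? AllocObjectRegionTlab : AllocObjectTlab;
    }

    int main() {
      QuickEntryPoints qpoints;
      ResetQuickAllocEntryPoints(&qpoints, /* is_marking */ true);
      qpoints.pAllocObject(16);  // routed to the region-TLAB variant
      ResetQuickAllocEntryPoints(&qpoints, /* is_marking */ false);
      qpoints.pAllocObject(16);  // routed to the plain TLAB variant
      return 0;
    }

The real change swaps whole entrypoint tables (SetQuickAllocEntryPoints_tlab
vs. SetQuickAllocEntryPoints_region_tlab) rather than a single pointer, but
the dispatch idea is the same.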
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 515fcbf..4a7e819 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -258,7 +258,7 @@
entry_points_instrumented = instrumented;
}
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
#if !defined(__APPLE__) || !defined(__LP64__)
switch (entry_points_allocator) {
case gc::kAllocatorTypeDlMalloc: {
@@ -286,7 +286,12 @@
}
case gc::kAllocatorTypeRegionTLAB: {
CHECK(kMovingCollector);
- SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+ if (is_marking) {
+ SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+ } else {
+ // Not marking means we need no read barriers and can just use the normal TLAB case.
+ SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
+ }
return;
}
default:
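
Why the else branch above is safe: with the concurrent copying collector,
read-barrier work is only needed while the GC is marking, so when is_marking
is false the plain TLAB entrypoints (which skip that work) behave the same as
the region-TLAB ones. A hedged sketch of that fast path follows; gc_is_marking
and Mark() are illustrative names, not ART's actual symbols.

    // Illustrative pseudo-C++ of a concurrent-copying read-barrier fast path.
    // Only the is-marking check matters here; all names are assumptions.
    struct Object;
    extern bool gc_is_marking;
    Object* Mark(Object* ref);  // slow path: mark/forward the reference

    inline Object* ReadBarrier(Object* ref) {
      if (gc_is_marking) {
        return Mark(ref);  // marking: keep the reference marked/up to date
      }
      return ref;          // not marking: the barrier is a no-op
    }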
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index 14a8e04..bd1e295 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,7 +23,9 @@
namespace art {
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+// is_marking is only used by the CC collector: if the GC is marking, the allocation
+// entrypoints are the read-barrier (region TLAB) ones; otherwise the normal TLAB ones are used.
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
// Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
// starting it doesn't hold the mutator lock until after it has been added to the thread list.
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index df23f94..78dad94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
// Alloc
- ResetQuickAllocEntryPoints(qpoints);
+ ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
// DexCache
qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
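
A note on the default here: passing /* is_marking */ true during default
entrypoint initialization appears to be the conservative choice. The
region-TLAB (marking) entrypoints are correct regardless of the GC's state,
just slower, while the plain TLAB ones are only valid when the GC is not
marking; a thread can be switched to the faster set once the marking state
is known.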