Use entrypoint switching to reduce code size of GcRoot read barrier
Set the read barrier mark register entrypoints to null when the GC
is not marking. The compiler uses this to avoid needing to load the
is_gc_marking boolean.
Code size results on ritzperf CC:
arm32: 13439400 -> 13242792 (-1.5%)
arm64: 16380544 -> 16208512 (-1.05%)
Implemented for arm32 and arm64. TODO: Consider implementing on x86.
Bug: 32638713
Bug: 29516974
Test: test-art-host + run ritzperf
Change-Id: I527ca5dc4cd43950ba43b872d0ac81e1eb5791eb
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 877df8f..94fea69 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -44,6 +44,16 @@
extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+ qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+ qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+ qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+ qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+ qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+ qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+ qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+}
+
void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
DefaultInitEntryPoints(jpoints, qpoints);
@@ -87,14 +97,8 @@
// Read barrier.
qpoints->pReadBarrierJni = ReadBarrierJni;
- qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
- qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
- qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
- qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+ UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
qpoints->pReadBarrierMarkReg04 = nullptr; // Cannot use register 4 (ESP) to pass arguments.
- qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
- qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
- qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
// x86 has only 8 core registers.
qpoints->pReadBarrierMarkReg08 = nullptr;
qpoints->pReadBarrierMarkReg09 = nullptr;