Faster allocation fast path
Added a new object size field to mirror::Class. The field contains the
aligned object size if the class is initialized and not finalizable.
If the class is finalizable or uninitialized, the field is set to a
large sentinel value that forces the ASM allocators to take the slow
path.
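
In other words (a minimal C++ sketch of the intended semantics; the
helper name and parameters here are hypothetical, not the actual
mirror::Class API):

    #include <cstdint>
    #include <limits>

    // Value the fast-path stubs compare against the remaining TLAB space.
    uint32_t ObjectSizeAllocFastPath(uint32_t object_size,
                                     bool finalizable,
                                     bool initialized) {
      if (finalizable || !initialized) {
        // Sentinel larger than any TLAB: the "does it fit" check in the
        // stub always fails, so allocation falls back to the slow path.
        return std::numeric_limits<uint32_t>::max();
      }
      // Pre-aligned to 8 bytes so the stubs can skip the round-up step.
      return (object_size + 7u) & ~7u;
    }
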
Only implemented for the region/normal TLAB allocators for now;
support will be added to the RosAlloc stubs soon.
CC N6P MemAllocTest: 1067 -> 1039 (25 samples)
CC N6P EAAC: 1281 -> 1260 (25 samples)
RAM overhead is technically 0 since mirror::Class was not previously
8 byte aligned. Because the allocators require 8 byte alignment, there
would already have been one word of padding at the end of the class.
If there actually were 4 extra bytes per class, the system-wide
overhead would be 36000 * 4 bytes = ~144KB, based on old N6P numbers
for the number of loaded classes after boot.
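
As a sanity check on the padding argument (illustrative sizes, not the
real mirror::Class layout):

    #include <cstdio>

    // The allocators round every allocation up to 8 bytes.
    constexpr unsigned AlignUp8(unsigned n) { return (n + 7u) & ~7u; }

    int main() {
      unsigned old_size = 100;           // hypothetical class size; 100 % 8 == 4
      unsigned new_size = old_size + 4;  // plus the new 4 byte field
      // Prints "104 -> 104": the field fits in the former padding word.
      std::printf("%u -> %u\n", AlignUp8(old_size), AlignUp8(new_size));
      return 0;
    }
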
Bug: 9986565
Test: test-art-host CC baker, N6P phone boot and EAAC runs.
Change-Id: I119a87b8cc6c980bff980a0c62f42610dab5e531
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 282f10d..67df081 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -990,32 +990,19 @@
MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
testl %edx, %edx // Check null class
jz VAR(slowPathLabel)
- // Check class status.
- cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
- jne VAR(slowPathLabel)
- // No fake dependence needed on x86
- // between status and flags load,
- // since each load is a load-acquire,
- // no loads reordering.
- // Check access flags has
- // kAccClassIsFinalizable
- testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
- jnz VAR(slowPathLabel)
movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread
movl THREAD_LOCAL_END_OFFSET(%ebx), %edi // Load thread_local_end.
subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi // Compute the remaining buffer size.
- movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi // Load the object size.
+ movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi // Load the pre-aligned object size
+ // (or a huge sentinel for finalizable/uninitialized classes).
cmpl %edi, %esi // Check if it fits. OK to do this
// before rounding up the object size
// assuming the buf size alignment.
ja VAR(slowPathLabel)
- addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi // Align the size by 8. (addr + 7) & ~7.
- andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax // Load thread_local_pos
// as allocated object.
addl %eax, %esi // Add the object size.
movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos.
- addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects.
+ incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects.
// Store the class pointer in the header.
// No fence needed for x86.
POISON_HEAP_REF edx