Revert "Make object allocation entrypoints only take a class."
Reason for revert: 960-default-smali64 is failing.
This reverts commit 2b615ba29c4dfcf54aaf44955f2eac60f5080b2e.
Change-Id: Iebb8ee5a917fa84c5f01660ce432798524d078ef
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b1e038..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1669,6 +1669,7 @@
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
@@ -1681,23 +1682,27 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
// Fast path rosalloc allocation.
- // x0: type, xSELF(x19): Thread::Current
- // x1-x7: free.
+ // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+ // x2-x7: free.
+ ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
+ // Load the class (x2)
+ ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+ cbz x2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class
ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local
// allocation stack has room.
// ldp won't work due to large offset.
ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
cmp x3, x4
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
- ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3)
+ bhs .Lart_quick_alloc_object_rosalloc_slow_path
+ ldr w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3)
cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread
// local allocation. Also does the
// finalizable and initialization
// checks.
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ bhs .Lart_quick_alloc_object_rosalloc_slow_path
// Compute the rosalloc bracket index
// from the size. Since the size is
// already aligned we can combine the
@@ -1710,7 +1715,7 @@
// Load the free list head (x3). This
// will be the return val.
ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
- cbz x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ cbz x3, .Lart_quick_alloc_object_rosalloc_slow_path
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head
// and update the list head with the
@@ -1723,8 +1728,8 @@
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
- POISON_HEAP_REF w0
- str w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+ POISON_HEAP_REF w2
+ str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
// Fence. This is "ish" not "ishst" so
// that it also ensures ordering of
// the object size load with respect
@@ -1754,13 +1759,13 @@
mov x0, x3 // Set the return value and return.
ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
- mov x1, xSELF // pass Thread::Current
- bl artAllocObjectFromCodeResolvedRosAlloc // (mirror::Class* klass, Thread*)
+.Lart_quick_alloc_object_rosalloc_slow_path:
+ SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
+ mov x2, xSELF // pass Thread::Current
+ bl artAllocObjectFromCodeRosAlloc // (uint32_t type_idx, Method* method, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END art_quick_alloc_object_rosalloc
// The common fast path code for art_quick_alloc_array_region_tlab.
@@ -1829,6 +1834,16 @@
ret
.endm
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
+// x3-x7: free.
+// Need to preserve x0 and x1 to the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+ cbz x2, \slowPathLabel // Check null class
+ ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
// TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as
// ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED.
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
@@ -1838,18 +1853,20 @@
.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
- ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7).
+ ldr w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7).
add x6, x4, x7 // Add object size to tlab pos.
cmp x6, x5 // Check if it fits, overflow works
// since the tlab pos and end are 32
// bit values.
bhi \slowPathLabel
+ // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+ mov x0, x4
str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects.
add x5, x5, #1
str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
- POISON_HEAP_REF w0
- str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
+ POISON_HEAP_REF w2
+ str w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
// Fence. This is "ish" not "ishst" so
// that the code after this allocation
// site will see the right values in
@@ -1857,52 +1874,91 @@
// Alternatively we could use "ishst"
// if we use load-acquire for the
// object size load.)
- mov x0, x4
dmb ish
ret
.endm
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
// Fast path tlab allocation.
- // x0: type, xSELF(x19): Thread::Current
- // x1-x7: free.
+ // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+ // x2-x7: free.
#if defined(USE_READ_BARRIER)
mvn x0, xzr // Read barrier not supported here.
ret // Return -1.
#endif
- ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
- mov x1, xSELF // Pass Thread::Current.
- bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class*, Thread*)
+ ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
+ // Load the class (x2)
+ ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+ ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+ SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
+ mov x2, xSELF // Pass Thread::Current.
+ bl artAllocObjectFromCodeTLAB // (uint32_t type_idx, Method* method, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
+END art_quick_alloc_object_tlab
// The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier
ENTRY \name
// Fast path region tlab allocation.
- // x0: type, xSELF(x19): Thread::Current
- // x1-x7: free.
+ // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+ // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
+ // x2-x7: free.
#if !defined(USE_READ_BARRIER)
mvn x0, xzr // Read barrier must be enabled here.
ret // Return -1.
#endif
+.if \is_resolved
+ mov x2, x0 // class is actually stored in x0 already
+.else
+ ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array
+ // Load the class (x2)
+ ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+ // If the class is null, go slow path. The check is required to read the lock word.
+ cbz w2, .Lslow_path\name
+.endif
+.if \read_barrier
+ // Most common case: GC is not marking.
+ ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+ cbnz x3, .Lmarking\name
+.endif
.Ldo_allocation\name:
\fast_path .Lslow_path\name
+.Lmarking\name:
+.if \read_barrier
+ // GC is marking, check the lock word of the class for the mark bit.
+ // Class is not null, check mark bit in lock word.
+ ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ // If the bit is not zero, do the allocation.
+ tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+ // The read barrier slow path. Mark
+ // the class.
+ SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32 // Save registers (x0, x1, lr).
+ SAVE_REG xLR, 24 // Align sp by 16 bytes.
+ mov x0, x2 // Pass the class as the first param.
+ bl artReadBarrierMark
+ mov x2, x0 // Get the (marked) class back.
+ RESTORE_REG xLR, 24
+ RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32 // Restore registers.
+ b .Ldo_allocation\name
+.endif
.Lslow_path\name:
SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
- mov x1, xSELF // Pass Thread::Current.
- bl \entrypoint // (mirror::Class*, Thread*)
+ mov x2, xSELF // Pass Thread::Current.
+ bl \entrypoint // (uint32_t type_idx, Method* method, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED
+// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in GENERATE_ALLOC_OBJECT_REGION_TLAB.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1
+// No read barrier for the resolved or initialized cases since the caller is responsible for the
+// read barrier due to the to-space invariant.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0
// TODO: We could use this macro for the normal tlab allocator too.
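
Note (not part of the patch): the substance of this revert is the slow-path entrypoint shape. The calls above go back from taking an already-resolved class to taking a dex type index plus the calling ArtMethod*. A minimal C++ sketch of the two shapes, based only on the signature comments in the diff; the namespace layout, forward declarations, and return type are placeholders rather than the real ART headers:

#include <cstdint>

namespace art {
namespace mirror { class Class; class Object; }  // placeholder forward declarations
class ArtMethod;
class Thread;

// Shape restored by this revert: the assembly stub passes the dex type index and
// the calling ArtMethod*; the runtime resolves (and null-checks) the class itself.
extern "C" mirror::Object* artAllocObjectFromCodeRosAlloc(
    uint32_t type_idx, ArtMethod* method, Thread* self);

// Shape being backed out: the caller has already resolved the class, so the
// entrypoint only needs the mirror::Class* and the current Thread*.
extern "C" mirror::Object* artAllocObjectFromCodeResolvedRosAlloc(
    mirror::Class* klass, Thread* self);
}  // namespace art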
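
Once the class is in hand, the hand-written fast paths above all reduce to the same bump-pointer scheme. A minimal C++ sketch, with made-up type and field names, of what ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED does in assembly; the real code stores a 32-bit compressed class reference with heap-reference poisoning, which this sketch ignores:

#include <atomic>
#include <cstdint>
#include <cstring>

struct FakeClass {                       // stands in for mirror::Class
  uint32_t object_size_alloc_fast_path;  // object size used by the fast path (already aligned)
};

struct FakeThread {                      // stands in for the Thread TLAB fields
  uint8_t* thread_local_pos;
  uint8_t* thread_local_end;
  uint64_t thread_local_objects;
};

// Returns the new object on the fast path, or nullptr when the caller must
// branch to the slow-path label (the object does not fit in the TLAB).
void* AllocObjectTlabFastPath(FakeThread* self, FakeClass* klass) {
  uint8_t* pos = self->thread_local_pos;
  uint32_t size = klass->object_size_alloc_fast_path;
  if (pos + size > self->thread_local_end) {            // same add/compare as the asm
    return nullptr;                                      // take the slow path
  }
  self->thread_local_pos = pos + size;                   // bump the TLAB cursor
  self->thread_local_objects++;
  std::memcpy(pos, &klass, sizeof(klass));               // store the class pointer in the header
                                                         // (real code: 32-bit poisoned reference)
  std::atomic_thread_fence(std::memory_order_seq_cst);   // stand-in for "dmb ish"
  return pos;
}

Returning nullptr here corresponds to branching to slowPathLabel, which sets up the callee-save frame and calls the C++ entrypoint shown in the sketch above.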