Revert "Make object allocation entrypoints only take a class."

The 960-default-smali64 test is failing.

This reverts commit 2b615ba29c4dfcf54aaf44955f2eac60f5080b2e.

Change-Id: Iebb8ee5a917fa84c5f01660ce432798524d078ef
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b1e038..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1669,6 +1669,7 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 // Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
@@ -1681,23 +1682,27 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
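+    // Rough pseudocode for this fast path (hypothetical names; a sketch, not
+    // the exact runtime code):
+    //   klass = method->dex_cache_resolved_types_[type_idx];
+    //   if (klass == null || thread_local_alloc_stack_full()) goto slow_path;
+    //   size = klass->object_size_alloc_fast_path_;
+    //   if (size > ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE) goto slow_path;
+    //   slot = self->rosalloc_runs_[bracket_index(size)]->free_list_.pop();
+    //   slot->klass_ = klass; return slot;  // (an empty free list -> slow_path)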
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
     ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                               // allocation stack has room.
                                                               // ldp won't work due to large offset.
     ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
     cmp    x3, x4
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
-    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
     cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                               // local allocation. Also does the
                                                               // finalizable and initialization
                                                               // checks.
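+                                                              // (Classes that are finalizable or
+                                                              // not yet initialized appear here
+                                                              // with a large sentinel size, so
+                                                              // this one unsigned compare also
+                                                              // sends them to the slow path.)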
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
                                                               // Compute the rosalloc bracket index
                                                               // from the size. Since the size is
                                                               // already aligned we can combine the
@@ -1710,7 +1715,7 @@
                                                               // Load the free list head (x3). This
                                                               // will be the return val.
     ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
-    cbz    x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
     ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                               // and update the list head with the
@@ -1723,8 +1728,8 @@
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
-    POISON_HEAP_REF w0
-    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+    POISON_HEAP_REF w2
+    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that it also ensures ordering of
                                                               // the object size load with respect
@@ -1754,13 +1759,13 @@
 
     mov    x0, x3                                             // Set the return value and return.
     ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
-    mov    x1, xSELF                                // pass Thread::Current
-    bl     artAllocObjectFromCodeResolvedRosAlloc   // (mirror::Class* klass, Thread*)
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
+    mov    x2, xSELF                       // pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END art_quick_alloc_object_rosalloc
 
 
 // The common fast path code for art_quick_alloc_array_region_tlab.
@@ -1829,6 +1834,16 @@
     ret
 .endm
 
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
+// x3-x7: free.
+// Need to preserve x0 and x1 to the slow path.
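+// Rough pseudocode for the bump-pointer path below (a sketch with hypothetical
+// names, not the exact runtime code):
+//   if (klass == null) goto slow;
+//   size = klass->object_size_alloc_fast_path_;
+//   if (tlab_pos + size > tlab_end) goto slow;
+//   obj = tlab_pos; tlab_pos += size; obj->klass_ = klass; fence; return obj;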
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    x2, \slowPathLabel                                 // Check null class
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
 // TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as
 // ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
@@ -1838,18 +1853,20 @@
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
     ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
     ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
-    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
+    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
     add    x6, x4, x7                                         // Add object size to tlab pos.
     cmp    x6, x5                                             // Check if it fits, overflow works
                                                               // since the tlab pos and end are 32
                                                               // bit values.
     bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+    mov    x0, x4                                             // Move the object address to x0
+                                                              // (the return value).
     str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
     ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
     add    x5, x5, #1
     str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF w0
-    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+    POISON_HEAP_REF w2
+    str    w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that the code after this allocation
                                                               // site will see the right values in
@@ -1857,52 +1874,91 @@
                                                               // Alternatively we could use "ishst"
                                                               // if we use load-acquire for the
                                                               // object size load.)
-    mov    x0, x4
     dmb    ish
     ret
 .endm
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
     // Fast path tlab allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
 #if defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier not supported here.
     ret                                                       // Return -1.
 #endif
-    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME                    // Save callee saves in case of GC.
-    mov    x1, xSELF                              // Pass Thread::Current.
-    bl     artAllocObjectFromCodeResolvedTLAB     // (mirror::Class*, Thread*)
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
+    mov    x2, xSELF                     // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
+END art_quick_alloc_object_tlab
 
 // The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier
 ENTRY \name
     // Fast path region tlab allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
+    // x2-x7: free.
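+    // Overall flow (sketch): resolve the class from the dex cache unless
+    // is_resolved, then, if the GC is concurrently marking, mark the class
+    // reference via artReadBarrierMark before running the bump-pointer fast
+    // path; any failure falls back to \entrypoint.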
 #if !defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
+.if \is_resolved
+    mov    x2, x0                                             // The resolved class is already
+                                                              // in x0.
+.else
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    // If the class is null, go to the slow path. The null check must also happen
+    // before the lock word read below.
+    cbz    w2, .Lslow_path\name
+.endif
+.if \read_barrier
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lmarking\name
+.endif
 .Ldo_allocation\name:
     \fast_path .Lslow_path\name
+.Lmarking\name:
+.if \read_barrier
+    // GC is marking. The class is known to be non-null here; check the mark bit
+    // in its lock word.
+    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32                   // Save x0 and x1; the 32-byte frame
+                                                              // keeps sp 16-byte aligned.
+    SAVE_REG xLR, 24                                          // Save lr.
+    mov    x0, x2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x2, x0                                             // Get the (marked) class back.
+    RESTORE_REG xLR, 24
+    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32                // Restore registers.
+    b      .Ldo_allocation\name
+.endif
 .Lslow_path\name:
     SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
-    mov    x1, xSELF                           // Pass Thread::Current.
-    bl     \entrypoint                         // (mirror::Class*, Thread*)
+    mov    x2, xSELF                           // Pass Thread::Current.
+    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END \name
 .endm
 
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED
+// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done
+// in GENERATE_ALLOC_OBJECT_REGION_TLAB.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1
+// No read barrier for the resolved or initialized cases since the caller is responsible for the
+// read barrier due to the to-space invariant.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0
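+// The trailing 0/1 arguments are the is_resolved and read_barrier parameters of
+// GENERATE_ALLOC_OBJECT_REGION_TLAB.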
 
 // TODO: We could use this macro for the normal tlab allocator too.