Improve quick codegen for aput-object.

1) don't type check known null.
2) if we know types in verify don't check at runtime.
3) if we're runtime checking then move all the code out-of-line.

Also, don't set up a callee-save frame for check-cast, do an instance-of test
then throw an exception if that fails.
Tidy quick entry point of Ldivmod to Lmod which it is on x86 and mips.
Fix monitor-enter/exit NPE for MIPS.
Fix benign bug in mirror::Class::CannotBeAssignedFromOtherTypes, a byte[]
cannot be assigned to from other types.

Change-Id: I9cb3859ec70cca71ed79331ec8df5bec969d6745
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 69fb9c3..cfffbea 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -25,6 +25,8 @@
 #define rSELF r9
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index e6e13be..352982f 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -52,7 +52,6 @@
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -78,7 +77,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -182,7 +184,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -205,7 +206,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -236,7 +240,7 @@
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
   qpoints->pLdiv = __aeabi_ldivmod;
-  qpoints->pLdivmod = __aeabi_ldivmod;  // result returned in r2:r3
+  qpoints->pLmod = __aeabi_ldivmod;  // result returned in r2:r3
   qpoints->pLmul = art_quick_mul_long;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index cb61698..d073177 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -152,6 +152,7 @@
     mov r1, r9                      @ pass Thread::Current
     mov r2, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -162,6 +163,7 @@
     mov r2, r9                      @ pass Thread::Current
     mov r3, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -389,33 +391,96 @@
 END art_quick_unlock_object
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
+     * artThrowClassCastException.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                       @ pass Thread::Current
-    mov    r3, sp                       @ pass SP
-    bl     artCheckCastFromCode         @ (Class* a, Class* b, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
+    push {r0-r1, lr}                    @ save arguments, link register and pad
+    .save {r0-r1, lr}
+    .cfi_adjust_cfa_offset 12
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset lr, 8
+    sub sp, #4
+    .pad #4
+    .cfi_adjust_cfa_offset 4
+    bl artIsAssignableFromCode
+    cbz    r0, throw_class_cast_exception
+    add sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop {r0-r1, pc}
+throw_class_cast_exception:
+    add sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop {r0-r1, lr}
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r2, r9                      @ pass Thread::Current
+    mov r3, sp                      @ pass SP
+    b   artThrowClassCastException  @ (Class*, Class*, Thread*, SP)
+    bkpt
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * r0 = array, r1 = index, r2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                         @ pass Thread::Current
-    mov    r3, sp                         @ pass SP
-    bl     artCanPutArrayElementFromCode  @ (Object* element, Class* array_class, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
+    tst r0, r0
+    bne art_quick_aput_obj_with_bound_check
+    b art_quick_throw_null_pointer_exception
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    ldr r3, [r0, #ARRAY_LENGTH_OFFSET]
+    cmp r3, r1
+    bhi art_quick_aput_obj
+    mov r0, r1
+    mov r1, r3
+    b art_quick_throw_array_bounds
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    cbz r2, do_aput_null
+    ldr r3, [r0, #CLASS_OFFSET]
+    ldr ip, [r2, #CLASS_OFFSET]
+    ldr r3, [r3, #CLASS_COMPONENT_TYPE_OFFSET]
+    cmp r3, ip  @ value's type == array's component type - trivial assignability
+    bne check_assignability
+do_aput:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
+    lsr r0, r0, #7
+    strb r3, [r3, r0]
+    blx lr
+do_aput_null:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    blx lr
+check_assignability:
+    push {r0-r2, lr}                 @ save arguments
+    mov r1, ip
+    mov r0, r3
+    bl artIsAssignableFromCode
+    cbz r0, throw_array_store_exception
+    pop {r0-r2, lr}
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
+    lsr r0, r0, #7
+    strb r3, [r3, r0]
+    blx lr
+throw_array_store_exception:
+    pop {r0-r2, lr}
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov r1, r2
+    mov r2, r9                   @ pass Thread::Current
+    mov r3, sp                   @ pass SP
+    b artThrowArrayStoreException  @ (Class*, Class*, Thread*, SP)
+    bkpt                         @ unreached
+END art_quick_aput_obj
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 75eef60..8c1efeb 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -23,6 +23,7 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 9a66352..5307997 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -25,6 +25,8 @@
 #define rSELF $s1
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 3d08298..cc975d75 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -51,7 +51,6 @@
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -77,7 +76,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -89,9 +91,9 @@
 extern int32_t CmplDouble(double a, double b);
 extern int32_t CmpgFloat(float a, float b);
 extern int32_t CmplFloat(float a, float b);
-extern "C" int64_t artLmulFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b);
+extern "C" int64_t artLmul(int64_t a, int64_t b);
+extern "C" int64_t artLdiv(int64_t a, int64_t b);
+extern "C" int64_t artLmod(int64_t a, int64_t b);
 
 // Math conversions.
 extern "C" int32_t __fixsfsi(float op1);      // FLOAT_TO_INT
@@ -183,7 +185,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -206,7 +207,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -235,9 +239,9 @@
   qpoints->pIdivmod = NULL;
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
-  qpoints->pLdiv = artLdivFromCode;
-  qpoints->pLdivmod = artLdivmodFromCode;
-  qpoints->pLmul = artLmulFromCode;
+  qpoints->pLdiv = artLdiv;
+  qpoints->pLmod = artLmod;
+  qpoints->pLmul = artLmul;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
   qpoints->pUshrLong = art_quick_ushr_long;
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index f9b703f..e9c6698 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -283,6 +283,7 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
     GENERATE_GLOBAL_POINTER
+art_quick_throw_null_pointer_exception_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowNullPointerExceptionFromCode
@@ -309,6 +310,7 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
     GENERATE_GLOBAL_POINTER
+art_quick_throw_array_bounds_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowArrayBoundsFromCode
@@ -481,6 +483,8 @@
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
     move    $a1, rSELF                    # pass Thread::Current
     jal     artLockObjectFromCode         # (Object* obj, Thread*, $sp)
@@ -494,6 +498,8 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
     move    $a1, rSELF                # pass Thread::Current
     jal     artUnlockObjectFromCode   # (Object* obj, Thread*, $sp)
@@ -504,29 +510,116 @@
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artCheckCastFromCode      # (Class* a, Class* b, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_ZERO
+    addiu  $sp, $sp, -16
+    .cfi_adjust_cfa_offset 16
+    sw     $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw     $t9, 8($sp)
+    sw     $a1, 4($sp)
+    sw     $a0, 0($sp)
+    jal    artIsAssignableFromCode
+    nop
+    beqz   $v0, throw_class_cast_exception
+    lw     $ra, 12($sp)
+    jr     $ra
+    addiu  $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+throw_class_cast_exception:
+    lw     $t9, 8($sp)
+    lw     $a1, 4($sp)
+    lw     $a0, 0($sp)
+    addiu  $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowClassCastException
+    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * a0 = array, a1 = index, a2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                     # pass Thread::Current
-    jal     artCanPutArrayElementFromCode  # (Object* element, Class* array_class, Thread*, $sp)
-    move    $a3, $sp                       # pass $sp
-    RETURN_IF_ZERO
-END art_quick_can_put_array_element
+    bnez    $a0, art_quick_aput_obj_with_bound_check_gp_set
+    nop
+    b art_quick_throw_null_pointer_exception_gp_set
+    nop
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_with_bound_check_gp_set:
+    lw $t0, ARRAY_LENGTH_OFFSET($a0)
+    sltu $t1, $a1, $t0
+    bnez $t1, art_quick_aput_obj_gp_set
+    nop
+    move $a0, $a1
+    b art_quick_throw_array_bounds_gp_set
+    move $a1, $t0
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_gp_set:
+    beqz $a2, do_aput_null
+    nop
+    lw $t0, CLASS_OFFSET($a0)
+    lw $t1, CLASS_OFFSET($a2)
+    lw $t0, CLASS_COMPONENT_TYPE_OFFSET($t0)
+    bne $t1, $t0, check_assignability  # value's type == array's component type - trivial assignability
+    nop
+do_aput:
+    sll $a1, $a1, 2
+    add $t0, $a0, $a1
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
+    srl $t1, $a0, 7
+    add $t1, $t1, $t0
+    sb  $t0, ($t1)
+    jr  $ra
+    nop
+do_aput_null:
+    sll $a1, $a1, 2
+    add $t0, $a0, $a1
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)
+    jr  $ra
+    nop
+check_assignability:
+    addiu  $sp, $sp, -32
+    .cfi_adjust_cfa_offset 32
+    sw     $ra, 28($sp)
+    .cfi_rel_offset 31, 28
+    sw     $t9, 12($sp)
+    sw     $a2, 8($sp)
+    sw     $a1, 4($sp)
+    sw     $a0, 0($sp)
+    move   $a1, $t1
+    move   $a0, $t0
+    jal    artIsAssignableFromCode  # (Class*, Class*)
+    nop
+    lw     $ra, 28($sp)
+    lw     $t9, 12($sp)
+    lw     $a2, 8($sp)
+    lw     $a1, 4($sp)
+    lw     $a0, 0($sp)
+    add    $sp, 32
+    .cfi_adjust_cfa_offset -32
+    bnez   $v0, do_aput
+    nop
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a1, $a2
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowArrayStoreException
+    jr   $t9                        # artThrowArrayStoreException(Class*, Class*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
+END art_quick_aput_obj
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 7364de0..bd54549 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -23,6 +23,7 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index d4e0927..e817ff7 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -21,6 +21,8 @@
 
 // Offset of field Thread::self_ verified in InitCpu
 #define THREAD_SELF_OFFSET 40
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 4c87e07..89dd1b8 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -50,7 +50,6 @@
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -73,7 +72,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -89,7 +91,7 @@
 extern "C" int64_t art_quick_f2l(float);
 extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
-extern "C" int64_t art_quick_ldivmod(int64_t, int64_t);
+extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);
 extern "C" uint64_t art_quick_lshl(uint64_t, uint32_t);
 extern "C" uint64_t art_quick_lshr(uint64_t, uint32_t);
@@ -165,7 +167,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -188,7 +189,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -218,7 +222,7 @@
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
   qpoints->pLdiv = art_quick_ldiv;
-  qpoints->pLdivmod = art_quick_ldivmod;
+  qpoints->pLmod = art_quick_lmod;
   qpoints->pLmul = art_quick_lmul;
   qpoints->pShlLong = art_quick_lshl;
   qpoints->pShrLong = art_quick_lshr;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6be73d1..9fce72f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -479,14 +479,115 @@
 
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                     // alignment padding
-    PUSH ecx                     // pass arg2
-    PUSH eax                     // pass arg1
-    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b, Thread*, SP)
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     addl LITERAL(12), %esp        // pop arguments
     .cfi_adjust_cfa_offset -12
     ret
 END_FUNCTION art_quick_is_assignable
 
+DEFINE_FUNCTION art_quick_check_cast
+    PUSH eax                     // alignment padding
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    testl %eax, %eax
+    jz 1f                         // jump forward if not assignable
+    addl LITERAL(12), %esp        // pop arguments
+    .cfi_adjust_cfa_offset -12
+    ret
+1:
+    POP eax                       // pop arguments
+    POP ecx
+    addl LITERAL(4), %esp
+    .cfi_adjust_cfa_offset -12
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %edx
+    // Outgoing argument set up
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_check_cast
+
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * eax = array, ecx = index, edx = value
+     */
+DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+    testl %eax, %eax
+    jnz art_quick_aput_obj_with_bound_check
+    jmp art_quick_throw_null_pointer_exception
+END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+    movl ARRAY_LENGTH_OFFSET(%eax), %ebx
+    cmpl %ebx, %ecx
+    jb art_quick_aput_obj
+    mov %ecx, %eax
+    mov %ebx, %ecx
+    jmp art_quick_throw_array_bounds
+END_FUNCTION art_quick_aput_obj_with_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj
+    test %edx, %edx              // store of null
+    jz do_aput_null
+    movl CLASS_OFFSET(%eax), %ebx
+    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+    jne check_assignability
+do_aput:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
+    shrl LITERAL(7), %eax
+    movb %dl, (%edx, %eax)
+    ret
+do_aput_null:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
+    ret
+check_assignability:
+    PUSH eax                     // save arguments
+    PUSH ecx
+    PUSH edx
+    subl LITERAL(8), %esp        // alignment padding
+    .cfi_adjust_cfa_offset 8
+    pushl CLASS_OFFSET(%edx)     // pass arg2 - type of the value to be stored
+    .cfi_adjust_cfa_offset 4
+    PUSH ebx                     // pass arg1 - component type of the array
+    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
+    addl LITERAL(16), %esp       // pop arguments
+    .cfi_adjust_cfa_offset -16
+    testl %eax, %eax
+    jz   throw_array_store_exception
+    POP  edx
+    POP  ecx
+    POP  eax
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
+    shrl LITERAL(7), %eax
+    movb %dl, (%edx, %eax)
+    ret
+throw_array_store_exception:
+    POP  edx
+    POP  ecx
+    POP  eax
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx
+    // Outgoing argument set up
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH edx                      // pass arg2 - value
+    PUSH eax                      // pass arg1 - array
+    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_aput_obj
+
 DEFINE_FUNCTION art_quick_memcpy
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
@@ -497,9 +598,6 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-TWO_ARG_DOWNCALL art_quick_check_cast, artCheckCastFromCode, RETURN_IF_EAX_ZERO
-TWO_ARG_DOWNCALL art_quick_can_put_array_element, artCanPutArrayElementFromCode, RETURN_IF_EAX_ZERO
-
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
 DEFINE_FUNCTION art_quick_fmod
@@ -586,30 +684,30 @@
 END_FUNCTION art_quick_idivmod
 
 DEFINE_FUNCTION art_quick_ldiv
-    subl LITERAL(12), %esp        // alignment padding
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivFromCode)  // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLdiv)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
 END_FUNCTION art_quick_ldiv
 
-DEFINE_FUNCTION art_quick_ldivmod
-    subl LITERAL(12), %esp        // alignment padding
+DEFINE_FUNCTION art_quick_lmod
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivmodFromCode) // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLmod)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
-END_FUNCTION art_quick_ldivmod
+END_FUNCTION art_quick_lmod
 
 DEFINE_FUNCTION art_quick_lmul
     imul %eax, %ebx              // ebx = a.lo(eax) * b.hi(ebx)
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 7e0aee0..42789cb 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -134,6 +134,7 @@
 
   // Sanity check other offsets.
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }