Improve quick codegen for aput-object.

1) don't type check known null.
2) if we know types in verify don't check at runtime.
3) if we're runtime checking then move all the code out-of-line.

Also, don't set up a callee-save frame for check-cast; do an instance-of test
first and only throw an exception if that fails.
Rename the Ldivmod quick entry point to Lmod, which is its name on x86 and mips.
Fix monitor-enter/exit NPE for MIPS.
Fix benign bug in mirror::Class::CannotBeAssignedFromOtherTypes: a byte[]
cannot be assigned to from other types.

Change-Id: I9cb3859ec70cca71ed79331ec8df5bec969d6745
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index b75661c..aa5782b 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -89,12 +89,10 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                        RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_dest, int scale);
-    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale);
+    void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
+                     RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 6d11b03..b1772fd 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -771,7 +771,7 @@
  * Generate array load
  */
 void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -861,13 +861,13 @@
  *
  */
 void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale) {
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset;
   bool constant_index = rl_index.is_const;
 
-  if (rl_src.wide) {
+  int data_offset;
+  if (size == kLong || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -943,68 +943,12 @@
   if (!constant_index) {
     FreeTemp(reg_ptr);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void ArmMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  int reg_len = INVALID_REG;
-  if (needs_range_check) {
-    reg_len = TargetReg(kArg1);
-    LoadWordDisp(r_array, len_offset, reg_len);  // Get len
-  }
-  /* r_ptr -> array data */
-  int r_ptr = AllocTemp();
-  OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset);
-  if (needs_range_check) {
-    GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds);
-  }
-  StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord);
-  FreeTemp(r_ptr);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
 
+
 void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 9e71749..2670c23 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -66,8 +66,7 @@
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(int s_reg, int m_reg, int opt_flags) {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-    opt_flags & MIR_IGNORE_NULL_CHECK) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
     return NULL;
   }
   return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
@@ -727,6 +726,18 @@
   }
 }
 
+void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
+                             RegLocation rl_src) {
+  bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK);
+  // Elide as GenNullCheck() does; only the NullAndBound helper null checks the array.
+  bool needs_null_check = (cu_->disable_opt & (1 << kNullCheckElimination)) ||
+      !(opt_flags & MIR_IGNORE_NULL_CHECK);
+  ThreadOffset helper = needs_null_check ? QUICK_ENTRYPOINT_OFFSET(pAputObjectWithNullAndBoundCheck)
+      : (needs_range_check ? QUICK_ENTRYPOINT_OFFSET(pAputObjectWithBoundCheck)
+                           : QUICK_ENTRYPOINT_OFFSET(pAputObject));
+  CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src, true);
+}
+
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
   int res_reg = AllocTemp();
@@ -1110,8 +1121,8 @@
   if (!type_known_abstract) {
     branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL);
   }
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg1),
-                          TargetReg(kArg2), true);
+  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2),
+                          TargetReg(kArg1), true);
   /* branch target here */
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch1->target = target;
@@ -1648,7 +1659,7 @@
     case Instruction::REM_LONG_2ADDR:
       call_out = true;
       check_zero = true;
-      func_offset = QUICK_ENTRYPOINT_OFFSET(pLdivmod);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pLmod);
       /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
       ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2) : TargetReg(kRet0);
       break;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 8270e01..0a0cc17 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -214,6 +214,7 @@
                                                          int arg0, RegLocation arg1,
                                                          RegLocation arg2, bool safepoint_pc) {
   int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_EQ(arg1.wide, 0U);
   LoadValueDirectFixed(arg1, TargetReg(kArg1));
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
@@ -225,6 +226,21 @@
   CallHelper(r_tgt, helper_offset, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset,
+                                                                 RegLocation arg0, RegLocation arg1,
+                                                                 RegLocation arg2,
+                                                                 bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_EQ(arg0.wide, 0U);  // Narrow values only: each one is loaded into a single fixed register.
+  LoadValueDirectFixed(arg0, TargetReg(kArg0));
+  DCHECK_EQ(arg1.wide, 0U);
+  LoadValueDirectFixed(arg1, TargetReg(kArg1));
+  DCHECK_EQ(arg2.wide, 0U);  // Check the third argument (previously re-checked arg1 by mistake).
+  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  ClobberCalleeSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform intial
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 892af09..387fef3 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -88,12 +88,10 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
-                                RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_dest, int scale);
+                     RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale);
+                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 6ce5750..218ed48 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -484,7 +484,7 @@
  *
  */
 void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale) {
+                          RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -549,65 +549,8 @@
     StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg,
                      scale, size);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void MipsMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
-  int reg_len = INVALID_REG;
-  if (needs_range_check) {
-    reg_len = TargetReg(kArg1);
-    LoadWordDisp(r_array, len_offset, reg_len);  // Get len
-  }
-  /* r_ptr -> array data */
-  int r_ptr = AllocTemp();
-  OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset);
-  if (needs_range_check) {
-    GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds);
-  }
-  StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord);
-  FreeTemp(r_ptr);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 66ece2c..2b26c3d 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -338,22 +338,35 @@
       GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1);
       break;
     case Instruction::APUT_WIDE:
-      GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3);
+      GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3, false);
       break;
     case Instruction::APUT:
-      GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2);
+      GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, false);
       break;
-    case Instruction::APUT_OBJECT:
-      GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0], 2);
+    case Instruction::APUT_OBJECT: {
+      bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]);
+      bool is_safe = is_null;  // Always safe to store null.
+      if (!is_safe) {
+        // Check safety from verifier type information.
+        const MethodReference mr(cu_->dex_file, cu_->method_idx);
+        is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset);
+      }
+      if (is_null || is_safe) {
+        // Store of constant null doesn't require an assignability test and can be generated inline
+        // without fixed register usage or a card mark.
+        GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, !is_null);
+      } else {
+        GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]);
+      }
       break;
+    }
     case Instruction::APUT_SHORT:
     case Instruction::APUT_CHAR:
-      GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1);
+      GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1, false);
       break;
     case Instruction::APUT_BYTE:
     case Instruction::APUT_BOOLEAN:
-      GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2],
-            rl_src[0], 0);
+      GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], rl_src[0], 0, false);
       break;
 
     case Instruction::IGET_OBJECT:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 61f4484..21711e5 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -419,6 +419,9 @@
                  RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object);
     void GenIPut(uint32_t field_idx, int opt_flags, OpSize size,
                  RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object);
+    void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index,
+                        RegLocation rl_src);
+
     void GenConstClass(uint32_t type_idx, RegLocation rl_dest);
     void GenConstString(uint32_t string_idx, RegLocation rl_dest);
     void GenNewInstance(uint32_t type_idx, RegLocation rl_dest);
@@ -475,6 +478,10 @@
     void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_offset,
                                                     int arg0, RegLocation arg1, RegLocation arg2,
                                                     bool safepoint_pc);
+    void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset,
+                                                            RegLocation arg0, RegLocation arg1,
+                                                            RegLocation arg2,
+                                                            bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
@@ -651,12 +658,11 @@
                                  RegLocation rl_src) = 0;
     virtual void GenSpecialCase(BasicBlock* bb, MIR* mir,
                                 SpecialCaseHandler special_case) = 0;
-    virtual void GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                                RegLocation rl_index, RegLocation rl_src, int scale) = 0;
     virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
     virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                     RegLocation rl_index, RegLocation rl_src, int scale) = 0;
+                             RegLocation rl_index, RegLocation rl_src, int scale,
+                             bool card_mark) = 0;
     virtual void GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1,
                                    RegLocation rl_shift) = 0;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d5c21e5..c266e39 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -88,14 +88,12 @@
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2);
-    void GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                                RegLocation rl_index, RegLocation rl_src, int scale);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale);
+                     RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift);
+                           RegLocation rl_src1, RegLocation rl_shift);
     void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 14be7dd..14f5348 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -419,7 +419,7 @@
  * Generate array load
  */
 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                             RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -466,7 +466,7 @@
  *
  */
 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
+                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -502,59 +502,9 @@
     StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
                          rl_src.high_reg, size, INVALID_SREG);
   }
-}
-
-/*
- * Generate array store
- *
- */
-void X86Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array,
-                             RegLocation rl_index, RegLocation rl_src, int scale) {
-  int len_offset = mirror::Array::LengthOffset().Int32Value();
-  int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value();
-
-  FlushAllRegs();  // Use explicit registers
-  LockCallTemps();
-
-  int r_value = TargetReg(kArg0);  // Register holding value
-  int r_array_class = TargetReg(kArg1);  // Register holding array's Class
-  int r_array = TargetReg(kArg2);  // Register holding array
-  int r_index = TargetReg(kArg3);  // Register holding index into array
-
-  LoadValueDirectFixed(rl_array, r_array);  // Grab array
-  LoadValueDirectFixed(rl_src, r_value);  // Grab value
-  LoadValueDirectFixed(rl_index, r_index);  // Grab index
-
-  GenNullCheck(rl_array.s_reg_low, r_array, opt_flags);  // NPE?
-
-  // Store of null?
-  LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL);
-
-  // Get the array's class.
-  LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class);
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value,
-                          r_array_class, true);
-  // Redo LoadValues in case they didn't survive the call.
-  LoadValueDirectFixed(rl_array, r_array);  // Reload array
-  LoadValueDirectFixed(rl_index, r_index);  // Reload index
-  LoadValueDirectFixed(rl_src, r_value);  // Reload value
-  r_array_class = INVALID_REG;
-
-  // Branch here if value to be stored == null
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  null_value_check->target = target;
-
-  // make an extra temp available for card mark below
-  FreeTemp(TargetReg(kArg1));
-  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
-    /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */
-    GenRegMemCheck(kCondUge, r_index, r_array, len_offset, kThrowArrayBounds);
-  }
-  StoreBaseIndexedDisp(r_array, r_index, scale,
-                       data_offset, r_value, INVALID_REG, kWord, INVALID_SREG);
-  FreeTemp(r_index);
-  if (!mir_graph_->IsConstantNullRef(rl_src)) {
-    MarkGCCard(r_value, r_array);
+  if (card_mark) {
+    FreeTemp(rl_index.low_reg);  // Ensure there are 2 free regs for card mark.
+    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
   }
 }
 
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 69fb9c3..cfffbea 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -25,6 +25,8 @@
 #define rSELF r9
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index e6e13be..352982f 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -52,7 +52,6 @@
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -78,7 +77,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -182,7 +184,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -205,7 +206,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -236,7 +240,7 @@
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
   qpoints->pLdiv = __aeabi_ldivmod;
-  qpoints->pLdivmod = __aeabi_ldivmod;  // result returned in r2:r3
+  qpoints->pLmod = __aeabi_ldivmod;  // result returned in r2:r3
   qpoints->pLmul = art_quick_mul_long;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index cb61698..d073177 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -152,6 +152,7 @@
     mov r1, r9                      @ pass Thread::Current
     mov r2, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -162,6 +163,7 @@
     mov r2, r9                      @ pass Thread::Current
     mov r3, sp                      @ pass SP
     b   \cxx_name                   @ \cxx_name(Thread*, SP)
+    bkpt
 END \c_name
 .endm
 
@@ -389,33 +391,96 @@
 END art_quick_unlock_object
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
+     * artThrowClassCastException.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                       @ pass Thread::Current
-    mov    r3, sp                       @ pass SP
-    bl     artCheckCastFromCode         @ (Class* a, Class* b, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
+    push {r0-r1, lr}                    @ save arguments and link register (12 bytes)
+    .save {r0-r1, lr}
+    .cfi_adjust_cfa_offset 12
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset lr, 8
+    sub sp, #4                          @ pad: 16 bytes pushed in total
+    .pad #4
+    .cfi_adjust_cfa_offset 4
+    bl artIsAssignableFromCode          @ r0, r1 are passed through unchanged; result in r0
+    cbz    r0, throw_class_cast_exception  @ zero result: take the throw path
+    add sp, #4                          @ success: drop the pad
+    .cfi_adjust_cfa_offset -4
+    pop {r0-r1, pc}                     @ restore arguments and return
+throw_class_cast_exception:
+    add sp, #4                          @ drop the pad
+    .cfi_adjust_cfa_offset -4           @ NOTE(review): CFA was already reduced before the pop above; confirm unwind info
+    pop {r0-r1, lr}                     @ restore arguments and lr before building the frame
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r2, r9                      @ pass Thread::Current
+    mov r3, sp                      @ pass SP
+    b   artThrowClassCastException  @ (Class*, Class*, Thread*, SP)
+    bkpt                            @ unreached: the branch above does not link
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * r0 = array, r1 = index, r2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    @ save callee saves in case exception allocation triggers GC
-    mov    r2, r9                         @ pass Thread::Current
-    mov    r3, sp                         @ pass SP
-    bl     artCanPutArrayElementFromCode  @ (Object* element, Class* array_class, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
+    tst r0, r0                               @ test the array reference against null
+    bne art_quick_aput_obj_with_bound_check  @ non-null: continue with the bounds check
+    b art_quick_throw_null_pointer_exception @ null array
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    ldr r3, [r0, #ARRAY_LENGTH_OFFSET]   @ r3 = array length (array is not null checked here)
+    cmp r3, r1
+    bhi art_quick_aput_obj               @ length > index (unsigned): in bounds
+    mov r0, r1                           @ out of bounds: r0 = index
+    mov r1, r3                           @ r1 = length
+    b art_quick_throw_array_bounds
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    cbz r2, do_aput_null             @ storing null skips the type check
+    ldr r3, [r0, #CLASS_OFFSET]      @ r3 = class of the array
+    ldr ip, [r2, #CLASS_OFFSET]      @ ip = class of the value
+    ldr r3, [r3, #CLASS_COMPONENT_TYPE_OFFSET]  @ r3 = component type of the array class
+    cmp r3, ip  @ value's type == array's component type - trivial assignability
+    bne check_assignability          @ types differ: ask the runtime
+do_aput:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]         @ store value at data + index * 4
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]  @ r3 = card table pointer from the Thread in r9
+    lsr r0, r0, #7                   @ r0 = array address >> 7, used as the card index
+    strb r3, [r3, r0]                @ card mark: write the low byte of r3 at r3 + r0
+    blx lr
+do_aput_null:
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]         @ store null; no card mark on this path
+    blx lr
+check_assignability:
+    push {r0-r2, lr}                 @ save arguments
+    mov r1, ip                       @ r1 = class of the value
+    mov r0, r3                       @ r0 = component type
+    bl artIsAssignableFromCode
+    cbz r0, throw_array_store_exception  @ zero result: not assignable
+    pop {r0-r2, lr}                  @ restore array, index, value and return address
+    add r3, r0, #OBJECT_ARRAY_DATA_OFFSET
+    str r2, [r3, r1, lsl #2]         @ same store and card mark as do_aput
+    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
+    lsr r0, r0, #7
+    strb r3, [r3, r0]
+    blx lr
+throw_array_store_exception:
+    pop {r0-r2, lr}                  @ restore arguments before building the frame
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov r1, r2                   @ r1 = value (r0 still holds the array)
+    mov r2, r9                   @ pass Thread::Current
+    mov r3, sp                   @ pass SP
+    b artThrowArrayStoreException  @ (Class*, Class*, Thread*, SP) - NOTE(review): r0/r1 hold array and value; confirm
+    bkpt                         @ unreached
+END art_quick_aput_obj
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 75eef60..8c1efeb 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -23,6 +23,7 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 9a66352..5307997 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -25,6 +25,8 @@
 #define rSELF $s1
 // Offset of field Thread::suspend_count_ verified in InitCpu
 #define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 3d08298..cc975d75 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -51,7 +51,6 @@
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -77,7 +76,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -89,9 +91,9 @@
 extern int32_t CmplDouble(double a, double b);
 extern int32_t CmpgFloat(float a, float b);
 extern int32_t CmplFloat(float a, float b);
-extern "C" int64_t artLmulFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivFromCode(int64_t a, int64_t b);
-extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b);
+extern "C" int64_t artLmul(int64_t a, int64_t b);
+extern "C" int64_t artLdiv(int64_t a, int64_t b);
+extern "C" int64_t artLmod(int64_t a, int64_t b);
 
 // Math conversions.
 extern "C" int32_t __fixsfsi(float op1);      // FLOAT_TO_INT
@@ -183,7 +185,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -206,7 +207,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -235,9 +239,9 @@
   qpoints->pIdivmod = NULL;
   qpoints->pD2l = art_d2l;
   qpoints->pF2l = art_f2l;
-  qpoints->pLdiv = artLdivFromCode;
-  qpoints->pLdivmod = artLdivmodFromCode;
-  qpoints->pLmul = artLmulFromCode;
+  qpoints->pLdiv = artLdiv;
+  qpoints->pLmod = artLmod;
+  qpoints->pLmul = artLmul;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
   qpoints->pUshrLong = art_quick_ushr_long;
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index f9b703f..e9c6698 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -283,6 +283,7 @@
     .extern artThrowNullPointerExceptionFromCode
 ENTRY art_quick_throw_null_pointer_exception
     GENERATE_GLOBAL_POINTER
+art_quick_throw_null_pointer_exception_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a0, rSELF                 # pass Thread::Current
     la   $t9, artThrowNullPointerExceptionFromCode
@@ -309,6 +310,7 @@
     .extern artThrowArrayBoundsFromCode
 ENTRY art_quick_throw_array_bounds
     GENERATE_GLOBAL_POINTER
+art_quick_throw_array_bounds_gp_set:
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     move $a2, rSELF                 # pass Thread::Current
     la   $t9, artThrowArrayBoundsFromCode
@@ -481,6 +483,8 @@
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME      # save callee saves in case we block
     move    $a1, rSELF                    # pass Thread::Current
     jal     artLockObjectFromCode         # (Object* obj, Thread*, $sp)
@@ -494,6 +498,8 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     GENERATE_GLOBAL_POINTER
+    beqz    $a0, art_quick_throw_null_pointer_exception_gp_set
+    nop
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
     move    $a1, rSELF                # pass Thread::Current
     jal     artUnlockObjectFromCode   # (Object* obj, Thread*, $sp)
@@ -504,29 +510,116 @@
     /*
      * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
      */
-    .extern artCheckCastFromCode
+    .extern artThrowClassCastException
 ENTRY art_quick_check_cast
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artCheckCastFromCode      # (Class* a, Class* b, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_ZERO
+    addiu  $sp, $sp, -16              # small scratch frame instead of a callee-save frame
+    .cfi_adjust_cfa_offset 16
+    sw     $ra, 12($sp)               # ra is overwritten by the jal below
+    .cfi_rel_offset 31, 12
+    sw     $t9, 8($sp)
+    sw     $a1, 4($sp)                # keep both class arguments for the failure path
+    sw     $a0, 0($sp)
+    jal    artIsAssignableFromCode    # (a0, a1) passed through unchanged; v0 = 1 or 0
+    nop
+    beqz   $v0, throw_class_cast_exception  # v0 == 0: not assignable
+    lw     $ra, 12($sp)               # delay slot: ra is reloaded on both paths
+    jr     $ra                        # assignable: return to managed code
+    addiu  $sp, $sp, 16               # delay slot: release the scratch frame
+    .cfi_adjust_cfa_offset -16
+throw_class_cast_exception:
+    lw     $t9, 8($sp)
+    lw     $a1, 4($sp)                # reload the two classes as arguments for the throw
+    lw     $a0, 0($sp)
+    addiu  $sp, $sp, 16               # release the scratch frame before building the save-all frame
+    .cfi_adjust_cfa_offset -16
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowClassCastException
+    jr   $t9                        # artThrowClassCastException (Class*, Class*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
 END art_quick_check_cast
 
     /*
-     * Entry from managed code that calls artCanPutArrayElementFromCode and delivers exception on
-     * failure.
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * a0 = array, a1 = index, a2 = value
      */
-    .extern artCanPutArrayElementFromCode
-ENTRY art_quick_can_put_array_element
+ENTRY art_quick_aput_obj_with_null_and_bound_check
     GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case exception allocation triggers GC
-    move    $a2, rSELF                     # pass Thread::Current
-    jal     artCanPutArrayElementFromCode  # (Object* element, Class* array_class, Thread*, $sp)
-    move    $a3, $sp                       # pass $sp
-    RETURN_IF_ZERO
-END art_quick_can_put_array_element
+    bnez    $a0, art_quick_aput_obj_with_bound_check_gp_set  # non-null array: go to the bounds check
+    nop
+    b art_quick_throw_null_pointer_exception_gp_set  # null array; _gp_set label lies past the GP setup
+    nop
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_with_bound_check_gp_set:
+    lw $t0, ARRAY_LENGTH_OFFSET($a0)     # t0 = length of the array in a0
+    sltu $t1, $a1, $t0                   # t1 = 1 iff index < length, compared as unsigned
+    bnez $t1, art_quick_aput_obj_gp_set  # in bounds: continue with the checked store
+    nop
+    move $a0, $a1                        # out of bounds: a0 = index
+    b art_quick_throw_array_bounds_gp_set
+    move $a1, $t0                        # delay slot: a1 = length
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    GENERATE_GLOBAL_POINTER
+art_quick_aput_obj_gp_set:
+    beqz $a2, do_aput_null              # a null value needs no type check
+    nop
+    lw $t0, CLASS_OFFSET($a0)           # t0 = class of the array
+    lw $t1, CLASS_OFFSET($a2)           # t1 = class of the value
+    lw $t0, CLASS_COMPONENT_TYPE_OFFSET($t0)  # t0 = component type of the array
+    bne $t1, $t0, check_assignability  # value's type == array's component type - trivial assignability
+    nop
+do_aput:
+    sll $a1, $a1, 2                     # a1 = index * 4, the byte offset of the element
+    add $t0, $a0, $a1                   # t0 = array + byte offset
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)  # array[index] = value
+    lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)  # t0 = card table base of the current thread
+    srl $t1, $a0, 7                     # t1 = array address >> 7
+    add $t1, $t1, $t0                   # t1 = address of the card for this array
+    sb  $t0, ($t1)                      # mark the card with the low byte of the base
+    jr  $ra
+    nop
+do_aput_null:
+    sll $a1, $a1, 2                     # a1 = index * 4
+    add $t0, $a0, $a1
+    sw  $a2, OBJECT_ARRAY_DATA_OFFSET($t0)  # array[index] = null; no card is marked on this path
+    jr  $ra
+    nop
+check_assignability:
+    addiu  $sp, $sp, -32                # scratch frame for the call below
+    .cfi_adjust_cfa_offset 32
+    sw     $ra, 28($sp)
+    .cfi_rel_offset 31, 28
+    sw     $t9, 12($sp)
+    sw     $a2, 8($sp)                  # value
+    sw     $a1, 4($sp)                  # index
+    sw     $a0, 0($sp)                  # array
+    move   $a1, $t1                     # arg1 = class of the value
+    move   $a0, $t0                     # arg0 = component type of the array
+    jal    artIsAssignableFromCode  # (Class*, Class*)
+    nop
+    lw     $ra, 28($sp)
+    lw     $t9, 12($sp)
+    lw     $a2, 8($sp)
+    lw     $a1, 4($sp)
+    lw     $a0, 0($sp)
+    addiu  $sp, $sp, 32                 # non-trapping form, as in the other stack adjustments
+    .cfi_adjust_cfa_offset -32
+    bnez   $v0, do_aput                 # nonzero result: assignable, do the store
+    nop
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    move $a1, $a2                   # arg1 = value, a0 still holds the array
+    move $a2, rSELF                 # pass Thread::Current
+    la   $t9, artThrowArrayStoreException
+    jr   $t9                        # artThrowArrayStoreException(Object*, Object*, Thread*, SP)
+    move $a3, $sp                   # pass $sp
+END art_quick_aput_obj
 
     /*
      * Entry from managed code when uninitialized static storage, this stub will run the class
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 7364de0..bd54549 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -23,6 +23,7 @@
 
 void Thread::InitCpu() {
   CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index d4e0927..e817ff7 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -21,6 +21,8 @@
 
 // Offset of field Thread::self_ verified in InitCpu
 #define THREAD_SELF_OFFSET 40
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
 // Offset of field Thread::exception_ verified in InitCpu
 #define THREAD_EXCEPTION_OFFSET 12
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 4c87e07..89dd1b8 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -50,7 +50,6 @@
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
-extern "C" void art_quick_can_put_array_element(void*, void*);
 extern "C" void art_quick_check_cast(void*, void*);
 
 // DexCache entrypoints.
@@ -73,7 +72,10 @@
 extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
 extern "C" void* art_quick_get_obj_static(uint32_t);
 
-// FillArray entrypoint.
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
 extern "C" void art_quick_handle_fill_data(void*, void*);
 
 // Lock entrypoints.
@@ -89,7 +91,7 @@
 extern "C" int64_t art_quick_f2l(float);
 extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
 extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
-extern "C" int64_t art_quick_ldivmod(int64_t, int64_t);
+extern "C" int64_t art_quick_lmod(int64_t, int64_t);
 extern "C" int64_t art_quick_lmul(int64_t, int64_t);
 extern "C" uint64_t art_quick_lshl(uint64_t, uint32_t);
 extern "C" uint64_t art_quick_lshr(uint64_t, uint32_t);
@@ -165,7 +167,6 @@
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCanPutArrayElement = art_quick_can_put_array_element;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -188,7 +189,10 @@
   qpoints->pGet64Static = art_quick_get64_static;
   qpoints->pGetObjStatic = art_quick_get_obj_static;
 
-  // FillArray
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
   qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
 
   // JNI
@@ -218,7 +222,7 @@
   qpoints->pD2l = art_quick_d2l;
   qpoints->pF2l = art_quick_f2l;
   qpoints->pLdiv = art_quick_ldiv;
-  qpoints->pLdivmod = art_quick_ldivmod;
+  qpoints->pLmod = art_quick_lmod;
   qpoints->pLmul = art_quick_lmul;
   qpoints->pShlLong = art_quick_lshl;
   qpoints->pShrLong = art_quick_lshr;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6be73d1..9fce72f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -479,14 +479,115 @@
 
 DEFINE_FUNCTION art_quick_is_assignable
     PUSH eax                     // alignment padding
-    PUSH ecx                     // pass arg2
-    PUSH eax                     // pass arg1
-    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b, Thread*, SP)
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     addl LITERAL(12), %esp        // pop arguments
     .cfi_adjust_cfa_offset -12
     ret
 END_FUNCTION art_quick_is_assignable
 
+DEFINE_FUNCTION art_quick_check_cast
+    PUSH eax                     // alignment padding
+    PUSH ecx                     // pass arg2 - obj->klass
+    PUSH eax                     // pass arg1 - checked class
+    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    testl %eax, %eax              // eax = 1 if assignable, 0 if not
+    jz 1f                         // jump forward if not assignable
+    addl LITERAL(12), %esp        // pop arguments
+    .cfi_adjust_cfa_offset -12
+    ret                           // cast is valid: return to managed code
+1:                                // cast failed: the three pushed words are still on the stack
+    POP eax                       // reload arg1 - checked class
+    POP ecx                       // reload arg2 - obj->klass
+    addl LITERAL(4), %esp         // drop the alignment padding
+    .cfi_adjust_cfa_offset -12    // NOTE(review): CFA was already lowered before the ret - verify
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %edx                // edx = SP after the save-all frame was built
+    // Outgoing argument set up
+    PUSH edx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH ecx                      // pass arg2
+    PUSH eax                      // pass arg1
+    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_check_cast
+
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * eax = array, ecx = index, edx = value
+     */
+DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+    testl %eax, %eax                            // ZF is set iff the array reference is null
+    jnz art_quick_aput_obj_with_bound_check     // non-null: continue with the bounds check
+    jmp art_quick_throw_null_pointer_exception  // null array: tail-jump to the NPE stub
+END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+    movl ARRAY_LENGTH_OFFSET(%eax), %ebx  // ebx = length of the array
+    cmpl %ebx, %ecx                       // compare index (ecx) with length (ebx)
+    jb art_quick_aput_obj                 // unsigned index < length: in bounds
+    mov %ecx, %eax                        // out of bounds: eax = index
+    mov %ebx, %ecx                        // ecx = length
+    jmp art_quick_throw_array_bounds      // tail-jump to the bounds stub with (index, length)
+END_FUNCTION art_quick_aput_obj_with_bound_check
+
+DEFINE_FUNCTION art_quick_aput_obj
+    test %edx, %edx              // store of null
+    jz do_aput_null              // null needs neither a type check nor a card mark
+    movl CLASS_OFFSET(%eax), %ebx                 // ebx = class of the array
+    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx  // ebx = component type of the array
+    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+    jne check_assignability      // classes differ: ask the runtime
+do_aput:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // array[index] = value
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx  // edx = card table base of the current thread
+    shrl LITERAL(7), %eax        // eax = array address >> 7
+    movb %dl, (%edx, %eax)       // mark the card with the low byte of the base
+    ret
+do_aput_null:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // array[index] = null, no card mark
+    ret
+check_assignability:
+    PUSH eax                     // save arguments
+    PUSH ecx
+    PUSH edx
+    subl LITERAL(8), %esp        // alignment padding
+    .cfi_adjust_cfa_offset 8
+    pushl CLASS_OFFSET(%edx)     // pass arg2 - type of the value to be stored
+    .cfi_adjust_cfa_offset 4
+    PUSH ebx                     // pass arg1 - component type of the array
+    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
+    addl LITERAL(16), %esp       // pop arguments
+    .cfi_adjust_cfa_offset -16
+    testl %eax, %eax             // eax = 1 if assignable, 0 if not
+    jz   throw_array_store_exception
+    POP  edx                     // restore value
+    POP  ecx                     // restore index
+    POP  eax                     // restore array
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
+    movl %fs:THREAD_CARD_TABLE_OFFSET, %edx  // same card mark as do_aput
+    shrl LITERAL(7), %eax
+    movb %dl, (%edx, %eax)
+    ret
+throw_array_store_exception:
+    POP  edx                     // NOTE(review): POPs before the ret above presumably moved the CFA
+    POP  ecx
+    POP  eax                     // eax = array and edx = value feed the throw call
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %esp, %ecx               // ecx = SP after the save-all frame was built
+    // Outgoing argument set up
+    PUSH ecx                      // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    .cfi_adjust_cfa_offset 4
+    PUSH edx                      // pass arg2 - value
+    PUSH eax                      // pass arg1 - array
+    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_aput_obj
+
 DEFINE_FUNCTION art_quick_memcpy
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
@@ -497,9 +598,6 @@
     ret
 END_FUNCTION art_quick_memcpy
 
-TWO_ARG_DOWNCALL art_quick_check_cast, artCheckCastFromCode, RETURN_IF_EAX_ZERO
-TWO_ARG_DOWNCALL art_quick_can_put_array_element, artCanPutArrayElementFromCode, RETURN_IF_EAX_ZERO
-
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
 DEFINE_FUNCTION art_quick_fmod
@@ -586,30 +684,30 @@
 END_FUNCTION art_quick_idivmod
 
 DEFINE_FUNCTION art_quick_ldiv
-    subl LITERAL(12), %esp        // alignment padding
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivFromCode)  // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLdiv)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
 END_FUNCTION art_quick_ldiv
 
-DEFINE_FUNCTION art_quick_ldivmod
-    subl LITERAL(12), %esp        // alignment padding
+DEFINE_FUNCTION art_quick_lmod
+    subl LITERAL(12), %esp       // alignment padding
     .cfi_adjust_cfa_offset 12
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
-    PUSH eax                      // pass arg1 a.lo
-    call SYMBOL(artLdivmodFromCode) // (jlong a, jlong b)
-    addl LITERAL(28), %esp        // pop arguments
+    PUSH eax                     // pass arg1 a.lo
+    call SYMBOL(artLmod)         // (jlong a, jlong b)
+    addl LITERAL(28), %esp       // pop arguments
     .cfi_adjust_cfa_offset -28
     ret
-END_FUNCTION art_quick_ldivmod
+END_FUNCTION art_quick_lmod
 
 DEFINE_FUNCTION art_quick_lmul
     imul %eax, %ebx              // ebx = a.lo(eax) * b.hi(ebx)
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 7e0aee0..42789cb 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -134,6 +134,7 @@
 
   // Sanity check other offsets.
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index d2eaf8e..a6700bc 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -22,8 +22,16 @@
 #define SUSPEND_CHECK_INTERVAL (1000)
 
 // Offsets within java.lang.Object.
+#define CLASS_OFFSET 0
 #define LOCK_WORD_OFFSET 4
 
+// Offsets within java.lang.Class.
+#define CLASS_COMPONENT_TYPE_OFFSET 12
+
+// Array offsets.
+#define ARRAY_LENGTH_OFFSET 8
+#define OBJECT_ARRAY_DATA_OFFSET 12
+
 // Offsets within java.lang.String.
 #define STRING_VALUE_OFFSET 8
 #define STRING_COUNT_OFFSET 12
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 9ffa736..ae53d6c 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -14,11 +14,8 @@
  * limitations under the License.
  */
 
-#include "callee_save_frame.h"
-#include "entrypoints/entrypoint_utils.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
 
 namespace art {
 
@@ -31,38 +28,4 @@
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
-// Check whether it is safe to cast one class to the other, throw exception and return -1 on failure
-extern "C" int artCheckCastFromCode(mirror::Class* src_type, mirror::Class* dest_type,
-                                    Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(src_type->IsClass()) << PrettyClass(src_type);
-  DCHECK(dest_type->IsClass()) << PrettyClass(dest_type);
-  if (LIKELY(dest_type->IsAssignableFrom(src_type))) {
-    return 0;  // Success
-  } else {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-    ThrowClassCastException(dest_type, src_type);
-    return -1;  // Failure
-  }
-}
-
-// Tests whether 'element' can be assigned into an array of type 'array_class'.
-// Returns 0 on success and -1 if an exception is pending.
-extern "C" int artCanPutArrayElementFromCode(const mirror::Object* element,
-                                             const mirror::Class* array_class,
-                                             Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(array_class != NULL);
-  // element can't be NULL as we catch this is screened in runtime_support
-  mirror::Class* element_class = element->GetClass();
-  mirror::Class* component_type = array_class->GetComponentType();
-  if (LIKELY(component_type->IsAssignableFrom(element_class))) {
-    return 0;  // Success
-  } else {
-    FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-    ThrowArrayStoreException(element_class, array_class);
-    return -1;  // Failure
-  }
-}
-
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 9d3b8ef..e9964ad 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -48,7 +48,6 @@
 
   // Cast
   uint32_t (*pInstanceofNonTrivial)(const mirror::Class*, const mirror::Class*);
-  void (*pCanPutArrayElement)(void*, void*);
   void (*pCheckCast)(void*, void*);
 
   // DexCache
@@ -71,7 +70,10 @@
   void* (*pGetObjInstance)(uint32_t, void*);
   void* (*pGetObjStatic)(uint32_t);
 
-  // FillArray
+  // Array
+  void (*pAputObjectWithNullAndBoundCheck)(void*, uint32_t, void*);  // array, index, src
+  void (*pAputObjectWithBoundCheck)(void*, uint32_t, void*);  // array, index, src
+  void (*pAputObject)(void*, uint32_t, void*);  // array, index, src
   void (*pHandleFillArrayData)(void*, void*);
 
   // JNI
@@ -103,7 +105,7 @@
   int64_t (*pD2l)(double);
   int64_t (*pF2l)(float);
   int64_t (*pLdiv)(int64_t, int64_t);
-  int64_t (*pLdivmod)(int64_t, int64_t);
+  int64_t (*pLmod)(int64_t, int64_t);
   int64_t (*pLmul)(int64_t, int64_t);
   uint64_t (*pShlLong)(uint64_t, uint32_t);
   uint64_t (*pShrLong)(uint64_t, uint32_t);
diff --git a/runtime/entrypoints/quick/quick_math_entrypoints.cc b/runtime/entrypoints/quick/quick_math_entrypoints.cc
index 0bfe59d..014aad3 100644
--- a/runtime/entrypoints/quick/quick_math_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_math_entrypoints.cc
@@ -62,15 +62,15 @@
   return -1;
 }
 
-extern "C" int64_t artLmulFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLmul(int64_t a, int64_t b) {
   return a * b;
 }
 
-extern "C" int64_t artLdivFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLdiv(int64_t a, int64_t b) {
   return a / b;
 }
 
-extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b) {
+extern "C" int64_t artLmod(int64_t a, int64_t b) {
   return a % b;
 }
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index f67b2fc..31eacac 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -15,8 +15,9 @@
  */
 
 #include "callee_save_frame.h"
+#include "common_throws.h"
 #include "entrypoints/entrypoint_utils.h"
-#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "object_utils.h"
 #include "thread.h"
 #include "well_known_classes.h"
@@ -95,4 +96,21 @@
   self->QuickDeliverException();
 }
 
+extern "C" void artThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type,
+                                           Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);  // stubs build a save-all frame first
+  CHECK(!dest_type->IsAssignableFrom(src_type));  // the stub already saw this test fail
+  ThrowClassCastException(dest_type, src_type);  // presumably sets the pending exception
+  self->QuickDeliverException();  // stubs mark the code after this call as unreached
+}
+
+extern "C" void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
+                                            Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);  // stubs build a save-all frame first
+  ThrowArrayStoreException(value->GetClass(), array->GetClass());  // (element class, array class)
+  self->QuickDeliverException();  // stubs mark the code after this call as unreached
+}
+
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 586151d..dbc6f57 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -247,7 +247,7 @@
     } else {
       Class* component = GetComponentType();
       if (component->IsPrimitive()) {
-        return false;
+        return true;
       } else {
         return component->CannotBeAssignedFromOtherTypes();
       }
@@ -346,14 +346,18 @@
 
   bool IsArtMethodClass() const;
 
+  static MemberOffset ComponentTypeOffset() {  // kept in sync with CLASS_COMPONENT_TYPE_OFFSET
+    return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
+  }
+
   Class* GetComponentType() const {
-    return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, component_type_), false);
+    return GetFieldObject<Class*>(ComponentTypeOffset(), false);
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(GetComponentType() == NULL);
     DCHECK(new_component_type != NULL);
-    SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, component_type_), new_component_type, false);
+    SetFieldObject(ComponentTypeOffset(), new_component_type, false);
   }
 
   size_t GetComponentSize() const {
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index b8765af..1e610f2 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -71,12 +71,20 @@
 
 // Keep the assembly code in sync
 TEST_F(ObjectTest, AsmConstants) {
-  ASSERT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
-  ASSERT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
-  ASSERT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
-  ASSERT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
+  EXPECT_EQ(CLASS_OFFSET, Object::ClassOffset().Int32Value());
+  EXPECT_EQ(LOCK_WORD_OFFSET, Object::MonitorOffset().Int32Value());
 
-  ASSERT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(CLASS_COMPONENT_TYPE_OFFSET, Class::ComponentTypeOffset().Int32Value());
+
+  EXPECT_EQ(ARRAY_LENGTH_OFFSET, Array::LengthOffset().Int32Value());
+  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(Object*)).Int32Value());
+
+  EXPECT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
+  EXPECT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
+  EXPECT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
+  EXPECT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
+
+  EXPECT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index de14dbb..3063658 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1564,7 +1564,6 @@
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial),
-  QUICK_ENTRY_POINT_INFO(pCanPutArrayElement),
   QUICK_ENTRY_POINT_INFO(pCheckCast),
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage),
   QUICK_ENTRY_POINT_INFO(pInitializeTypeAndVerifyAccess),
@@ -1582,6 +1581,9 @@
   QUICK_ENTRY_POINT_INFO(pGet64Static),
   QUICK_ENTRY_POINT_INFO(pGetObjInstance),
   QUICK_ENTRY_POINT_INFO(pGetObjStatic),
+  QUICK_ENTRY_POINT_INFO(pAputObjectWithNullAndBoundCheck),
+  QUICK_ENTRY_POINT_INFO(pAputObjectWithBoundCheck),
+  QUICK_ENTRY_POINT_INFO(pAputObject),
   QUICK_ENTRY_POINT_INFO(pHandleFillArrayData),
   QUICK_ENTRY_POINT_INFO(pJniMethodStart),
   QUICK_ENTRY_POINT_INFO(pJniMethodStartSynchronized),
@@ -1606,7 +1608,7 @@
   QUICK_ENTRY_POINT_INFO(pD2l),
   QUICK_ENTRY_POINT_INFO(pF2l),
   QUICK_ENTRY_POINT_INFO(pLdiv),
-  QUICK_ENTRY_POINT_INFO(pLdivmod),
+  QUICK_ENTRY_POINT_INFO(pLmod),
   QUICK_ENTRY_POINT_INFO(pLmul),
   QUICK_ENTRY_POINT_INFO(pShlLong),
   QUICK_ENTRY_POINT_INFO(pShrLong),
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 6b1ff77..7d2ee19 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -506,17 +506,25 @@
 
   while (dex_pc < insns_size) {
     Instruction::Code opcode = inst->Opcode();
-    if (opcode == Instruction::NEW_INSTANCE) {
-      new_instance_count++;
-    } else if (opcode == Instruction::MONITOR_ENTER) {
-      monitor_enter_count++;
-    } else if (opcode == Instruction::CHECK_CAST) {
-      has_check_casts_ = true;
-    } else if ((inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
-              (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE) ||
-              (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
-              (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE)) {
-      has_virtual_or_interface_invokes_ = true;
+    switch (opcode) {
+      case Instruction::APUT_OBJECT:
+      case Instruction::CHECK_CAST:
+        has_check_casts_ = true;
+        break;
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_INTERFACE_RANGE:
+        has_virtual_or_interface_invokes_ = true;
+        break;
+      case Instruction::MONITOR_ENTER:
+        monitor_enter_count++;
+        break;
+      case Instruction::NEW_INSTANCE:
+        new_instance_count++;
+        break;
+      default:
+        break;
     }
     size_t inst_size = inst->SizeInCodeUnits();
     insn_flags_[dex_pc].SetLengthInCodeUnits(inst_size);
@@ -3940,18 +3948,32 @@
                                            code_item_->insns_size_in_code_units_);
 
   for (; inst < end; inst = inst->Next()) {
-    if (Instruction::CHECK_CAST != inst->Opcode()) {
-      continue;
-    }
-    uint32_t dex_pc = inst->GetDexPc(code_item_->insns_);
-    RegisterLine* line = reg_table_.GetLine(dex_pc);
-    const RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
-    const RegType& cast_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
-    if (cast_type.IsStrictlyAssignableFrom(reg_type)) {
-      if (mscs.get() == NULL) {
-        mscs.reset(new MethodSafeCastSet());
+    Instruction::Code code = inst->Opcode();
+    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
+      uint32_t dex_pc = inst->GetDexPc(code_item_->insns_);
+      RegisterLine* line = reg_table_.GetLine(dex_pc);
+      bool is_safe_cast = false;
+      if (code == Instruction::CHECK_CAST) {
+        const RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
+        const RegType& cast_type = ResolveClassAndCheckAccess(inst->VRegB_21c());
+        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
+      } else {
+        const RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
+        // We only know it's safe to assign to an array if the array type is precise. For example,
+        // an Object[] can have any type of object stored in it, but it may also be assigned a
+        // String[] in which case the stores need to be of Strings.
+        if (array_type.IsPreciseReference()) {
+          const RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
+          const RegType& component_type(reg_types_.GetComponentType(array_type, class_loader_));
+          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
+        }
       }
-      mscs->insert(dex_pc);
+      if (is_safe_cast) {
+        if (mscs.get() == NULL) {
+          mscs.reset(new MethodSafeCastSet());
+        }
+        mscs->insert(dex_pc);
+      }
     }
   }
   return mscs.release();
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 7f33741..57fde1d 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -724,10 +724,12 @@
   // running and the verifier is called from the class linker.
   const bool allow_soft_failures_;
 
-  // Indicates if the method being verified contains at least one check-cast instruction.
+  // Indicates the method being verified contains at least one check-cast or aput-object
+  // instruction. Aput-object operations implicitly check for array-store exceptions, similar to
+  // check-cast.
   bool has_check_casts_;
 
-  // Indicates if the method being verified contains at least one invoke-virtual/range
+  // Indicates the method being verified contains at least one invoke-virtual/range
   // or invoke-interface/range.
   bool has_virtual_or_interface_invokes_;
 };
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index fd70300..446dd00 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -531,8 +531,9 @@
 }
 
 const RegType& RegTypeCache::GetComponentType(const RegType& array, mirror::ClassLoader* loader) {
-  CHECK(array.IsArrayTypes());
-  if (array.IsUnresolvedTypes()) {
+  if (!array.IsArrayTypes()) {
+    return Conflict();
+  } else if (array.IsUnresolvedTypes()) {
     const std::string& descriptor(array.GetDescriptor());
     const std::string component(descriptor.substr(1, descriptor.size() - 1));
     return FromDescriptor(loader, component.c_str(), false);