Merge "Add more error printing to TransitionFromSuspendedToRunnable"
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c775e03..a7dbb53 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -361,6 +361,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
 };
 
+class ArraySetSlowPathARM : public SlowPathCode {
+ public:
+  explicit ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
+};
+
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
@@ -3750,38 +3795,32 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      instruction,
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(value_type)) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RequiresRegister());
-    }
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
 
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      locations->AddTemp(Location::RequiresRegister());
-    }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register array = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -3793,9 +3832,9 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, obj, offset);
+        __ StoreToOffset(kStoreByte, value, array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
         __ StoreToOffset(kStoreByte, value, IP, data_offset);
       }
       break;
@@ -3808,55 +3847,133 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+        __ StoreToOffset(kStoreHalfword, value, array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
         __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
       }
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        Register value = locations->InAt(2).AsRegister<Register>();
-        Register source = value;
-        if (kPoisonHeapReferences && needs_write_barrier) {
-          // Note that in the case where `value` is a null reference,
-          // we do not enter this block, as a null reference does not
-          // need poisoning.
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          __ Mov(temp, value);
-          __ PoisonHeapReference(temp);
-          source = temp;
-        }
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      Register source = value;
+
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, source, obj, offset);
+          __ StoreToOffset(kStoreWord, source, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+          __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          Register card = locations->GetTemp(1).AsRegister<Register>();
-          codegen_->MarkGCCard(temp, card, obj, value, instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+      Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      Label done;
+      SlowPathCode* slow_path = nullptr;
+
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          Label non_zero;
+          __ CompareAndBranchIfNonZero(value, &non_zero);
+          if (index.IsConstant()) {
+            size_t offset =
+               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+            __ StoreToOffset(kStoreWord, value, array, offset);
+          } else {
+            DCHECK(index.IsRegister()) << index;
+            __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+            __ StoreToOffset(kStoreWord, value, IP, data_offset);
+          }
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ b(&done);
+          __ Bind(&non_zero);
+        }
+
+        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmp(temp1, ShifterOperand(temp2));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          Label do_put;
+          __ b(&do_put, EQ);
+          __ MaybeUnpoisonHeapReference(temp1);
+          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // No need to poison/unpoison, we're comparing against null.
+          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ b(slow_path->GetEntryLabel(), NE);
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        __ Mov(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreToOffset(kStoreWord, source, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ StoreToOffset(kStoreWord, source, IP, data_offset);
+      }
+
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreToOffset(kStoreWord, value, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ StoreToOffset(kStoreWord, value, IP, data_offset);
+      }
+
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -3866,9 +3983,9 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset);
+        __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
@@ -3880,9 +3997,9 @@
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset);
+        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
         __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset);
       }
       break;
@@ -3894,9 +4011,9 @@
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
       }
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 70327af..78ecfde 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -480,7 +480,7 @@
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
-    : instruction_(instruction) {}
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -499,6 +499,52 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
 };
 
+class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        LocationFrom(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -1560,76 +1606,136 @@
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
-  if (instruction->NeedsTypeCheck()) {
-    LocationSummary* locations =
-        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    LocationSummary* locations =
-        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RequiresRegister());
-    }
+    locations->SetInAt(2, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
-  if (needs_runtime_call) {
-    // Note: if heap poisoning is enabled, pAputObject takes cares
-    // of poisoning the reference.
-    codegen_->InvokeRuntime(
-        QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
-    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+  Register array = InputRegisterAt(instruction, 0);
+  CPURegister value = InputCPURegisterAt(instruction, 2);
+  CPURegister source = value;
+  Location index = locations->InAt(1);
+  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  MemOperand destination = HeapOperand(array);
+  MacroAssembler* masm = GetVIXLAssembler();
+  BlockPoolsScope block_pools(masm);
+
+  if (!needs_write_barrier) {
+    DCHECK(!may_need_runtime_call);
+    if (index.IsConstant()) {
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
+      destination = HeapOperand(array, offset);
+    } else {
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireSameSizeAs(array);
+      __ Add(temp, array, offset);
+      destination = HeapOperand(temp,
+                                XRegisterFrom(index),
+                                LSL,
+                                Primitive::ComponentSizeShift(value_type));
+    }
+    codegen_->Store(value_type, value, destination);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
-    Register obj = InputRegisterAt(instruction, 0);
-    CPURegister value = InputCPURegisterAt(instruction, 2);
-    CPURegister source = value;
-    Location index = locations->InAt(1);
-    size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
-    MemOperand destination = HeapOperand(obj);
-    MacroAssembler* masm = GetVIXLAssembler();
-    BlockPoolsScope block_pools(masm);
+    DCHECK(needs_write_barrier);
+    vixl::Label done;
+    SlowPathCodeARM64* slow_path = nullptr;
     {
       // We use a block to end the scratch scope before the write barrier, thus
       // freeing the temporary registers so they can be used in `MarkGCCard`.
       UseScratchRegisterScope temps(masm);
-
-      if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-        DCHECK(value.IsW());
-        Register temp = temps.AcquireW();
-        __ Mov(temp, value.W());
-        GetAssembler()->PoisonHeapReference(temp.W());
-        source = temp;
-      }
-
+      Register temp = temps.AcquireSameSizeAs(array);
       if (index.IsConstant()) {
         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
-        destination = HeapOperand(obj, offset);
+        destination = HeapOperand(array, offset);
       } else {
-        Register temp = temps.AcquireSameSizeAs(obj);
-        __ Add(temp, obj, offset);
         destination = HeapOperand(temp,
                                   XRegisterFrom(index),
                                   LSL,
                                   Primitive::ComponentSizeShift(value_type));
       }
 
-      codegen_->Store(value_type, source, destination);
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          vixl::Label non_zero;
+          __ Cbnz(Register(value), &non_zero);
+          if (!index.IsConstant()) {
+            __ Add(temp, array, offset);
+          }
+          __ Str(wzr, destination);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ B(&done);
+          __ Bind(&non_zero);
+        }
+
+        Register temp2 = temps.AcquireSameSizeAs(array);
+        __ Ldr(temp, HeapOperand(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        GetAssembler()->MaybeUnpoisonHeapReference(temp);
+        __ Ldr(temp, HeapOperand(temp, component_offset));
+        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ Cmp(temp, temp2);
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          vixl::Label do_put;
+          __ B(eq, &do_put);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+          __ Ldr(temp, HeapOperand(temp, super_offset));
+          // No need to unpoison, we're comparing against null.
+          __ Cbnz(temp, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ B(ne, slow_path->GetEntryLabel());
+        }
+        temps.Release(temp2);
+      }
+
+      if (kPoisonHeapReferences) {
+        Register temp2 = temps.AcquireSameSizeAs(array);
+          DCHECK(value.IsW());
+        __ Mov(temp2, value.W());
+        GetAssembler()->PoisonHeapReference(temp2);
+        source = temp2;
+      }
+
+      if (!index.IsConstant()) {
+        __ Add(temp, array, offset);
+      }
+      __ Str(source, destination);
+
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
     }
-    if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) {
-      codegen_->MarkGCCard(obj, value.W(), instruction->GetValueCanBeNull());
+
+    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
+
+    if (done.IsLinked()) {
+      __ Bind(&done);
+    }
+
+    if (slow_path != nullptr) {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index c9f8493..ad0a39c 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -40,9 +40,6 @@
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
 static constexpr FpuRegister FTMP = F8;
 
-// ART Thread Register.
-static constexpr GpuRegister TR = S1;
-
 Location Mips64ReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a47a95e..3d97132 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -380,6 +380,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
+class ArraySetSlowPathX86 : public SlowPathCode {
+ public:
+  explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
+};
+
 #undef __
 #define __ down_cast<X86Assembler*>(GetAssembler())->
 
@@ -4245,72 +4290,59 @@
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  bool is_byte_type = (value_type == Primitive::kPrimBoolean)
+      || (value_type == Primitive::kPrimByte);
+  // We need the inputs to be different than the output in case of long operation.
+  // In case of a byte operation, the register allocator does not support multiple
+  // inputs that die at entry with one in a specific register.
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (is_byte_type) {
+    // Ensure the value is in a byte register.
+    locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
+  } else if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    bool is_byte_type = (value_type == Primitive::kPrimBoolean)
-        || (value_type == Primitive::kPrimByte);
-    // We need the inputs to be different than the output in case of long operation.
-    // In case of a byte operation, the register allocator does not support multiple
-    // inputs that die at entry with one in a specific register.
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (is_byte_type) {
-      // Ensure the value is in a byte register.
-      locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
-    } else if (Primitive::IsFloatingPointType(value_type)) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
-    }
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      // Ensure the card is in a byte register.
-      locations->AddTemp(Location::RegisterLocation(ECX));
-    }
+    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    // Ensure the card is in a byte register.
+    locations->AddTemp(Location::RegisterLocation(ECX));
   }
 }
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register array = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_1, offset);
+      if (value.IsRegister()) {
+        __ movb(address, value.AsRegister<ByteRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4318,93 +4350,106 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_2, offset);
+      if (value.IsRegister()) {
+        __ movw(address, value.AsRegister<Register>());
       } else {
-        if (value.IsRegister()) {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, offset), temp);
-            } else {
-              __ movl(Address(obj, offset), value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, offset), Immediate(v));
-          }
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), temp);
-            } else {
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                      value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(v));
-          }
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (!value.IsRegister()) {
+        // Just setting null.
+        DCHECK(instruction->InputAt(2)->IsNullConstant());
+        DCHECK(value.IsConstant()) << value;
+        __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-        if (needs_write_barrier) {
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          Register card = locations->GetTemp(1).AsRegister<Register>();
-          codegen_->MarkGCCard(
-              temp, card, obj, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        DCHECK(!codegen_->IsLeafMethod());
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      Register register_value = value.AsRegister<Register>();
+      NearLabel done, not_null, do_put;
+      SlowPathCode* slow_path = nullptr;
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          __ testl(register_value, register_value);
+          __ j(kNotEqual, &not_null);
+          __ movl(address, Immediate(0));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ jmp(&done);
+          __ Bind(&not_null);
+        }
+
+        __ movl(temp, Address(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp);
+        __ movl(temp, Address(temp, component_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmpl(temp, Address(register_value, class_offset));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          __ j(kEqual, &do_put);
+          __ MaybeUnpoisonHeapReference(temp);
+          __ movl(temp, Address(temp, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ testl(temp, temp);
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        __ movl(address, temp);
+      } else {
+        __ movl(address, register_value);
+      }
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      Register card = locations->GetTemp(1).AsRegister<Register>();
+      codegen_->MarkGCCard(
+          temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
+      __ Bind(&done);
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+    case Primitive::kPrimInt: {
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (value.IsRegister()) {
+        __ movl(address, value.AsRegister<Register>());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4413,30 +4458,30 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>());
+          __ movl(Address(array, offset), value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
+          __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+          __ movl(Address(array, offset), Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
+          __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val)));
         }
       } else {
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   Immediate(High32Bits(val)));
         }
       }
@@ -4444,28 +4489,22 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movss(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movss(address, value.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movsd(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movsd(address, value.AsFpuRegister<XmmRegister>());
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b845a27..6ea6138 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -396,6 +396,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
 };
 
+class ArraySetSlowPathX86_64 : public SlowPathCode {  // pAputObject slow path for HArraySet.
+ public:
+  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());  // Reached when the inline type check fails.
+    SaveLiveRegisters(codegen, locations);  // Undone by RestoreLiveRegisters below.
+
+    InvokeRuntimeCallingConvention calling_convention;  // Supplies the argument registers.
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),  // Array reference.
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),  // Index.
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),  // Value to store.
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());  // Exit label is bound after the inline store sequence.
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
+
+ private:
+  HInstruction* const instruction_;  // The array-set instruction being compiled.
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
+};
+
 #undef __
 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -3992,66 +4037,55 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-  } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(
-        1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    locations->SetInAt(2, Location::RequiresRegister());
-    if (value_type == Primitive::kPrimLong) {
-      locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
-    } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
-    }
+      instruction,
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      locations->AddTemp(Location::RequiresRegister());
-    }
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(
+      1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  if (value_type == Primitive::kPrimLong) {
+    locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
+  } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+  }
+
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          __ movb(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
+      if (value.IsRegister()) {
+        __ movb(address, value.AsRegister<CpuRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4059,154 +4093,145 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
+      if (value.IsRegister()) {
+        __ movw(address, value.AsRegister<CpuRegister>());
       } else {
-        DCHECK(index.IsRegister()) << index;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        DCHECK(value.IsConstant()) << value;
+        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-              __ movl(temp, value.AsRegister<CpuRegister>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, offset), temp);
-            } else {
-              __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, offset), Immediate(v));
-          }
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-              __ movl(temp, value.AsRegister<CpuRegister>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), temp);
-            } else {
-              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                      value.AsRegister<CpuRegister>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                    Immediate(v));
-          }
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      if (!value.IsRegister()) {
+        // Just setting null.
+        DCHECK(instruction->InputAt(2)->IsNullConstant());
+        DCHECK(value.IsConstant()) << value;
+        __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-          CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
-          codegen_->MarkGCCard(
-              temp, card, obj, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
-        DCHECK(!codegen_->IsLeafMethod());
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      CpuRegister register_value = value.AsRegister<CpuRegister>();
+      NearLabel done, not_null, do_put;
+      SlowPathCode* slow_path = nullptr;
+      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          __ testl(register_value, register_value);
+          __ j(kNotEqual, &not_null);
+          __ movl(address, Immediate(0));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ jmp(&done);
+          __ Bind(&not_null);
+        }
+
+        __ movl(temp, Address(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp);
+        __ movl(temp, Address(temp, component_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmpl(temp, Address(register_value, class_offset));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          __ j(kEqual, &do_put);
+          __ MaybeUnpoisonHeapReference(temp);
+          __ movl(temp, Address(temp, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ testl(temp, temp);
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        __ movl(address, temp);
+      } else {
+        __ movl(address, register_value);
+      }
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+      codegen_->MarkGCCard(
+          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
+      __ Bind(&done);
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+    case Primitive::kPrimInt: {
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      if (value.IsRegister()) {
+        __ movl(address, value.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        if (value.IsRegister()) {
-          __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-          DCHECK(IsInt<32>(v));
-          int32_t v_32 = v;
-          __ movq(Address(obj, offset), Immediate(v_32));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+      if (value.IsRegister()) {
+        __ movq(address, value.AsRegister<CpuRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-          DCHECK(IsInt<32>(v));
-          int32_t v_32 = v;
-          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                  Immediate(v_32));
-        }
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(address, Immediate(v_32));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      DCHECK(value.IsFpuRegister());
+      __ movss(address, value.AsFpuRegister<XmmRegister>());
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+      DCHECK(value.IsFpuRegister());
+      __ movsd(address, value.AsFpuRegister<XmmRegister>());
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
@@ -4250,7 +4275,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCode* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 22bca2f..3287a0a 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -431,19 +431,41 @@
   HInstruction* value = instruction->GetValue();
   if (value->GetType() != Primitive::kPrimNot) return;
 
+  if (CanEnsureNotNullAt(value, instruction)) {
+    instruction->ClearValueCanBeNull();
+  }
+
   if (value->IsArrayGet()) {
     if (value->AsArrayGet()->GetArray() == instruction->GetArray()) {
       // If the code is just swapping elements in the array, no need for a type check.
       instruction->ClearNeedsTypeCheck();
+      return;
     }
   }
 
   if (value->IsNullConstant()) {
     instruction->ClearNeedsTypeCheck();
+    return;
   }
 
-  if (CanEnsureNotNullAt(value, instruction)) {
-    instruction->ClearValueCanBeNull();
+  ScopedObjectAccess soa(Thread::Current());
+  ReferenceTypeInfo array_rti = instruction->GetArray()->GetReferenceTypeInfo();
+  ReferenceTypeInfo value_rti = value->GetReferenceTypeInfo();
+  if (!array_rti.IsValid()) {
+    return;
+  }
+
+  if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) {
+    instruction->ClearNeedsTypeCheck();
+    return;
+  }
+
+  if (array_rti.IsObjectArray()) {
+    if (array_rti.IsExact()) {
+      instruction->ClearNeedsTypeCheck();
+      return;
+    }
+    instruction->SetStaticTypeOfArrayIsObjectArray();
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 52e2cbe..1b4d161 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -725,6 +725,24 @@
   __ Sd(val, adr, 0);
 }
 
+// Thread java.lang.Thread.currentThread()
+void IntrinsicLocationsBuilderMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ LoadFromOffset(kLoadUnsignedWord,
+                    out,
+                    TR,
+                    Thread::PeerOffset<kMips64PointerSize>().Int32Value());
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -736,7 +754,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
-UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread)
 UNIMPLEMENTED_INTRINSIC(UnsafeGet)
 UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
 UNIMPLEMENTED_INTRINSIC(UnsafeGetLong)
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 486968c..d52f592 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1644,17 +1644,34 @@
   bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsValidHandle(type_handle_);
   }
+
   bool IsExact() const { return is_exact_; }
 
   bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return GetTypeHandle()->IsObjectClass();
   }
+
+  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+  }
+
   bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return GetTypeHandle()->IsInterface();
   }
 
+  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetTypeHandle()->IsArrayClass();
+  }
+
+  bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -2222,7 +2239,9 @@
  public:
   int32_t GetValue() const { return value_; }
 
-  uint64_t GetValueAsUint64() const OVERRIDE { return static_cast<uint64_t>(value_); }
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(static_cast<uint32_t>(value_));
+  }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsIntConstant());
@@ -4312,7 +4331,8 @@
                 SideEffectsForArchRuntimeCalls(value->GetType())), dex_pc),
         expected_component_type_(expected_component_type),
         needs_type_check_(value->GetType() == Primitive::kPrimNot),
-        value_can_be_null_(true) {
+        value_can_be_null_(true),
+        static_type_of_array_is_object_array_(false) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
@@ -4341,8 +4361,13 @@
     value_can_be_null_ = false;
   }
 
+  void SetStaticTypeOfArrayIsObjectArray() {
+    static_type_of_array_is_object_array_ = true;
+  }
+
   bool GetValueCanBeNull() const { return value_can_be_null_; }
   bool NeedsTypeCheck() const { return needs_type_check_; }
+  bool StaticTypeOfArrayIsObjectArray() const { return static_type_of_array_is_object_array_; }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4369,6 +4394,9 @@
   const Primitive::Type expected_component_type_;
   bool needs_type_check_;
   bool value_can_be_null_;
+  // Cached from the array input's reference type info (set by the instruction
+  // simplifier) so that codegen does not need to inspect the static type.
+  bool static_type_of_array_is_object_array_;
 
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index cd94d5e..1d07d47 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -52,7 +52,6 @@
   S6   = 22,
   S7   = 23,
   T8   = 24,  // More temporaries.
-  TMP  = T8,  // scratch register (in addition to AT)
   T9   = 25,
   K0   = 26,  // Reserved for trap handler.
   K1   = 27,
@@ -60,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   S8   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register
+  TMP  = T8,  // scratch register (in addition to AT)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index d6b2b7e..632a50f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -35,6 +35,8 @@
 #include "quick/quick_method_frame_info.h"
 #include "read_barrier-inl.h"
 #include "runtime-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -75,9 +77,28 @@
           expected_root, desired_root);
 }
 
+// AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
+// TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
+ALWAYS_INLINE
+static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
+        method->GetDeclaringClass()->IsErroneous());
+}
+
 inline uint32_t ArtMethod::GetAccessFlags() {
-  DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-         GetDeclaringClass()->IsErroneous());
+  if (kIsDebugBuild) {
+    Thread* self = Thread::Current();
+    if (!Locks::mutator_lock_->IsSharedHeld(self)) {
+      ScopedObjectAccess soa(self);
+      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
+            GetDeclaringClass()->IsErroneous());
+    } else {
+      // We cannot use SOA in this case. We might be holding the lock, but may not be in the
+      // runnable state (e.g., during GC).
+      Locks::mutator_lock_->AssertSharedHeld(self);
+      DoGetAccessFlagsHelper(this);
+    }
+  }
   return access_flags_;
 }
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index f78c827..0315c3a 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -75,7 +75,9 @@
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_));
   }
 
-  ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Note: in debug builds GetAccessFlags acquires the mutator lock, if it is not already held, to
+  // check that it is not called for a proxy method.
+  ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
     // Not called within a transaction.
@@ -86,77 +88,78 @@
   InvokeType GetInvokeType() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the method is declared public.
-  bool IsPublic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPublic() {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the method is declared private.
-  bool IsPrivate() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPrivate() {
     return (GetAccessFlags() & kAccPrivate) != 0;
   }
 
   // Returns true if the method is declared static.
-  bool IsStatic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsStatic() {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
   // Returns true if the method is a constructor.
-  bool IsConstructor() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsConstructor() {
     return (GetAccessFlags() & kAccConstructor) != 0;
   }
 
   // Returns true if the method is a class initializer.
-  bool IsClassInitializer() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsClassInitializer() {
     return IsConstructor() && IsStatic();
   }
 
   // Returns true if the method is static, private, or a constructor.
-  bool IsDirect() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsDirect() {
     return IsDirect(GetAccessFlags());
   }
 
   static bool IsDirect(uint32_t access_flags) {
-    return (access_flags & (kAccStatic | kAccPrivate | kAccConstructor)) != 0;
+    constexpr uint32_t direct = kAccStatic | kAccPrivate | kAccConstructor;
+    return (access_flags & direct) != 0;
   }
 
   // Returns true if the method is declared synchronized.
-  bool IsSynchronized() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized;
+  bool IsSynchronized() {
+    constexpr uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized;
     return (GetAccessFlags() & synchonized) != 0;
   }
 
-  bool IsFinal() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsFinal() {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsMiranda() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
-  bool IsNative() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsNative() {
     return (GetAccessFlags() & kAccNative) != 0;
   }
 
-  bool IsFastNative() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t mask = kAccFastNative | kAccNative;
+  bool IsFastNative() {
+    constexpr uint32_t mask = kAccFastNative | kAccNative;
     return (GetAccessFlags() & mask) == mask;
   }
 
-  bool IsAbstract() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsAbstract() {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
-  bool IsSynthetic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsSynthetic() {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPreverified() {
     return (GetAccessFlags() & kAccPreverified) != 0;
   }
 
-  void SetPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetPreverified() {
     DCHECK(!IsPreverified());
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
@@ -404,7 +407,7 @@
     return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
   }
 
-  void SetEntryPointFromJni(const void* entrypoint) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetEntryPointFromJni(const void* entrypoint) {
     DCHECK(IsNative());
     SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
   }
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 55a77ac..285df18 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -49,7 +49,9 @@
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
       CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      if (!m->IsOptimized(sizeof(void*))) {
+        CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See:
       //   0024: move-object v3, v2
@@ -63,12 +65,18 @@
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      // v5 is removed from the root set because there is a "merge" operation.
-      // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      if (!m->IsOptimized(sizeof(void*))) {
+        // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+        CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);
+        // v5 is removed from the root set because there is a "merge" operation.
+        // See 0015: if-nez v2, 001f.
+        CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      }
       CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+
+      if (!m->IsOptimized(sizeof(void*))) {
+        CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+      }
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       // Note that it is OK for a compiler to not have a dex map at these two dex PCs because
diff --git a/test/535-regression-const-val/expected.txt b/test/535-regression-const-val/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/535-regression-const-val/expected.txt
diff --git a/test/535-regression-const-val/info.txt b/test/535-regression-const-val/info.txt
new file mode 100644
index 0000000..ea3e67b
--- /dev/null
+++ b/test/535-regression-const-val/info.txt
@@ -0,0 +1,2 @@
+Test a regression where SsaChecker would fail when comparing the raw values of an
+IntConstant and a FloatConstant because a static_cast sign-extended the int value.
diff --git a/test/535-regression-const-val/smali/TestCase.smali b/test/535-regression-const-val/smali/TestCase.smali
new file mode 100644
index 0000000..f42f173
--- /dev/null
+++ b/test/535-regression-const-val/smali/TestCase.smali
@@ -0,0 +1,36 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static testCase(ZZ)I
+  .registers 5
+
+  # Create Phi [ 0.0f, -0.25f ].
+  # Binary representation of -0.25f has the most significant bit set.
+  if-eqz p0, :else
+  :then
+    const v0, 0x0
+    goto :merge
+  :else
+    const/high16 v0, 0xbe800000
+  :merge
+
+  # Now use as either float or int.
+  if-eqz p1, :return
+  float-to-int v0, v0
+  :return
+  return v0
+.end method
diff --git a/test/535-regression-const-val/src/Main.java b/test/535-regression-const-val/src/Main.java
new file mode 100644
index 0000000..858770f
--- /dev/null
+++ b/test/535-regression-const-val/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e915357..49778cb 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -467,6 +467,7 @@
     530-checker-loops \
     530-checker-regression-reftype-final \
     532-checker-nonnull-arrayset \
+    534-checker-bce-deoptimization \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 972e827..de9b35d 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -20,21 +20,11 @@
 fi
 
 common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests"
-android_root="/data/local/tmp/system"
-linker="linker"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
 make_command=
 
-case "$TARGET_PRODUCT" in
-  (armv8|mips64r6) linker="linker64";;
-esac
-
-if [[ "$ART_TEST_ANDROID_ROOT" != "" ]]; then
-  android_root="$ART_TEST_ANDROID_ROOT"
-fi
-
 while true; do
   if [[ "$1" == "--host" ]]; then
     mode="host"
@@ -42,16 +32,6 @@
   elif [[ "$1" == "--target" ]]; then
     mode="target"
     shift
-  elif [[ "$1" == "--32" ]]; then
-    linker="linker"
-    shift
-  elif [[ "$1" == "--64" ]]; then
-    linker="linker64"
-    shift
-  elif [[ "$1" == "--android-root" ]]; then
-    shift
-    android_root=$1
-    shift
   elif [[ "$1" == -j* ]]; then
     j_arg=$1
     shift
@@ -64,19 +44,10 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
-  echo "Executing $make_command"
-  $make_command
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  # Disable NINJA for building on target, it does not support setting environment variables
-  # within the make command.
-  env="$env USE_NINJA=false"
-  # Build extra tools that will be used by tests, so that
-  # they are compiled with our own linker.
-  # We need to provide our own linker in case the linker on the device
-  # is out of date.
-  make_command="make TARGET_LINKER=$android_root/bin/$linker $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
-  echo "Executing env $env $make_command"
-  env $env $make_command
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
 fi
 
+echo "Executing $make_command"
+$make_command