Improve ArraySet codegen.

Simplify the code generated for the reference case of ArraySet so
that it emits fewer instructions and takes at most one branch on the
main path.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: testrunner.py --target --optimizing
Bug: 32489401
Change-Id: I9d76b7795ec01e6245ed3184cd8d384389e5070d
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3086882..d206669 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2497,12 +2497,10 @@
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
   DataType::Type value_type = instruction->GetComponentType();
 
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
       instruction,
-      may_need_runtime_call_for_type_check ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall);
+      needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
@@ -2517,7 +2515,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   DataType::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -2530,7 +2528,7 @@
   MacroAssembler* masm = GetVIXLAssembler();
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call_for_type_check);
+    DCHECK(!needs_type_check);
     if (index.IsConstant()) {
       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
       destination = HeapOperand(array, offset);
@@ -2562,128 +2560,105 @@
     }
   } else {
     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
-    vixl::aarch64::Label done;
-    SlowPathCodeARM64* slow_path = nullptr;
-    {
-      // We use a block to end the scratch scope before the write barrier, thus
-      // freeing the temporary registers so they can be used in `MarkGCCard`.
+
+    bool can_value_be_null = instruction->GetValueCanBeNull();
+    vixl::aarch64::Label do_store;
+    if (can_value_be_null) {
+      __ Cbz(Register(value), &do_store);
+    }
+
+    if (needs_type_check) {
+      SlowPathCodeARM64* slow_path =
+          new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
+      codegen_->AddSlowPath(slow_path);
+
+      const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
-      if (index.IsConstant()) {
-        offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
-        destination = HeapOperand(array, offset);
-      } else {
-        destination = HeapOperand(temp,
-                                  XRegisterFrom(index),
-                                  LSL,
-                                  DataType::SizeShift(value_type));
+      Register temp2 = temps.AcquireSameSizeAs(array);
+
+      // Note that when Baker read barriers are enabled, the type
+      // checks are performed without read barriers.  This is fine,
+      // even in the case where a class object is in the from-space
+      // after the flip, as a comparison involving such a type would
+      // not produce a false positive; it may of course produce a
+      // false negative, in which case we would take the ArraySet
+      // slow path.
+
+      // /* HeapReference<Class> */ temp = array->klass_
+      {
+        // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+        __ Ldr(temp, HeapOperand(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
+      GetAssembler()->MaybeUnpoisonHeapReference(temp);
 
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      __ Ldr(temp, HeapOperand(temp, component_offset));
+      // /* HeapReference<Class> */ temp2 = value->klass_
+      __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+      // If heap poisoning is enabled, no need to unpoison `temp`
+      // nor `temp2`, as we are comparing two poisoned references.
+      __ Cmp(temp, temp2);
 
-      if (may_need_runtime_call_for_type_check) {
-        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
-        codegen_->AddSlowPath(slow_path);
-        if (instruction->GetValueCanBeNull()) {
-          vixl::aarch64::Label non_zero;
-          __ Cbnz(Register(value), &non_zero);
-          if (!index.IsConstant()) {
-            __ Add(temp, array, offset);
-          }
-          {
-            // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
-            // emitted.
-            EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-            __ Str(wzr, destination);
-            codegen_->MaybeRecordImplicitNullCheck(instruction);
-          }
-          __ B(&done);
-          __ Bind(&non_zero);
-        }
-
-        // Note that when Baker read barriers are enabled, the type
-        // checks are performed without read barriers.  This is fine,
-        // even in the case where a class object is in the from-space
-        // after the flip, as a comparison involving such a type would
-        // not produce a false positive; it may of course produce a
-        // false negative, in which case we would take the ArraySet
-        // slow path.
-
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        // /* HeapReference<Class> */ temp = array->klass_
-        {
-          // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
-          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-          __ Ldr(temp, HeapOperand(array, class_offset));
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-        }
+      if (instruction->StaticTypeOfArrayIsObjectArray()) {
+        __ B(eq, slow_path->GetExitLabel());
+        // If heap poisoning is enabled, the `temp` reference has
+        // not been unpoisoned yet; unpoison it now.
         GetAssembler()->MaybeUnpoisonHeapReference(temp);
 
-        // /* HeapReference<Class> */ temp = temp->component_type_
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        // /* HeapReference<Class> */ temp2 = value->klass_
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // If heap poisoning is enabled, no need to unpoison `temp`
-        // nor `temp2`, as we are comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        temps.Release(temp2);
-
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::aarch64::Label do_put;
-          __ B(eq, &do_put);
-          // If heap poisoning is enabled, the `temp` reference has
-          // not been unpoisoned yet; unpoison it now.
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-
-          // /* HeapReference<Class> */ temp = temp->super_class_
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // If heap poisoning is enabled, no need to unpoison
-          // `temp`, as we are comparing against null below.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
-        } else {
-          __ B(ne, slow_path->GetEntryLabel());
-        }
-      }
-
-      if (kPoisonHeapReferences) {
-        Register temp2 = temps.AcquireSameSizeAs(array);
-          DCHECK(value.IsW());
-        __ Mov(temp2, value.W());
-        GetAssembler()->PoisonHeapReference(temp2);
-        source = temp2;
-      }
-
-      if (!index.IsConstant()) {
-        __ Add(temp, array, offset);
+        // /* HeapReference<Class> */ temp = temp->super_class_
+        __ Ldr(temp, HeapOperand(temp, super_offset));
+        // If heap poisoning is enabled, no need to unpoison
+        // `temp`, as we are comparing against null below.
+        __ Cbnz(temp, slow_path->GetEntryLabel());
       } else {
-        // We no longer need the `temp` here so release it as the store below may
-        // need a scratch register (if the constant index makes the offset too large)
-        // and the poisoned `source` could be using the other scratch register.
-        temps.Release(temp);
+        __ B(ne, slow_path->GetEntryLabel());
       }
-      {
-        // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
-        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-        __ Str(source, destination);
-
-        if (!may_need_runtime_call_for_type_check) {
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-        }
-      }
-    }
-
-    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
-
-    if (done.IsLinked()) {
-      __ Bind(&done);
-    }
-
-    if (slow_path != nullptr) {
       __ Bind(slow_path->GetExitLabel());
     }
+
+    codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
+
+    UseScratchRegisterScope temps(masm);
+    if (kPoisonHeapReferences) {
+      Register temp_source = temps.AcquireSameSizeAs(array);
+        DCHECK(value.IsW());
+      __ Mov(temp_source, value.W());
+      GetAssembler()->PoisonHeapReference(temp_source);
+      source = temp_source;
+    }
+
+    if (can_value_be_null) {
+      DCHECK(do_store.IsLinked());
+      __ Bind(&do_store);
+    }
+
+    if (index.IsConstant()) {
+      offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
+      destination = HeapOperand(array, offset);
+    } else {
+      Register temp_base = temps.AcquireSameSizeAs(array);
+      __ Add(temp_base, array, offset);
+      destination = HeapOperand(temp_base,
+                                XRegisterFrom(index),
+                                LSL,
+                                DataType::SizeShift(value_type));
+    }
+
+    {
+      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
+      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+      __ Str(source, destination);
+
+      if (can_value_be_null || !needs_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+    }
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 6469c69..9d3bdef 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -6168,13 +6168,11 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
       instruction,
-      may_need_runtime_call_for_type_check ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall);
+      needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -6195,7 +6193,7 @@
   vixl32::Register array = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
   DataType::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t data_offset =
@@ -6247,8 +6245,7 @@
       if (instruction->InputAt(2)->IsNullConstant()) {
         // Just setting null.
         if (index.IsConstant()) {
-          size_t offset =
-              (Int32ConstantFrom(index) << TIMES_4) + data_offset;
+          size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
           GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
@@ -6261,7 +6258,7 @@
         // store instruction.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call_for_type_check);
+        DCHECK(!needs_type_check);
         break;
       }
 
@@ -6270,36 +6267,21 @@
       vixl32::Register temp1 = RegisterFrom(temp1_loc);
       Location temp2_loc = locations->GetTemp(1);
       vixl32::Register temp2 = RegisterFrom(temp2_loc);
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-      vixl32::Label done;
-      vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
-      SlowPathCodeARMVIXL* slow_path = nullptr;
 
-      if (may_need_runtime_call_for_type_check) {
-        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
+      bool can_value_be_null = instruction->GetValueCanBeNull();
+      vixl32::Label do_store;
+      if (can_value_be_null) {
+        __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
+      }
+
+      if (needs_type_check) {
+        SlowPathCodeARMVIXL* slow_path =
+            new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
         codegen_->AddSlowPath(slow_path);
-        if (instruction->GetValueCanBeNull()) {
-          vixl32::Label non_zero;
-          __ CompareAndBranchIfNonZero(value, &non_zero);
-          if (index.IsConstant()) {
-            size_t offset =
-               (Int32ConstantFrom(index) << TIMES_4) + data_offset;
-            GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
-          } else {
-            DCHECK(index.IsRegister()) << index;
-            UseScratchRegisterScope temps(GetVIXLAssembler());
-            vixl32::Register temp = temps.Acquire();
-            __ Add(temp, array, data_offset);
-            codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
-          }
-          // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
-          // store instruction.
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ B(final_label);
-          __ Bind(&non_zero);
-        }
+
+        const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+        const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+        const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
         // Note that when read barriers are enabled, the type checks
         // are performed without read barriers.  This is fine, even in
@@ -6329,8 +6311,7 @@
         __ Cmp(temp1, temp2);
 
         if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl32::Label do_put;
-          __ B(eq, &do_put, /* is_far_target= */ false);
+          __ B(eq, slow_path->GetExitLabel(), /* is_far_target= */ false);
           // If heap poisoning is enabled, the `temp1` reference has
           // not been unpoisoned yet; unpoison it now.
           GetAssembler()->MaybeUnpoisonHeapReference(temp1);
@@ -6340,12 +6321,14 @@
           // If heap poisoning is enabled, no need to unpoison
           // `temp1`, as we are comparing against null below.
           __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
         } else {
           __ B(ne, slow_path->GetEntryLabel());
         }
+        __ Bind(slow_path->GetExitLabel());
       }
 
+      codegen_->MarkGCCard(temp1, temp2, array, value, /* can_be_null= */ false);
+
       vixl32::Register source = value;
       if (kPoisonHeapReferences) {
         // Note that in the case where `value` is a null reference,
@@ -6357,9 +6340,13 @@
         source = temp1;
       }
 
+      if (can_value_be_null) {
+        DCHECK(do_store.IsReferenced());
+        __ Bind(&do_store);
+      }
+
       if (index.IsConstant()) {
-        size_t offset =
-            (Int32ConstantFrom(index) << TIMES_4) + data_offset;
+        size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
         GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
       } else {
         DCHECK(index.IsRegister()) << index;
@@ -6373,22 +6360,12 @@
                                           RegisterFrom(index));
       }
 
-      if (!may_need_runtime_call_for_type_check) {
+      if (can_value_be_null || !needs_type_check) {
         // TODO(VIXL): Ensure we record the pc position immediately after the preceding store
         // instruction.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
-      codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull());
-
-      if (done.IsReferenced()) {
-        __ Bind(&done);
-      }
-
-      if (slow_path != nullptr) {
-        __ Bind(slow_path->GetExitLabel());
-      }
-
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ca1723b..334f2b4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5781,13 +5781,11 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
       instruction,
-      may_need_runtime_call_for_type_check ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall);
+      needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
   bool is_byte_type = DataType::Size(value_type) == 1u;
   // We need the inputs to be different than the output in case of long operation.
@@ -5818,10 +5816,7 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   DataType::Type value_type = instruction->GetComponentType();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -5864,30 +5859,30 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call_for_type_check);
+        DCHECK(!needs_type_check);
         break;
       }
 
       DCHECK(needs_write_barrier);
       Register register_value = value.AsRegister<Register>();
-      // We cannot use a NearLabel for `done`, as its range may be too
-      // short when Baker read barriers are enabled.
-      Label done;
-      NearLabel not_null, do_put;
-      SlowPathCode* slow_path = nullptr;
       Location temp_loc = locations->GetTemp(0);
       Register temp = temp_loc.AsRegister<Register>();
-      if (may_need_runtime_call_for_type_check) {
-        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
+
+      bool can_value_be_null = instruction->GetValueCanBeNull();
+      NearLabel do_store;
+      if (can_value_be_null) {
+        __ testl(register_value, register_value);
+        __ j(kEqual, &do_store);
+      }
+
+      if (needs_type_check) {
+        SlowPathCode* slow_path =
+            new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
         codegen_->AddSlowPath(slow_path);
-        if (instruction->GetValueCanBeNull()) {
-          __ testl(register_value, register_value);
-          __ j(kNotEqual, &not_null);
-          __ movl(address, Immediate(0));
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ jmp(&done);
-          __ Bind(&not_null);
-        }
+
+        const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+        const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+        const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
         // Note that when Baker read barriers are enabled, the type
         // checks are performed without read barriers.  This is fine,
@@ -5910,6 +5905,7 @@
         __ cmpl(temp, Address(register_value, class_offset));
 
         if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          NearLabel do_put;  // Use a dedicated NearLabel instead of the slow_path->GetExitLabel().
           __ j(kEqual, &do_put);
           // If heap poisoning is enabled, the `temp` reference has
           // not been unpoisoned yet; unpoison it now.
@@ -5924,26 +5920,29 @@
         } else {
           __ j(kNotEqual, slow_path->GetEntryLabel());
         }
-      }
-
-      if (kPoisonHeapReferences) {
-        __ movl(temp, register_value);
-        __ PoisonHeapReference(temp);
-        __ movl(address, temp);
-      } else {
-        __ movl(address, register_value);
-      }
-      if (!may_need_runtime_call_for_type_check) {
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ Bind(slow_path->GetExitLabel());
       }
 
       Register card = locations->GetTemp(1).AsRegister<Register>();
       codegen_->MarkGCCard(
-          temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
-      __ Bind(&done);
+          temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
 
-      if (slow_path != nullptr) {
-        __ Bind(slow_path->GetExitLabel());
+      Register source = register_value;
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        source = temp;
+      }
+
+      if (can_value_be_null) {
+        DCHECK(do_store.IsLinked());
+        __ Bind(&do_store);
+      }
+
+      __ movl(address, source);
+
+      if (can_value_be_null || !needs_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
       break;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7c293b8..e816450 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5143,13 +5143,11 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
       instruction,
-      may_need_runtime_call_for_type_check ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall);
+      needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -5173,12 +5171,9 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   DataType::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
   switch (value_type) {
     case DataType::Type::kBool:
@@ -5220,30 +5215,30 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call_for_type_check);
+        DCHECK(!needs_type_check);
         break;
       }
 
       DCHECK(needs_write_barrier);
       CpuRegister register_value = value.AsRegister<CpuRegister>();
-      // We cannot use a NearLabel for `done`, as its range may be too
-      // short when Baker read barriers are enabled.
-      Label done;
-      NearLabel not_null, do_put;
-      SlowPathCode* slow_path = nullptr;
       Location temp_loc = locations->GetTemp(0);
       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-      if (may_need_runtime_call_for_type_check) {
-        slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
+
+      bool can_value_be_null = instruction->GetValueCanBeNull();
+      NearLabel do_store;
+      if (can_value_be_null) {
+        __ testl(register_value, register_value);
+        __ j(kEqual, &do_store);
+      }
+
+      if (needs_type_check) {
+        SlowPathCode* slow_path =
+            new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
         codegen_->AddSlowPath(slow_path);
-        if (instruction->GetValueCanBeNull()) {
-          __ testl(register_value, register_value);
-          __ j(kNotEqual, &not_null);
-          __ movl(address, Immediate(0));
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ jmp(&done);
-          __ Bind(&not_null);
-        }
+
+        const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+        const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+        const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
         // Note that when Baker read barriers are enabled, the type
         // checks are performed without read barriers.  This is fine,
@@ -5266,6 +5261,7 @@
         __ cmpl(temp, Address(register_value, class_offset));
 
         if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          NearLabel do_put;  // Use a dedicated NearLabel instead of the slow_path->GetExitLabel().
           __ j(kEqual, &do_put);
           // If heap poisoning is enabled, the `temp` reference has
           // not been unpoisoned yet; unpoison it now.
@@ -5280,26 +5276,29 @@
         } else {
           __ j(kNotEqual, slow_path->GetEntryLabel());
         }
-      }
-
-      if (kPoisonHeapReferences) {
-        __ movl(temp, register_value);
-        __ PoisonHeapReference(temp);
-        __ movl(address, temp);
-      } else {
-        __ movl(address, register_value);
-      }
-      if (!may_need_runtime_call_for_type_check) {
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ Bind(slow_path->GetExitLabel());
       }
 
       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
       codegen_->MarkGCCard(
-          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
-      __ Bind(&done);
+          temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
 
-      if (slow_path != nullptr) {
-        __ Bind(slow_path->GetExitLabel());
+      Location source = value;
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        source = temp_loc;
+      }
+
+      if (can_value_be_null) {
+        DCHECK(do_store.IsLinked());
+        __ Bind(&do_store);
+      }
+
+      __ movl(address, source.AsRegister<CpuRegister>());
+
+      if (can_value_be_null || !needs_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
       break;