Merge "Revert "lambda: Add support for invoke-interface for boxed innate lambdas""
diff --git a/Android.mk b/Android.mk
index fcf70ff..0d0003a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -122,6 +122,16 @@
 include $(art_path)/test/Android.run-test.mk
 include $(art_path)/benchmark/Android.mk
 
+TEST_ART_ADB_ROOT_AND_REMOUNT := \
+    (adb root && \
+     adb wait-for-device remount && \
+     ((adb shell touch /system/testfile && \
+       (adb shell rm /system/testfile || true)) || \
+      (adb disable-verity && \
+       adb reboot && \
+       adb wait-for-device root && \
+       adb wait-for-device remount)))
+
 # Sync test files to the target, depends upon all things that must be pushed to the target.
 .PHONY: test-art-target-sync
 # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty,
@@ -130,12 +140,11 @@
 ifneq ($(ART_TEST_NO_SYNC),true)
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT)
 	adb push $(ANDROID_PRODUCT_OUT)/data /data
 endif
@@ -374,8 +383,7 @@
 
 .PHONY: oat-target-sync
 oat-target-sync: oat-target
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
 ########################################################################
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index f985745..f0cafc8 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -61,40 +61,6 @@
       && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
 }
 
-// Returns an instruction with the opposite boolean value from 'cond'.
-static HInstruction* GetOppositeCondition(HInstruction* cond) {
-  HGraph* graph = cond->GetBlock()->GetGraph();
-  ArenaAllocator* allocator = graph->GetArena();
-
-  if (cond->IsCondition()) {
-    HInstruction* lhs = cond->InputAt(0);
-    HInstruction* rhs = cond->InputAt(1);
-    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
-      case kCondEQ: return new (allocator) HEqual(lhs, rhs);
-      case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
-      case kCondLT: return new (allocator) HLessThan(lhs, rhs);
-      case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
-      case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
-      case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
-      case kCondB:  return new (allocator) HBelow(lhs, rhs);
-      case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
-      case kCondA:  return new (allocator) HAbove(lhs, rhs);
-      case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
-    }
-  } else if (cond->IsIntConstant()) {
-    HIntConstant* int_const = cond->AsIntConstant();
-    if (int_const->IsZero()) {
-      return graph->GetIntConstant(1);
-    } else {
-      DCHECK(int_const->IsOne());
-      return graph->GetIntConstant(0);
-    }
-  }
-  // General case when 'cond' is another instruction of type boolean,
-  // as verified by SSAChecker.
-  return new (allocator) HBooleanNot(cond);
-}
-
 void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
   DCHECK(block->EndsWithIf());
 
@@ -126,10 +92,7 @@
 
   HInstruction* replacement;
   if (NegatesCondition(true_value, false_value)) {
-    replacement = GetOppositeCondition(if_condition);
-    if (replacement->GetBlock() == nullptr) {
-      block->InsertInstructionBefore(replacement, if_instruction);
-    }
+    replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
   } else if (PreservesCondition(true_value, false_value)) {
     replacement = if_condition;
   } else {
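
The helper removed here reappears as HGraph::InsertOppositeCondition (see the
nodes.cc hunk below). A standalone sketch, not ART code, of the identity the
simplifier relies on: selecting the negated boolean of a condition is the same
as evaluating the opposite condition, so no HBooleanNot is needed.

    #include <cassert>

    // Select() stands in for the Phi produced by the If diamond.
    static bool Select(bool cond, bool true_value, bool false_value) {
      return cond ? true_value : false_value;
    }

    int main() {
      for (int a = -2; a <= 2; ++a) {
        for (int b = -2; b <= 2; ++b) {
          // (a < b) ? false : true is exactly (a >= b).
          assert(Select(a < b, false, true) == (a >= b));
        }
      }
      return 0;
    }
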
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 3257de1..32968a5 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -876,12 +876,78 @@
                       clinit_check);
 }
 
+bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  bool finalizable;
+  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then the class is not resolved yet and may turn out to
+  // be finalizable, so we must use that entrypoint as well.
+  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      /*needs_access_check*/ can_throw);
+
+  current_block_->AddInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class, type_index)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(cls);
+  }
+
+  current_block_->AddInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      can_throw,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
+bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+  if (GetOutermostCompilingClass() == cls.Get()) {
+    return true;
+  }
+  // TODO: find out why this check is needed.
+  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
+      *outer_compilation_unit_->GetDexFile(), type_index);
+  return cls->IsInitialized() && is_in_dex_cache;
+}
+
 HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
       uint32_t method_idx,
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<5> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
@@ -927,13 +993,8 @@
     // whether we should add an explicit class initialization
     // check for its declaring class before the static method call.
 
-    // TODO: find out why this check is needed.
-    bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-        *outer_compilation_unit_->GetDexFile(), storage_index);
-    bool is_initialized =
-        resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
-    if (is_initialized) {
+    Handle<mirror::Class> cls(hs.NewHandle(resolved_method->GetDeclaringClass()));
+    if (IsInitialized(cls, storage_index)) {
       *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
     } else {
       *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
@@ -1272,7 +1333,7 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<5> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
@@ -1318,11 +1379,6 @@
     }
   }
 
-  // TODO: find out why this check is needed.
-  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *outer_compilation_unit_->GetDexFile(), storage_index);
-  bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
                                                  *dex_compilation_unit_->GetDexFile(),
@@ -1332,12 +1388,14 @@
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
-  if (!is_initialized && !is_outer_class) {
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+  if (!IsInitialized(klass, storage_index)) {
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
 
-  uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
   if (is_put) {
     // We need to keep the class alive before loading the value.
     Temporaries temps(graph_);
@@ -2509,20 +2567,9 @@
         current_block_->AddInstruction(fake_string);
         UpdateLocal(register_index, fake_string, dex_pc);
       } else {
-        bool finalizable;
-        bool can_throw = NeedsAccessCheck(type_index, &finalizable);
-        QuickEntrypointEnum entrypoint = can_throw
-            ? kQuickAllocObjectWithAccessCheck
-            : kQuickAllocObject;
-
-        current_block_->AddInstruction(new (arena_) HNewInstance(
-            graph_->GetCurrentMethod(),
-            dex_pc,
-            type_index,
-            *dex_compilation_unit_->GetDexFile(),
-            can_throw,
-            finalizable,
-            entrypoint));
+        if (!BuildNewInstance(type_index, dex_pc)) {
+          return false;
+        }
         UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       }
       break;
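
A minimal reduction, with assumed names and not ART code, of the entrypoint
choice made in BuildNewInstance above: only the generic kQuickAllocObject
entrypoint handles finalizable classes, and a class that still needs access
checks is unresolved, so it may turn out to be finalizable as well.

    #include <cassert>

    enum Entrypoint { kQuickAllocObject, kQuickAllocObjectInitialized };

    // can_throw is true when the type still needs an access check.
    static Entrypoint Choose(bool finalizable, bool can_throw) {
      return (finalizable || can_throw) ? kQuickAllocObject
                                        : kQuickAllocObjectInitialized;
    }

    int main() {
      assert(Choose(/*finalizable=*/ true, /*can_throw=*/ false) == kQuickAllocObject);
      assert(Choose(/*finalizable=*/ false, /*can_throw=*/ true) == kQuickAllocObject);
      assert(Choose(/*finalizable=*/ false, /*can_throw=*/ false) == kQuickAllocObjectInitialized);
      return 0;
    }
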
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index f857ef0..615b0cd 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -308,6 +308,14 @@
       uint32_t method_idx,
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
 
+  // Build a HNewInstance instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Return whether the compiler can assume `cls` is initialized. `type_index` is the index
+  // of the class in the outer dex file.
+  bool IsInitialized(Handle<mirror::Class> cls, uint16_t type_index) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cb6bed0..cf6f7e3 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3361,7 +3361,19 @@
             __ mov(o_l, ShifterOperand(high));
             __ LoadImmediate(o_h, 0);
           }
-        } else {  // shift_value < 32
+        } else if (shift_value == 1) {
+          if (op->IsShl()) {
+            __ Lsls(o_l, low, 1);
+            __ adc(o_h, high, ShifterOperand(high));
+          } else if (op->IsShr()) {
+            __ Asrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          } else {
+            __ Lsrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          }
+        } else {
+          DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
           if (op->IsShl()) {
             __ Lsl(o_h, high, shift_value);
             __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
@@ -3413,14 +3425,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
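
A standalone sketch, not ART code, of the new shift_value == 1 path: on
32-bit ARM a 64-bit shift by one only needs the carry flag (Lsls/adc for
shifts left, Asrs-or-Lsrs followed by Rrx for shifts right), modelled here
with explicit bit operations on the two register halves.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t x = 0x8000000180000001ULL;
      const uint32_t lo = static_cast<uint32_t>(x);
      const uint32_t hi = static_cast<uint32_t>(x >> 32);

      // SHL by 1: Lsls low (carry = old bit 31 of low), then adc high, high.
      uint32_t shl_lo = lo << 1;
      uint32_t shl_hi = (hi << 1) | (lo >> 31);  // high + high + carry
      assert(((static_cast<uint64_t>(shl_hi) << 32) | shl_lo) == (x << 1));

      // UShr by 1: Lsrs high (carry = old bit 0 of high), then Rrx low.
      uint32_t ushr_hi = hi >> 1;
      uint32_t ushr_lo = (lo >> 1) | (hi << 31);  // rotate right through carry
      assert(((static_cast<uint64_t>(ushr_hi) << 32) | ushr_lo) == (x >> 1));

      // Shr by 1 is identical except Asrs preserves the sign bit of high.
      return 0;
    }
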
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2776b7d..d82cb67 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1628,6 +1628,47 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
+void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+  Register res = OutputRegister(instr);
+  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
+  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+
+  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+  // This fixup should be carried out for all multiply-accumulate instructions:
+  // madd, msub, smaddl, smsubl, umaddl and umsubl.
+  if (instr->GetType() == Primitive::kPrimLong &&
+      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
+    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
+    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+    if (prev->IsLoadOrStore()) {
+      // Make sure we emit only exactly one nop.
+      vixl::CodeBufferCheckScope scope(masm,
+                                       vixl::kInstructionSize,
+                                       vixl::CodeBufferCheckScope::kCheck,
+                                       vixl::CodeBufferCheckScope::kExactSize);
+      __ nop();
+    }
+  }
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ Madd(res, mul_left, mul_right, accumulator);
+  } else {
+    DCHECK(instr->GetOpKind() == HInstruction::kSub);
+    __ Msub(res, mul_left, mul_right, accumulator);
+  }
+}
+
 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -3372,17 +3413,13 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
   CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
-  DCHECK(type_index.Is(w0));
-  __ Mov(type_index, instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 801e203..f3178bd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3478,17 +3478,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  Register current_method_register = calling_convention.GetRegisterAt(1);
-  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
-  // Move an uint16_t value to a register.
-  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(
       GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 7b33075..6100859 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3266,15 +3266,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  // Move an uint16_t value to a register.
-  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a87e8ed..53e33bf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3769,13 +3769,11 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dcc1808..0e0b869 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3765,18 +3765,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
-                           instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d166d00..4438190 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -422,6 +422,12 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+#ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+#endif
+
   bool IsPass(const char* name) {
     return strcmp(pass_name_, name) == 0;
   }
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c36de84..4af111b 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -377,9 +377,10 @@
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
-    set->Kill(current->GetSideEffects());
     // Save the next instruction in case `current` is removed from the graph.
     HInstruction* next = current->GetNext();
+    // Do not kill the set with the side effects of the instruction just now: if
+    // the instruction is GVN'ed, we don't need to kill.
     if (current->CanBeMoved()) {
       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
         // For commutative ops, (x op y) will be treated the same as (y op x)
@@ -395,8 +396,11 @@
         current->ReplaceWith(existing);
         current->GetBlock()->RemoveInstruction(current);
       } else {
+        set->Kill(current->GetSideEffects());
         set->Add(current);
       }
+    } else {
+      set->Kill(current->GetSideEffects());
     }
     current = next;
   }
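
A toy model, not ART code, of why the kill is delayed: a duplicate that gets
GVN'ed away never executes, so its side effects must not clear the set of
available values; only instructions that survive kill. Side-effect tracking
is collapsed to clearing the whole set here.

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      const std::vector<std::string> block = {"a+b", "a+b", "a+b"};
      std::set<std::string> available;
      int removed = 0;
      for (const std::string& value : block) {
        if (available.count(value) != 0) {
          ++removed;              // GVN'ed: replaced, and no kill.
          continue;
        }
        available.clear();        // Kill with the instruction's side effects...
        available.insert(value);  // ...then make its value available.
      }
      // With the old order (kill before the lookup) nothing would be removed.
      assert(removed == 2);
      return 0;
    }
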
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 9ad2dd1..2f3df7f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -169,16 +169,6 @@
       //    src
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
-    } else if (instruction->IsShl() && input_cst->IsOne()) {
-      // Replace Shl looking like
-      //    SHL dst, src, 1
-      // with
-      //    ADD dst, src, src
-      HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
-                                                   input_other,
-                                                   input_other);
-      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
-      RecordSimplification();
     }
   }
 }
@@ -372,9 +362,8 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
-        // Replace (bool_value == false) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
+        block->RemoveInstruction(equal);
         RecordSimplification();
       } else {
         // Replace (bool_value == integer_not_zero_nor_one_constant) with false
@@ -399,9 +388,8 @@
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsOne()) {
-        // Replace (bool_value != true) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
+        block->RemoveInstruction(not_equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
         // Replace (bool_value != false) with bool_value
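
A standalone sketch, not ART code, of the two rewrites touched in this file:
the generic SHL-by-1-to-ADD rule is dropped (backends such as the ARM
shift_value == 1 path above now pick the best encoding themselves), and
comparisons of a condition against a boolean constant fold into the opposite
condition instead of materializing an HBooleanNot.

    #include <cassert>
    #include <cstdint>

    int main() {
      // SHL x, 1 and ADD x, x compute the same value; the choice of encoding
      // is now left to each backend.
      for (uint32_t x = 0; x <= 8; ++x) {
        assert((x << 1) == x + x);
      }
      // (cond == false) and (cond != true) are the opposite condition.
      for (int a = 0; a < 4; ++a) {
        for (int b = 0; b < 4; ++b) {
          assert(((a < b) == false) == (a >= b));
          assert(((a < b) != true) == (a >= b));
        }
      }
      return 0;
    }
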
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index eb79f46..54dd2cc 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -62,6 +62,67 @@
   RecordSimplification();
 }
 
+bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
+    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;
+}
+
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -76,5 +137,78 @@
                                Primitive::ComponentSize(instruction->GetComponentType()));
 }
 
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntOrLongType(type)) {
+    return;
+  }
+
+  HInstruction* use = instruction->HasNonEnvironmentUses()
+      ? instruction->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+    // Replace code looking like
+    //    MUL tmp, x, y
+    //    SUB dst, acc, tmp
+    // with
+    //    MULSUB dst, acc, x, y
+    // Note that we do not want to (unconditionally) perform the merge when the
+    // multiplication has multiple uses, even if it could be merged into all of them.
+    // Multiple uses could happen on the same control-flow path, and we would
+    // then increase the amount of work. In the future we could try to evaluate
+    // whether all uses are on different control-flow paths (using dominance and
+    // reverse-dominance information) and only perform the merge when they are.
+    HInstruction* accumulator = nullptr;
+    HBinaryOperation* binop = use->AsBinaryOperation();
+    HInstruction* binop_left = binop->GetLeft();
+    HInstruction* binop_right = binop->GetRight();
+    // After GVN both inputs of `binop` could be the same instruction, but that
+    // cannot happen here since the `HMul` has only one use.
+    DCHECK_NE(binop_left, binop_right);
+    if (binop_right == instruction) {
+      accumulator = binop_left;
+    } else if (use->IsAdd()) {
+      DCHECK_EQ(binop_left, instruction);
+      accumulator = binop_right;
+    }
+
+    if (accumulator != nullptr) {
+      HArm64MultiplyAccumulate* mulacc =
+          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+                                                                binop->GetKind(),
+                                                                accumulator,
+                                                                instruction->GetLeft(),
+                                                                instruction->GetRight());
+
+      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+      DCHECK(!instruction->HasUses());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (instruction->GetLeft() == instruction->GetRight()) {
+    return;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
+    return;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
+    return;
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
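
A standalone check, not ART code, of the algebra behind the merges above;
madd() and msub() mirror the semantics of the ARM64 MADD/MSUB instructions
that HArm64MultiplyAccumulate lowers to.

    #include <cassert>
    #include <cstdint>

    static int64_t madd(int64_t acc, int64_t l, int64_t r) { return acc + l * r; }
    static int64_t msub(int64_t acc, int64_t l, int64_t r) { return acc - l * r; }

    int main() {
      const int64_t a = 7, b = 5;
      assert(a * (b + 1) == madd(a, a, b));     // a * (b + 1)    -> MADD dst, a, b, a
      assert(a * (b - (-1)) == madd(a, a, b));  // a * (b - (-1)) -> MADD dst, a, b, a
      assert(a * (1 - b) == msub(a, a, b));     // a * (1 - b)    -> MSUB dst, a, b, a
      return 0;
    }
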
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4b697db..eed2276 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -40,8 +40,14 @@
                                     HInstruction* index,
                                     int access_size);
 
+  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                           HBinaryOperation* input_binop,
+                                           HInstruction* input_other);
+
+  // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 0a39ff3..890598d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2090,4 +2090,46 @@
   env_uses_.Clear();
 }
 
+// Returns an instruction with the opposite boolean value from 'cond'.
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) {
+  ArenaAllocator* allocator = GetArena();
+
+  if (cond->IsCondition() &&
+      !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) {
+    // Can't reverse floating point conditions.  We have to use HBooleanNot in that case.
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    HInstruction* replacement = nullptr;
+    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
+      case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break;
+      case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break;
+      case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break;
+      case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break;
+      case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break;
+      case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break;
+      case kCondB:  replacement = new (allocator) HBelow(lhs, rhs); break;
+      case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break;
+      case kCondA:  replacement = new (allocator) HAbove(lhs, rhs); break;
+      case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break;
+      default:
+        LOG(FATAL) << "Unexpected condition";
+        UNREACHABLE();
+    }
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsZero()) {
+      return GetIntConstant(1);
+    } else {
+      DCHECK(int_const->IsOne());
+      return GetIntConstant(0);
+    }
+  } else {
+    HInstruction* replacement = new (allocator) HBooleanNot(cond);
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  }
+}
+
 }  // namespace art
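
A standalone check, not ART code, that each replacement emitted above negates
the original condition, including the unsigned Below/Above forms. For
floating-point inputs !(x < y) is not (x >= y) when either operand is NaN,
which is why those conditions fall back to HBooleanNot.

    #include <cassert>

    int main() {
      for (unsigned a = 0; a < 4; ++a) {
        for (unsigned b = 0; b < 4; ++b) {
          const int sa = static_cast<int>(a) - 2;
          const int sb = static_cast<int>(b) - 2;
          assert(!(sa == sb) == (sa != sb));  // kCondEQ <-> kCondNE
          assert(!(sa <  sb) == (sa >= sb));  // kCondLT <-> kCondGE
          assert(!(sa <= sb) == (sa >  sb));  // kCondLE <-> kCondGT
          assert(!(a  <  b)  == (a  >= b));   // kCondB  <-> kCondAE (unsigned)
          assert(!(a  <= b)  == (a  >  b));   // kCondBE <-> kCondA  (unsigned)
        }
      }
      return 0;
    }
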
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 4f894b0..1bd626f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,11 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  // Returns an instruction with the opposite boolean value from 'cond'.
+  // The instruction has been inserted into the graph, either as a constant, or
+  // before cursor.
+  HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
+
  private:
   void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
@@ -1096,7 +1101,8 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64IntermediateAddress, Instruction)                              \
+  M(Arm64MultiplyAccumulate, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1626,6 +1632,11 @@
     return holder_;
   }
 
+
+  bool IsFromInlinedInvoke() const {
+    return GetParent() != nullptr;
+  }
+
  private:
   // Record instructions' use entries of this environment for constant-time removal.
   // It should only be called by HInstruction when a new environment use is added.
@@ -3238,7 +3249,7 @@
   void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
 
   bool IsFromInlinedInvoke() const {
-    return GetEnvironment()->GetParent() != nullptr;
+    return GetEnvironment()->IsFromInlinedInvoke();
   }
 
   bool CanThrow() const OVERRIDE { return true; }
@@ -3652,9 +3663,10 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<1> {
+class HNewInstance : public HExpression<2> {
  public:
-  HNewInstance(HCurrentMethod* current_method,
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
@@ -3667,7 +3679,8 @@
         can_throw_(can_throw),
         finalizable_(finalizable),
         entrypoint_(entrypoint) {
-    SetRawInputAt(0, current_method);
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
   }
 
   uint16_t GetTypeIndex() const { return type_index_; }
@@ -3687,6 +3700,10 @@
 
   QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
 
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
   DECLARE_INSTRUCTION(NewInstance);
 
  private:
@@ -3694,7 +3711,7 @@
   const DexFile& dex_file_;
   const bool can_throw_;
   const bool finalizable_;
-  const QuickEntrypointEnum entrypoint_;
+  QuickEntrypointEnum entrypoint_;
 
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
@@ -4302,9 +4319,13 @@
       : HInstruction(SideEffects::None(), dex_pc),
         inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
         reg_number_(reg_number),
-        type_(type),
-        is_live_(false),
+        type_(ToPhiType(type)),
+        // Phis are constructed live and marked dead if conflicting or unused.
+        // Individual steps of SsaBuilder should assume that if a phi has been
+        // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+        is_live_(true),
         can_be_null_(true) {
+    DCHECK_NE(type_, Primitive::kPrimVoid);
   }
 
   // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
@@ -4927,6 +4948,7 @@
     return true;
   }
 
+  bool CanThrow() const OVERRIDE { return true; }
 
   HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
 
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 885d3a2..d07f019 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -42,6 +42,40 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
+class HArm64MultiplyAccumulate : public HExpression<3> {
+ public:
+  HArm64MultiplyAccumulate(Primitive::Type type,
+                           InstructionKind op,
+                           HInstruction* accumulator,
+                           HInstruction* mul_left,
+                           HInstruction* mul_right,
+                           uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2204921..dec08d8 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -110,24 +110,23 @@
 class PassObserver : public ValueObject {
  public:
   PassObserver(HGraph* graph,
-               const char* method_name,
                CodeGenerator* codegen,
                std::ostream* visualizer_output,
                CompilerDriver* compiler_driver)
       : graph_(graph),
-        method_name_(method_name),
+        cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
-        timing_logger_(method_name, true, true),
+        timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
         visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
-      if (!IsVerboseMethod(compiler_driver, method_name)) {
+      if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
         timing_logger_enabled_ = visualizer_enabled_ = false;
       }
       if (visualizer_enabled_) {
-        visualizer_.PrintHeader(method_name_);
+        visualizer_.PrintHeader(GetMethodName());
         codegen->SetDisassemblyInformation(&disasm_info_);
       }
     }
@@ -135,7 +134,7 @@
 
   ~PassObserver() {
     if (timing_logger_enabled_) {
-      LOG(INFO) << "TIMINGS " << method_name_;
+      LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
   }
@@ -148,6 +147,14 @@
 
   void SetGraphInBadState() { graph_in_bad_state_ = true; }
 
+  const char* GetMethodName() {
+    // PrettyMethod() is expensive, so we delay calling it until we actually have to.
+    if (cached_method_name_.empty()) {
+      cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+    }
+    return cached_method_name_.c_str();
+  }
+
  private:
   void StartPass(const char* pass_name) {
     // Dump graph first, then start timer.
@@ -206,7 +213,8 @@
   }
 
   HGraph* const graph_;
-  const char* method_name_;
+
+  std::string cached_method_name_;
 
   bool timing_logger_enabled_;
   TimingLogger timing_logger_;
@@ -664,7 +672,6 @@
                                               jobject class_loader,
                                               const DexFile& dex_file,
                                               Handle<mirror::DexCache> dex_cache) const {
-  std::string method_name = PrettyMethod(method_idx, dex_file);
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -728,7 +735,6 @@
       compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
 
   PassObserver pass_observer(graph,
-                             method_name.c_str(),
                              codegen.get(),
                              visualizer_output_.get(),
                              compiler_driver);
@@ -756,7 +762,7 @@
                         interpreter_metadata,
                         dex_cache);
 
-  VLOG(compiler) << "Building " << method_name;
+  VLOG(compiler) << "Building " << pass_observer.GetMethodName();
 
   {
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
@@ -766,13 +772,14 @@
     }
   }
 
-  VLOG(compiler) << "Optimizing " << method_name;
+  VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
   if (run_optimizations_) {
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
       if (!graph->TryBuildingSsa()) {
         // We could not transform the graph to SSA, bailout.
-        LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
+        LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
+            << ": it contains a non-natural loop";
         MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
         pass_observer.SetGraphInBadState();
         return nullptr;
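
A minimal sketch, with assumed names and not ART code, of the memoization now
used by PassObserver::GetMethodName(): the expensive pretty-printing runs at
most once, and not at all when neither the timing logger nor the visualizer
needs the name.

    #include <string>

    class LazyName {
     public:
      const char* Get() {
        if (cached_.empty()) {
          cached_ = Compute();  // stand-in for PrettyMethod(idx, dex_file)
        }
        return cached_.c_str();
      }

     private:
      static std::string Compute() { return "void Foo.bar()"; }
      std::string cached_;
    };

    int main() {
      LazyName name;
      return name.Get() == name.Get() ? 0 : 1;  // second call hits the cache
    }
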
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f3d075c..d1770b7 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -48,22 +48,34 @@
 }
 
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
-  // Try to find a static invoke from which this check originated.
-  HInvokeStaticOrDirect* invoke = nullptr;
+  // Try to find a static invoke or a new-instance from which this check originated.
+  HInstruction* implicit_clinit = nullptr;
   for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
-    if (user->IsInvokeStaticOrDirect() && CanMoveClinitCheck(check, user)) {
-      invoke = user->AsInvokeStaticOrDirect();
-      DCHECK(invoke->IsStaticWithExplicitClinitCheck());
-      invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
+        CanMoveClinitCheck(check, user)) {
+      implicit_clinit = user;
+      if (user->IsInvokeStaticOrDirect()) {
+        DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck());
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+      } else {
+        DCHECK(user->IsNewInstance());
+        // We delegate the initialization duty to the allocation.
+        if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) {
+          user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved);
+        }
+      }
       break;
     }
   }
-  // If we found a static invoke for merging, remove the check from all other static invokes.
-  if (invoke != nullptr) {
+  // If we found a static invoke or new-instance for merging, remove the check
+  // from dominated static invokes.
+  if (implicit_clinit != nullptr) {
     for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
       HInstruction* user = it.Current()->GetUser();
-      DCHECK(invoke->StrictlyDominates(user));  // All other uses must be dominated.
+      // All other uses must be dominated.
+      DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
       it.Advance();  // Advance before we remove the node, reference to the next node is preserved.
       if (user->IsInvokeStaticOrDirect()) {
         user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
@@ -77,8 +89,8 @@
 
   check->ReplaceWith(load_class);
 
-  if (invoke != nullptr) {
-    // Remove the check from the graph. It has been merged into the invoke.
+  if (implicit_clinit != nullptr) {
+    // Remove the check from the graph. It has been merged into the invoke or new-instance.
     check->GetBlock()->RemoveInstruction(check);
     // Check if we can merge the load class as well.
     if (can_merge_with_load_class && !load_class->HasUses()) {
@@ -92,6 +104,29 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
+  HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
+  bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  // Change the entrypoint to kQuickAllocObject if either:
+  // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
+  // - the class needs access checks (we do not know if it's finalizable),
+  // - or the load class has only one use.
+  if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
+    instruction->SetEntrypoint(kQuickAllocObject);
+    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+    // The allocation entry point that deals with access checks does not work with inlined
+    // methods, so we need to check whether this allocation comes from an inlined method.
+    if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) {
+      // We can remove the load class from the graph. If it needed access checks, we delegate
+      // the access check to the allocation.
+      if (load_class->NeedsAccessCheck()) {
+        instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+      }
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  }
+}
+
 void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
   bool needs_materialization = false;
   if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
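
A reduction, under assumed simplifications and not ART code, of the entrypoint
rewrite performed by VisitNewInstance above, keyed on the predicates it tests.

    #include <cassert>

    enum Entrypoint { kUnchanged, kAllocObject, kAllocObjectWithAccessCheck };

    static Entrypoint Rewrite(bool finalizable, bool needs_access_check,
                              bool load_class_has_one_use, bool from_inlined_invoke) {
      if (!finalizable && !needs_access_check && !load_class_has_one_use) {
        return kUnchanged;  // keep the entrypoint chosen at build time
      }
      if (load_class_has_one_use && !from_inlined_invoke && needs_access_check) {
        return kAllocObjectWithAccessCheck;  // load class removed, check delegated
      }
      return kAllocObject;
    }

    int main() {
      assert(Rewrite(false, false, false, false) == kUnchanged);
      assert(Rewrite(true, false, false, false) == kAllocObject);
      assert(Rewrite(false, true, true, false) == kAllocObjectWithAccessCheck);
      assert(Rewrite(false, true, true, true) == kAllocObject);
      return 0;
    }
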
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index a70fb30..9b24342 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,7 @@
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
 
   bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
 
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index c98f43e..bde54ee 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -63,7 +63,6 @@
             : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
         phi->ReplaceInput(equivalent, i);
         if (equivalent->IsPhi()) {
-          equivalent->AsPhi()->SetLive();
           AddToWorklist(equivalent->AsPhi());
         } else if (equivalent == input) {
           // The input has changed its type. It can be an input of other phis,
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 5190eb3..9e6cfbe 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -22,6 +22,13 @@
 
 namespace art {
 
+// Returns whether this is a loop header phi which was eagerly created but later
+// found inconsistent due to the vreg being undefined in one of its predecessors.
+// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination.
+static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
+  return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
+}
+
 /**
  * A debuggable application may require reviving phis, to ensure their
  * associated DEX register is available to a debugger. This class implements
@@ -165,17 +172,15 @@
 void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
+    if (IsUndefinedLoopHeaderPhi(phi)) {
+      DCHECK(phi->IsDead());
+      continue;
+    }
     if (phi->IsDead() && phi->HasEnvironmentUses()) {
       phi->SetLive();
       if (block->IsLoopHeader()) {
-        // Give a type to the loop phi to guarantee convergence of the algorithm.
-        // Note that the dead phi may already have a type if it is an equivalent
-        // generated for a typed LoadLocal. In that case we do not change the
-        // type because it could lead to an unsupported PrimNot/Float/Double ->
-        // PrimInt/Long transition and create same type equivalents.
-        if (phi->GetType() == Primitive::kPrimVoid) {
-          phi->SetType(phi->InputAt(0)->GetType());
-        }
+        // Loop phis must have a type to guarantee convergence of the algorithm.
+        DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
         AddToWorklist(phi);
       } else {
         // Because we are doing a reverse post order visit, all inputs of
@@ -220,6 +225,27 @@
   ProcessWorklist();
 }
 
+void SsaBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocal(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark it dead and leave with
+          // fewer inputs than predecessors. SsaChecker will fail if not removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
   for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
@@ -283,15 +309,7 @@
   }
 
   // 2) Set inputs of loop phis.
-  for (HBasicBlock* block : loop_headers_) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
-        phi->AddInput(input);
-      }
-    }
-  }
+  SetLoopHeaderPhiInputs();
 
   // 3) Mark dead phis. This will mark phis that are only used by environments:
   // at the DEX level, the type of these phis does not need to be consistent, but
@@ -403,8 +421,13 @@
       for (size_t i = 0; i < vregs; ++i) {
         // No point in creating the catch phi if it is already undefined at
         // the first throwing instruction.
-        if ((*current_locals_)[i] != nullptr) {
-          HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena) HPhi(
+              arena,
+              i,
+              0,
+              current_local_value->GetType());
           block->AddPhi(phi);
           (*locals)[i] = phi;
         }
@@ -451,7 +474,10 @@
       HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
       if (incoming != nullptr) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            0,
+            incoming->GetType());
         block->AddPhi(phi);
         (*current_locals_)[local] = phi;
       }
@@ -484,8 +510,12 @@
       }
 
       if (is_different) {
+        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            block->GetPredecessors().size(),
+            first_input->GetType());
         for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
           HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
           phi->SetRawInputAt(i, pred_value);
@@ -583,8 +613,16 @@
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     return new_phi;
   } else {
-    DCHECK_EQ(next->GetType(), type);
-    return next->AsPhi();
+    HPhi* next_phi = next->AsPhi();
+    DCHECK_EQ(next_phi->GetType(), type);
+    if (next_phi->IsDead()) {
+      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
+      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
+      // cannot revive undefined loop header phis because they cannot have uses.
+      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
+      next_phi->SetLive();
+    }
+    return next_phi;
   }
 }
 
@@ -638,7 +676,36 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
+  uint32_t reg_number = store->GetLocal()->GetRegNumber();
+  HInstruction* stored_value = store->InputAt(1);
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+
   store->GetBlock()->RemoveInstruction(store);
 }
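
To make the invalidation rules above concrete, here is a minimal sketch of how the locals map evolves under the two cases; the values W, W2 and I are illustrative, not taken from the change:

    // Conceptual trace of `current_locals_` (an array of HInstruction*):
    //   store wide W into v0   =>  { W, nullptr, ... }       // case (1)
    //   store int  I into v1   =>  { nullptr, I, ... }       // case (2): v0 held
    //                                                        // a wide value, so it
    //                                                        // is invalidated
    //   store wide W2 into v1 (over a pair in v0/v1)
    //                          =>  { nullptr, W2, nullptr }  // cases (1) and (2)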
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28..dcce5e4 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -81,6 +81,7 @@
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
+  void SetLoopHeaderPhiInputs();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd..a3219dc 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,8 @@
 
 #include "ssa_phi_elimination.h"
 
+#include "base/arena_containers.h"
+
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
@@ -24,22 +26,36 @@
 }
 
 void SsaDeadPhiElimination::MarkDeadPhis() {
+  // Phis are constructed live and should not be revived if previously marked
+  // dead. This algorithm temporarily breaks that invariant, but we DCHECK that
+  // only phis which were initially live are revived.
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
-      // Set dead ahead of running through uses. The phi may have no use.
-      phi->SetDead();
+      if (phi->IsDead()) {
+        continue;
+      }
+
+      bool has_non_phi_use = false;
       for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction*>* current = use_it.Current();
-        HInstruction* user = current->GetUser();
-        if (!user->IsPhi()) {
-          worklist_.push_back(phi);
-          phi->SetLive();
+        if (!use_it.Current()->GetUser()->IsPhi()) {
+          has_non_phi_use = true;
           break;
         }
       }
+
+      if (has_non_phi_use) {
+        worklist_.push_back(phi);
+      } else {
+        phi->SetDead();
+        if (kIsDebugBuild) {
+          initially_live.insert(phi);
+        }
+      }
     }
   }
 
@@ -48,10 +64,13 @@
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
     for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HInstruction* input = it.Current();
-      if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.push_back(input->AsPhi());
-        input->AsPhi()->SetLive();
+      HPhi* input = it.Current()->AsPhi();
+      if (input != nullptr && input->IsDead()) {
+        // Input is a dead phi. Revive it and add to the worklist. We make sure
+        // that the phi was not dead initially (see definition of `initially_live`).
+        DCHECK(ContainsElement(initially_live, input));
+        input->SetLive();
+        worklist_.push_back(input);
       }
     }
   }
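
A small worked example may help here; the phi names are hypothetical:

    // p1 = Phi(a, p2)  // has a non-phi user  => kept live, pushed on worklist
    // p2 = Phi(b, c)   // used only by p1     => SetDead() in the first pass
    // Processing p1 then visits its input p2: p2 is dead, but it was marked
    // dead by this very pass (it is in `initially_live`), so the DCHECK holds
    // and p2 is revived and queued in turn.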
@@ -118,7 +137,6 @@
     }
 
     if (phi->InputCount() == 0) {
-      DCHECK(phi->IsCatchPhi());
       DCHECK(phi->IsDead());
       continue;
     }
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 68e3956..dead8fd 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -342,9 +342,9 @@
       return IsAbsoluteUint<12>(offset);
     case kLoadSWord:
     case kLoadDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kLoadWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -360,9 +360,9 @@
       return IsAbsoluteUint<12>(offset);
     case kStoreSWord:
     case kStoreDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kStoreWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
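
The extra alignment test reflects the VFP addressing mode, which encodes the offset as an 8-bit immediate scaled by 4; the same conservative check is applied to word pairs. So besides fitting in 10 bits, the offset must now be a multiple of 4. A standalone sketch of the tightened predicate, with a hypothetical helper name:

    // Assuming ART's IsAbsoluteUint<> helper:
    bool IsEncodableVfpOffset(int32_t offset) {
      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
    }
    // 1020 (0x3fc) is encodable; 1022 is not (misaligned); 1024 is not (range).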
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 5233dcb..ce3a872 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -389,8 +389,6 @@
   void EmitBranch(Condition cond, Label* label, bool link);
   static int32_t EncodeBranchOffset(int offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
 };
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 297cc54..7ad5b44 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1349,7 +1349,8 @@
   int32_t encoding = 0;
   if (so.IsImmediate()) {
     // Check special cases.
-    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
+    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) &&
+        /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) {
       if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
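
The net effect of the new condition, as the updated test expectations below confirm, is that an ADD/SUB immediate that fits the Thumb2 modified-immediate scheme now uses the T3 encoding, and the imm12 T4 encoding is kept only as a fallback:

    // Encoding selection after this change (values from the SpecialAddSub test):
    //   add r2, sp, #0xf00  =>  T3 add.w  (0xf00 is a modified immediate)
    //   add sp, sp, #0xffc  =>  T4 addw   (0xffc fits imm12 but not T3)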
@@ -3220,7 +3221,7 @@
 
 void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, cond, set_cc);
+  EmitShift(rd, rm, RRX, 0, cond, set_cc);
 }
 
 
@@ -3469,6 +3470,73 @@
   }
 }
 
+int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
+  switch (type) {
+    case kLoadSignedByte:
+    case kLoadSignedHalfword:
+    case kLoadUnsignedHalfword:
+    case kLoadUnsignedByte:
+    case kLoadWord:
+      // We can encode imm12 offset.
+      return 0xfffu;
+    case kLoadSWord:
+    case kLoadDWord:
+    case kLoadWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
+  switch (type) {
+    case kStoreHalfword:
+    case kStoreByte:
+    case kStoreWord:
+      // We can encode imm12 offset.
+      return 0xfff;
+    case kStoreSWord:
+    case kStoreDWord:
+    case kStoreWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                                              int32_t offset,
+                                              /*out*/ int32_t* add_to_base,
+                                              /*out*/ int32_t* offset_for_load_store) {
+  int32_t other_bits = offset & ~allowed_offset_bits;
+  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
+    *add_to_base = offset & ~allowed_offset_bits;
+    *offset_for_load_store = offset & allowed_offset_bits;
+    return true;
+  }
+  return false;
+}
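
A worked split, using an offset from the new LoadFromOffset test; the intermediate names mirror the code above:

    // kLoadWord, so allowed_offset_bits == 0xfff:
    //   offset          = 0x1000a4
    //   other_bits      = 0x1000a4 & ~0xfff = 0x100000  // a modified immediate
    //   add_to_base     = 0x100000  =>  add.w r2, r4, #1048576
    //   offset_for_load = 0xa4      =>  ldr.w r2, [r2, #164]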
+
+int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                               Register temp,
+                                               Register base,
+                                               int32_t offset,
+                                               Condition cond) {
+  DCHECK_NE(offset & ~allowed_offset_bits, 0);
+  int32_t add_to_base, offset_for_load;
+  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+    AddConstant(temp, base, add_to_base, cond, kCcKeep);
+    return offset_for_load;
+  } else {
+    LoadImmediate(temp, offset, cond);
+    add(temp, temp, ShifterOperand(base), cond, kCcKeep);
+    return 0;
+  }
+}
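
When neither `other_bits` nor its negation is encodable, the whole offset is materialized instead. For example, with kLoadWord and offset 0x101000, the set bits span nine positions, too wide for an 8-bit modified immediate, and the same holds for the negation; the expected output below therefore shows a movw/movt pair followed by an add:

    // movw ip, #0x1000 ; movt ip, #0x10   // LoadImmediate(IP, 0x101000)
    // add  ip, base                       // base = IP, offset = 0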
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb.
@@ -3479,12 +3547,26 @@
                                      Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
+    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
+    DCHECK_NE(offset & ~allowed_offset_bits, 0);
+    int32_t add_to_base, offset_for_load;
+    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      AddConstant(reg, base, add_to_base, cond, kCcKeep);
+      base = reg;
+      offset = offset_for_load;
+    } else {
+      Register temp = (reg == base) ? IP : reg;
+      LoadImmediate(temp, offset, cond);
+      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep);
+      base = reg;
+      offset = 0;
+    }
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(type, offset));
   switch (type) {
     case kLoadSignedByte:
       ldrsb(reg, Address(base, offset), cond);
@@ -3510,7 +3592,6 @@
   }
 }
 
-
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
 void Thumb2Assembler::LoadSFromOffset(SRegister reg,
@@ -3519,12 +3600,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
   vldrs(reg, Address(base, offset), cond);
 }
 
@@ -3537,12 +3616,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
   vldrd(reg, Address(base, offset), cond);
 }
 
@@ -3573,12 +3650,12 @@
         offset += kRegisterSize;
       }
     }
-    LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
+    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
+    // and in the "unsplittable" path get rid of the "add" by using the indexed store instead.
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond);
     base = tmp_reg;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(type, offset));
   switch (type) {
     case kStoreByte:
       strb(reg, Address(base, offset), cond);
@@ -3611,12 +3688,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
   vstrs(reg, Address(base, offset), cond);
 }
 
@@ -3629,12 +3704,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
   vstrd(reg, Address(base, offset), cond);
 }
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index e183613..9aeece8 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -729,13 +729,23 @@
   void EmitBranch(Condition cond, Label* label, bool link, bool x);
   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
 
+  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
+  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
+  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                               int32_t offset,
+                               /*out*/ int32_t* add_to_base,
+                               /*out*/ int32_t* offset_for_load_store);
+  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                Register temp,
+                                Register base,
+                                int32_t offset,
+                                Condition cond);
+
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
   bool can_relocate_branches_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index cb4b20b..7b32b0f 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -243,7 +243,7 @@
 
   const char* expected =
       "subs r1, r0, #42\n"
-      "subw r1, r0, #42\n"
+      "sub.w r1, r0, #42\n"
       "subs r1, r0, r2, asr #31\n"
       "sub r1, r0, r2, asr #31\n";
   DriverStr(expected, "sub");
@@ -257,7 +257,7 @@
 
   const char* expected =
       "adds r1, r0, #42\n"
-      "addw r1, r0, #42\n"
+      "add.w r1, r0, #42\n"
       "adds r1, r0, r2, asr #31\n"
       "add r1, r0, r2, asr #31\n";
   DriverStr(expected, "add");
@@ -305,21 +305,18 @@
   __ StoreToOffset(type, arm::IP, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #4096\n"   // AddConstant(ip, sp, 4096)
       "str r0, [ip, #0]\n"
 
-      "str r5, [sp, #-4]!\n"  // Push(r5)
-      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "str ip, [r5, #0]\n"
-      "ldr r5, [sp], #4\n"    // Pop(r5)
+      "str r5, [sp, #-4]!\n"    // Push(r5)
+      "add.w r5, sp, #4096\n"   // AddConstant(r5, 4100 & ~0xfff)
+      "str ip, [r5, #4]\n"      // StoreToOffset(type, ip, r5, 4100 & 0xfff)
+      "ldr r5, [sp], #4\n"      // Pop(r5)
 
-      "str r6, [sp, #-4]!\n"  // Push(r6)
-      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
-      "add r6, r6, r5\n"
-      "str ip, [r6, #0]\n"
-      "ldr r6, [sp], #4\n";   // Pop(r6)
+      "str r6, [sp, #-4]!\n"    // Push(r6)
+      "add.w r6, r5, #4096\n"   // AddConstant(r6, r5, 4096 & ~0xfff)
+      "str ip, [r6, #0]\n"      // StoreToOffset(type, ip, r6, 4096 & 0xfff)
+      "ldr r6, [sp], #4\n";     // Pop(r6)
   DriverStr(expected, "StoreWordToNonThumbOffset");
 }
 
@@ -360,20 +357,17 @@
   __ StoreToOffset(type, arm::R11, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #1024\n"           // LoadImmediate(ip, 1024)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #1024\n"     // AddConstant(ip, sp, 1024)
       "strd r0, r1, [ip, #0]\n"
 
       "str r5, [sp, #-4]!\n"      // Push(r5)
-      "movw r5, #1028\n"          // LoadImmediate(r5, 1024 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "strd r11, ip, [r5, #0]\n"
+      "add.w r5, sp, #1024\n"     // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc)
+      "strd r11, ip, [r5, #4]\n"  // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc)
       "ldr r5, [sp], #4\n"        // Pop(r5)
 
       "str r6, [sp, #-4]!\n"      // Push(r6)
-      "mov r6, #1024\n"           // LoadImmediate(r6, 1024)
-      "add r6, r6, r5\n"
-      "strd r11, ip, [r6, #0]\n"
+      "add.w r6, r5, #1024\n"     // AddConstant(r6, r5, 1024 & ~0x3fc)
+      "strd r11, ip, [r6, #0]\n"  // StoreToOffset(type, r11, r6, 1024 & 0x3fc)
       "ldr r6, [sp], #4\n";       // Pop(r6)
   DriverStr(expected, "StoreWordPairToNonThumbOffset");
 }
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2ae8841..1de51a2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -466,6 +466,38 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
+TEST(Thumb2AssemblerTest, ShiftImmediate) {
+  // Note: This test produces the same results as DataProcessingShiftedRegister
+  // but it does so using shift functions instead of mov().
+  arm::Thumb2Assembler assembler;
+
+  // 16-bit variants.
+  __ Lsl(R3, R4, 4);
+  __ Lsr(R3, R4, 5);
+  __ Asr(R3, R4, 6);
+
+  // 32-bit ROR because ROR immediate doesn't have a 16-bit version, unlike the other shifts.
+  __ Ror(R3, R4, 7);
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ Rrx(R3, R4);
+
+  // 32-bit variants (not setting condition codes).
+  __ Lsl(R3, R4, 4, AL, kCcKeep);
+  __ Lsr(R3, R4, 5, AL, kCcKeep);
+  __ Asr(R3, R4, 6, AL, kCcKeep);
+  __ Ror(R3, R4, 7, AL, kCcKeep);
+  __ Rrx(R3, R4, AL, kCcKeep);
+
+  // 32-bit variants (high registers).
+  __ Lsls(R8, R4, 4);
+  __ Lsrs(R8, R4, 5);
+  __ Asrs(R8, R4, 6);
+  __ Rors(R8, R4, 7);
+  __ Rrxs(R8, R4);
+
+  EmitAndCheck(&assembler, "ShiftImmediate");
+}
 
 TEST(Thumb2AssemblerTest, BasicLoad) {
   arm::Thumb2Assembler assembler;
@@ -823,29 +855,80 @@
 
   __ add(R2, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
   __ add(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+  __ add(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
-  __ sub(SP, SP, ShifterOperand(0x50));     // 16 bit
-  __ sub(R0, SP, ShifterOperand(0x50));     // 32 bit
-  __ sub(R8, SP, ShifterOperand(0x50));     // 32 bit.
+  __ sub(SP, SP, ShifterOperand(0x50));   // 16 bit
+  __ sub(R0, SP, ShifterOperand(0x50));   // 32 bit
+  __ sub(R8, SP, ShifterOperand(0x50));   // 32 bit.
 
-  __ sub(SP, SP, ShifterOperand(0xf00));   // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
+TEST(Thumb2AssemblerTest, LoadFromOffset) {
+  arm::Thumb2Assembler assembler;
+
+  __ LoadFromOffset(kLoadWord, R2, R4, 12);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadWord, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 12);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);
+  __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);
+
+  __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);
+
+  __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12);
+
+  EmitAndCheck(&assembler, "LoadFromOffset");
+}
+
 TEST(Thumb2AssemblerTest, StoreToOffset) {
   arm::Thumb2Assembler assembler;
 
-  __ StoreToOffset(kStoreWord, R2, R4, 12);     // Simple
-  __ StoreToOffset(kStoreWord, R2, R4, 0x2000);     // Offset too big.
-  __ StoreToOffset(kStoreWord, R0, R12, 12);
-  __ StoreToOffset(kStoreHalfword, R0, R12, 12);
-  __ StoreToOffset(kStoreByte, R2, R12, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreWord, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 12);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 12);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
+  __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
+
+  __ StoreToOffset(kStoreWord, R0, R12, 12);  // 32-bit because of R12.
+  __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
+
+  __ StoreToOffset(kStoreByte, R2, R4, 12);
 
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-
 TEST(Thumb2AssemblerTest, IfThen) {
   arm::Thumb2Assembler assembler;
 
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b79c2e4..9246c82 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -132,8 +132,8 @@
 const char* DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
-  "   6:	f201 0055 	addw	r0, r1, #85	; 0x55\n",
-  "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
+  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
+  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
   "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
@@ -201,6 +201,24 @@
   "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
+const char* ShiftImmediateResults[] = {
+  "   0:  0123        lsls  r3, r4, #4\n",
+  "   2:  0963        lsrs  r3, r4, #5\n",
+  "   4:  11a3        asrs  r3, r4, #6\n",
+  "   6:  ea4f 13f4   mov.w  r3, r4, ror #7\n",
+  "   a:  ea4f 0334   mov.w  r3, r4, rrx\n",
+  "   e:  ea4f 1304   mov.w r3, r4, lsl #4\n",
+  "  12:  ea4f 1354   mov.w r3, r4, lsr #5\n",
+  "  16:  ea4f 13a4   mov.w r3, r4, asr #6\n",
+  "  1a:  ea4f 13f4   mov.w r3, r4, ror #7\n",
+  "  1e:  ea4f 0334   mov.w r3, r4, rrx\n",
+  "  22:  ea5f 1804   movs.w  r8, r4, lsl #4\n",
+  "  26:  ea5f 1854   movs.w  r8, r4, lsr #5\n",
+  "  2a:  ea5f 18a4   movs.w  r8, r4, asr #6\n",
+  "  2e:  ea5f 18f4   movs.w  r8, r4, ror #7\n",
+  "  32:  ea5f 0834   movs.w  r8, r4, rrx\n",
+  nullptr
+};
 const char* BasicLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	7e23      	ldrb	r3, [r4, #24]\n",
@@ -434,23 +452,115 @@
 const char* SpecialAddSubResults[] = {
   "   0:	aa14      	add	r2, sp, #80	; 0x50\n",
   "   2:	b014      	add	sp, #80		; 0x50\n",
-  "   4:	f20d 0850 	addw	r8, sp, #80	; 0x50\n",
-  "   8:	f60d 7200 	addw	r2, sp, #3840	; 0xf00\n",
-  "   c:	f60d 7d00 	addw	sp, sp, #3840	; 0xf00\n",
-  "  10:	b094      	sub	sp, #80		; 0x50\n",
-  "  12:	f2ad 0050 	subw	r0, sp, #80	; 0x50\n",
-  "  16:	f2ad 0850 	subw	r8, sp, #80	; 0x50\n",
-  "  1a:	f6ad 7d00 	subw	sp, sp, #3840	; 0xf00\n",
+  "   4:	f10d 0850 	add.w	r8, sp, #80	; 0x50\n",
+  "   8:	f50d 6270 	add.w	r2, sp, #3840	; 0xf00\n",
+  "   c:	f50d 6d70 	add.w	sp, sp, #3840	; 0xf00\n",
+  "  10:	f60d 7dfc 	addw	sp, sp, #4092	; 0xffc\n",
+  "  14:	b094      	sub	sp, #80		; 0x50\n",
+  "  16:	f1ad 0050 	sub.w	r0, sp, #80	; 0x50\n",
+  "  1a:	f1ad 0850 	sub.w	r8, sp, #80	; 0x50\n",
+  "  1e:	f5ad 6d70 	sub.w	sp, sp, #3840	; 0xf00\n",
+  "  22:	f6ad 7dfc 	subw	sp, sp, #4092	; 0xffc\n",
+  nullptr
+};
+const char* LoadFromOffsetResults[] = {
+  "   0:	68e2      	ldr	r2, [r4, #12]\n",
+  "   2:	f8d4 2fff 	ldr.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "   a:	6812      	ldr	r2, [r2, #0]\n",
+  "   c:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  10:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  14:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  18:	f2c0 0210 	movt	r2, #16\n",
+  "  1c:	4422      	add	r2, r4\n",
+  "  1e:	6812      	ldr	r2, [r2, #0]\n",
+  "  20:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  24:	f2c0 0c10 	movt	ip, #16\n",
+  "  28:	4464      	add	r4, ip\n",
+  "  2a:	6824      	ldr	r4, [r4, #0]\n",
+  "  2c:	89a2      	ldrh	r2, [r4, #12]\n",
+  "  2e:	f8b4 2fff 	ldrh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  32:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "  36:	8812      	ldrh	r2, [r2, #0]\n",
+  "  38:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  3c:	f8b2 20a4 	ldrh.w	r2, [r2, #164]	; 0xa4\n",
+  "  40:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  44:	f2c0 0210 	movt	r2, #16\n",
+  "  48:	4422      	add	r2, r4\n",
+  "  4a:	8812      	ldrh	r2, [r2, #0]\n",
+  "  4c:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  50:	f2c0 0c10 	movt	ip, #16\n",
+  "  54:	4464      	add	r4, ip\n",
+  "  56:	8824      	ldrh	r4, [r4, #0]\n",
+  "  58:	e9d4 2303 	ldrd	r2, r3, [r4, #12]\n",
+  "  5c:	e9d4 23ff 	ldrd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  60:	f504 6280 	add.w	r2, r4, #1024	; 0x400\n",
+  "  64:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  68:	f504 2280 	add.w	r2, r4, #262144	; 0x40000\n",
+  "  6c:	e9d2 2329 	ldrd	r2, r3, [r2, #164];	0xa4\n",
+  "  70:	f240 4200 	movw	r2, #1024	; 0x400\n",
+  "  74:	f2c0 0204 	movt	r2, #4\n",
+  "  78:	4422      	add	r2, r4\n",
+  "  7a:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  7e:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  82:	f2c0 0c04 	movt	ip, #4\n",
+  "  86:	4464      	add	r4, ip\n",
+  "  88:	e9d4 4500 	ldrd	r4, r5, [r4]\n",
+  "  8c:	f8dc 000c 	ldr.w	r0, [ip, #12]\n",
+  "  90:	f5a4 1280 	sub.w	r2, r4, #1048576	; 0x100000\n",
+  "  94:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  98:	f994 200c 	ldrsb.w	r2, [r4, #12]\n",
+  "  9c:	7b22      	ldrb	r2, [r4, #12]\n",
+  "  9e:	f9b4 200c 	ldrsh.w	r2, [r4, #12]\n",
   nullptr
 };
 const char* StoreToOffsetResults[] = {
   "   0:	60e2      	str	r2, [r4, #12]\n",
-  "   2:	f44f 5c00 	mov.w	ip, #8192	; 0x2000\n",
-  "   6:	44a4      	add	ip, r4\n",
-  "   8:	f8cc 2000 	str.w	r2, [ip]\n",
-  "   c:	f8cc 000c 	str.w	r0, [ip, #12]\n",
-  "   10:	f8ac 000c 	strh.w	r0, [ip, #12]\n",
-  "   14:	f88c 200c 	strb.w	r2, [ip, #12]\n",
+  "   2:	f8c4 2fff 	str.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "   a:	f8cc 2000 	str.w	r2, [ip]\n",
+  "   e:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  12:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  16:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  1a:	f2c0 0c10 	movt	ip, #16\n",
+  "  1e:	44a4      	add	ip, r4\n",
+  "  20:	f8cc 2000 	str.w	r2, [ip]\n",
+  "  24:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  28:	f2c0 0c10 	movt	ip, #16\n",
+  "  2c:	44a4      	add	ip, r4\n",
+  "  2e:	f8cc 4000 	str.w	r4, [ip]\n",
+  "  32:	81a2      	strh	r2, [r4, #12]\n",
+  "  34:	f8a4 2fff 	strh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  38:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "  3c:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  40:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  44:	f8ac 20a4 	strh.w	r2, [ip, #164]	; 0xa4\n",
+  "  48:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  4c:	f2c0 0c10 	movt	ip, #16\n",
+  "  50:	44a4      	add	ip, r4\n",
+  "  52:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  56:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  5a:	f2c0 0c10 	movt	ip, #16\n",
+  "  5e:	44a4      	add	ip, r4\n",
+  "  60:	f8ac 4000 	strh.w	r4, [ip]\n",
+  "  64:	e9c4 2303 	strd	r2, r3, [r4, #12]\n",
+  "  68:	e9c4 23ff 	strd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  6c:	f504 6c80 	add.w	ip, r4, #1024	; 0x400\n",
+  "  70:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  74:	f504 2c80 	add.w	ip, r4, #262144	; 0x40000\n",
+  "  78:	e9cc 2329 	strd	r2, r3, [ip, #164];	0xa4\n",
+  "  7c:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  80:	f2c0 0c04 	movt	ip, #4\n",
+  "  84:	44a4      	add	ip, r4\n",
+  "  86:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  8a:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  8e:	f2c0 0c04 	movt	ip, #4\n",
+  "  92:	44a4      	add	ip, r4\n",
+  "  94:	e9cc 4500 	strd	r4, r5, [ip]\n",
+  "  98:	f8cc 000c 	str.w	r0, [ip, #12]\n",
+  "  9c:	f5a4 1c80 	sub.w	ip, r4, #1048576	; 0x100000\n",
+  "  a0:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  a4:	7322      	strb	r2, [r4, #12]\n",
   nullptr
 };
 const char* IfThenResults[] = {
@@ -4952,6 +5062,7 @@
     test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults;
     test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults;
     test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults;
+    test_results["ShiftImmediate"] = ShiftImmediateResults;
     test_results["BasicLoad"] = BasicLoadResults;
     test_results["BasicStore"] = BasicStoreResults;
     test_results["ComplexLoad"] = ComplexLoadResults;
@@ -4966,6 +5077,7 @@
     test_results["StoreMultiple"] = StoreMultipleResults;
     test_results["MovWMovT"] = MovWMovTResults;
     test_results["SpecialAddSub"] = SpecialAddSubResults;
+    test_results["LoadFromOffset"] = LoadFromOffsetResults;
     test_results["StoreToOffset"] = StoreToOffsetResults;
     test_results["IfThen"] = IfThenResults;
     test_results["CbzCbnz"] = CbzCbnzResults;
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index d5fed2a..6151fc1 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -389,24 +389,6 @@
     return arg << 0;
   }
 
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       Return [<<Shl>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Arg>>]
-  /// CHECK-DAG:                       Return [<<Add>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-NOT:                       Shl
-
-  public static int Shl1(int arg) {
-    return arg << 1;
-  }
-
   /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -1245,7 +1227,6 @@
     return arg * 9;
   }
 
-
   /**
    * Test strength reduction of factors of the form (2^n - 1).
    */
@@ -1265,6 +1246,91 @@
     return arg * 31;
   }
 
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldNotEqualOne() {
+    return (booleanField == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Field>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldEqualZero() {
+    return (booleanField != false) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<GT>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionNotEqualOne(int i) {
+    return ((i > 42) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<GT>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionEqualZero(int i) {
+    return ((i > 42) != false) ? 13 : 54;
+  }
+
+  // Test that conditions on float/double are not flipped.
+
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:                        NotEqual [{{i\d+}},<<Const1>>]
+
+  public static int floatConditionNotEqualOne(float f) {
+    return ((f > 42.0f) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                        Equal [{{i\d+}},<<Const0>>]
+
+  public static int doubleConditionEqualZero(double d) {
+    return ((d > 42.0) != false) ? 13 : 54;
+  }
 
   public static void main(String[] args) {
     int arg = 123456;
@@ -1314,7 +1380,6 @@
     assertDoubleEquals(Div2(150.0), 75.0);
     assertFloatEquals(DivMP25(100.0f), -400.0f);
     assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertLongEquals(Shl1(100), 200);
     assertIntEquals(UShr28And15(0xc1234567), 0xc);
     assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
     assertIntEquals(UShr28And7(0xc1234567), 0x4);
@@ -1333,5 +1398,22 @@
     assertLongEquals(62, mulPow2Minus1(2));
     assertLongEquals(3100, mulPow2Minus1(100));
     assertLongEquals(382695, mulPow2Minus1(12345));
+
+    booleanField = false;
+    assertIntEquals(booleanFieldNotEqualOne(), 54);
+    assertIntEquals(booleanFieldEqualZero(), 54);
+    booleanField = true;
+    assertIntEquals(booleanFieldNotEqualOne(), 13);
+    assertIntEquals(booleanFieldEqualZero(), 13);
+    assertIntEquals(intConditionNotEqualOne(6), 54);
+    assertIntEquals(intConditionNotEqualOne(43), 13);
+    assertIntEquals(intConditionEqualZero(6), 54);
+    assertIntEquals(intConditionEqualZero(43), 13);
+    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
+    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
+    assertIntEquals(doubleConditionEqualZero(6.0), 54);
+    assertIntEquals(doubleConditionEqualZero(43.0), 13);
   }
+
+  public static boolean booleanField;
 }
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 13c4722..17e88ce 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -136,6 +136,9 @@
 
   // A new allocation shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
+    // Do an allocation here to avoid the HLoadClass and HClinitCheck
+    // at the second allocation.
+    new TestClass();
     obj.i = 1;
     obj.next.j = 2;
     TestClass obj2 = new TestClass();
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 12f0380..f791adf 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,26 +260,43 @@
     return arg ^ 0xf00000000000000fL;
   }
 
-  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
-  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2
-  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<low>>, #2
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK:                lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK:                add
+  /// CHECK:                adc
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            shl
+
+  public static long shl1(long arg) {
+    return arg << 1;
+  }
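
The lsls/adc pattern checked above implements the 64-bit shift without a general shifter; a conceptual C sketch of the arithmetic on the two 32-bit halves:

    // x << 1 with x = hi:lo
    uint32_t carry = lo >> 31;   // lsls lo, lo, #1 shifts and sets the carry
    lo = lo << 1;
    hi = (hi << 1) | carry;      // adc hi, hi, hi computes hi + hi + carry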
 
   /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl2(long arg) {
-    // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2).
     return arg << 2;
   }
 
   /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
-  /// CHECK:                lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31
   /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
-  /// CHECK:                lsl{{s?|.w}} {{r\d+}}, <<low>>, #31
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31
 
   /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl31(long arg) {
     return arg << 31;
@@ -287,114 +304,136 @@
 
   /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
   /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}}
+  /// CHECK-NOT:            lsl{{s?|\.w}}
 
   public static long shl32(long arg) {
     return arg << 32;
   }
 
   /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl33(long arg) {
     return arg << 33;
   }
 
   /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
-  /// CHECK-DAG:            lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
-  /// CHECK-NOT:            lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shl63(long arg) {
     return arg << 63;
   }
 
   /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #1
+  /// CHECK:                asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
 
   /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr1(long arg) {
     return arg >> 1;
   }
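
The asrs/rrx pair checked above works the same way in the other direction; ushr1 below uses lsrs for the high word but is otherwise identical. A conceptual sketch:

    // x >> 1 with x = hi:lo
    uint32_t carry = hi & 1;             // asrs hi, hi, #1 pushes the low bit
    hi = static_cast<int32_t>(hi) >> 1;  // ...of hi into the carry flag
    lo = (lo >> 1) | (carry << 31);      // rrx rotates the carry into bit 31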
 
-  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
-  /// CHECK:                asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr2(long arg) {
+    return arg >> 2;
+  }
 
   /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr31(long arg) {
     return arg >> 31;
   }
 
   /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
   /// CHECK-DAG:            mov {{r\d+}}, <<high>>
 
   /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NOT:            lsr{{s?|.w}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
 
   public static long shr32(long arg) {
     return arg >> 32;
   }
 
   /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
 
   /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr33(long arg) {
     return arg >> 33;
   }
 
   /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31
-  /// CHECK-DAG:            asr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
 
   /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
-  /// CHECK-NOT:            asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long shr63(long arg) {
     return arg >> 63;
   }
 
   /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, <<high>>, #1
+  /// CHECK:                lsrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
 
   /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr1(long arg) {
     return arg >>> 1;
   }
 
-  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
-  /// CHECK:                lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31
-  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
-  /// CHECK:                lsr{{s?|.w}} {{r\d+}}, <<high>>, #31
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr2(long arg) {
+    return arg >>> 2;
+  }
 
   /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr31(long arg) {
     return arg >>> 31;
@@ -402,32 +441,32 @@
 
   /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
   /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
 
   public static long ushr32(long arg) {
     return arg >>> 32;
   }
 
   /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr33(long arg) {
     return arg >>> 33;
   }
 
   /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
-  /// CHECK-DAG:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31
-  /// CHECK-DAG:            mov{{s?|.w}} {{r\d+}}, #0
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
 
   /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
-  /// CHECK-NOT:            lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
 
   public static long ushr63(long arg) {
     return arg >>> 63;
@@ -485,11 +524,13 @@
 
     assertLongEquals(14, addM1(7));
 
+    assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
     assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
     assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
     assertLongEquals(shl32(longArg), 0x8765432100000000L);
     assertLongEquals(shl33(longArg), 0x0eca864200000000L);
     assertLongEquals(shl63(longArg), 0x8000000000000000L);
+    assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL);
     assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
     assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
     assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
@@ -497,22 +538,26 @@
     assertLongEquals(shl63(~longArg), 0x0000000000000000L);
 
     assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(shr2(longArg), 0x048d159e21d950c8L);
     assertLongEquals(shr31(longArg), 0x000000002468acf1L);
     assertLongEquals(shr32(longArg), 0x0000000012345678L);
     assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
     assertLongEquals(shr63(longArg), 0x0000000000000000L);
     assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+    assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L);
     assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
     assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
     assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
     assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
 
     assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L);
     assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
     assertLongEquals(ushr32(longArg), 0x0000000012345678L);
     assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
     assertLongEquals(ushr63(longArg), 0x0000000000000000L);
     assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+    assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L);
     assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
     assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
     assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
diff --git a/test/550-checker-multiply-accumulate/expected.txt b/test/550-checker-multiply-accumulate/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/expected.txt
diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/info.txt
@@ -0,0 +1 @@
+Test the merging of instructions into the shifter operand on arm64.
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
new file mode 100644
index 0000000..2d0688d
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -0,0 +1,234 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test basic merging of `MUL+ADD` into `MULADD`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulAdd(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    return acc + left * right;
+  }
+
+  /**
+   * Test basic merging of `MUL+SUB` into `MULSUB`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulSub(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    return acc - left * right;
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction when the
+   * multiplication has other uses into which it cannot be merged.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+  public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    int temp = left * right;
+    return temp | (acc + temp);
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction even when
+   * every use of the multiplication could merge it, as it still has multiple uses.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+
+  public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    long temp = left * right;
+    return (acc + temp) + (acc - temp);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (b + 1)` as `a + (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulPlusOne(int acc, int var) {
+    if (doThrow) throw new Error();
+    return acc * (var + 1);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (1 - b)` as `a - (a * b)`.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulMinusOne(long acc, long var) {
+    if (doThrow) throw new Error();
+    return acc * (1 - var);
+  }
+
+
+  public static void main(String[] args) {
+    assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
+    assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6));
+    assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9));
+    assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
+    assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
+    assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+  }
+}
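For reference, the merged Arm64MultiplyAccumulate node corresponds to the AArch64 madd/msub instructions checked in the disassembly expectations above, which compute acc + left * right and acc - left * right in a single instruction. The `a * (b + 1)` and `a * (1 - b)` rewrites are only valid because Java's int/long arithmetic wraps around; here is a standalone sketch checking both identities at an overflow boundary (class and method names are invented for illustration, not part of the change):

    public class MulAccSketch {
      // madd semantics: acc + left * right in one instruction.
      static int madd(int acc, int left, int right) { return acc + left * right; }
      // msub semantics: acc - left * right in one instruction.
      static long msub(long acc, long left, long right) { return acc - left * right; }

      public static void main(String[] args) {
        // a * (b + 1) == a + a * b, even when b + 1 overflows.
        int a = 13, b = Integer.MAX_VALUE;
        if (a * (b + 1) != madd(a, a, b)) throw new AssertionError("mulPlusOne rewrite");
        // a * (1 - b) == a - a * b; same reasoning for long.
        long c = 15L, d = Long.MIN_VALUE;
        if (c * (1 - d) != msub(c, c, d)) throw new AssertionError("mulMinusOne rewrite");
        System.out.println("rewrites hold under wrap-around arithmetic");
      }
    }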
diff --git a/test/550-checker-regression-wide-store/expected.txt b/test/550-checker-regression-wide-store/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-regression-wide-store/expected.txt
diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt
new file mode 100644
index 0000000..6cf04bc
--- /dev/null
+++ b/test/550-checker-regression-wide-store/info.txt
@@ -0,0 +1,3 @@
+Test an SsaBuilder regression where storing into the high vreg of a pair
+would not invalidate the low vreg. The resulting environment would encode an
+incorrect stack map, causing deoptimization and try/catch to read a wrong location.
\ No newline at end of file
diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali
new file mode 100644
index 0000000..7974d56
--- /dev/null
+++ b/test/550-checker-regression-wide-store/smali/TestCase.smali
@@ -0,0 +1,82 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static $noinline$throw()V
+  .registers 1
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  throw v0
+.end method
+
+# Test storing into the high vreg of a wide pair. This scenario has runtime
+# behaviour implications, so we run it from Main.main.
+
+## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after)
+## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>]
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]]
+## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw  env:[[_,<<Cast>>,<<Arg>>,_]]
+
+.method public static invalidateLow(J)I
+  .registers 4
+
+  const/4 v1, 0x0
+
+  :try_start
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-wide v0, p0
+  long-to-int v1, v0
+  invoke-static {}, LTestCase;->$noinline$throw()V
+  :try_end
+  .catchall {:try_start .. :try_end} :catchall
+
+  :catchall
+  return v1
+
+.end method
+
+# Test that storing a wide invalidates the value in the high vreg. This
+# cannot be detected at runtime, so we only test the environment with Checker.
+
+## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]]
+
+.method public static invalidateHigh1(J)V
+  .registers 4
+
+  const/4 v1, 0x0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
+
+## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]]
+
+.method public static invalidateHigh2(J)V
+  .registers 5
+
+  move-wide v1, p0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
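The invariant behind these three cases: a wide value occupies two adjacent vregs, and any write that touches either half must invalidate the whole pair. A toy Java model of that bookkeeping follows (all names are invented; this is a sketch of the rule the test exercises, not SsaBuilder's actual code):

    import java.util.Arrays;

    public class VRegPairSketch {
      enum Slot { INT, WIDE_LOW, WIDE_HIGH, INVALID }
      final Slot[] vregs;

      VRegPairSketch(int count) {
        vregs = new Slot[count];
        Arrays.fill(vregs, Slot.INVALID);
      }

      // An int store into vreg n kills any wide pair overlapping n.
      void storeInt(int n) {
        killPairAt(n);
        vregs[n] = Slot.INT;
      }

      // A wide store into (n, n+1) kills anything previously in either half.
      void storeWide(int n) {
        killPairAt(n);
        killPairAt(n + 1);
        vregs[n] = Slot.WIDE_LOW;
        vregs[n + 1] = Slot.WIDE_HIGH;
      }

      // Invalidate vreg n and, if it held half of a pair, the other half too.
      private void killPairAt(int n) {
        if (vregs[n] == Slot.WIDE_LOW && n + 1 < vregs.length) vregs[n + 1] = Slot.INVALID;
        if (vregs[n] == Slot.WIDE_HIGH && n > 0) vregs[n - 1] = Slot.INVALID;
        vregs[n] = Slot.INVALID;
      }

      public static void main(String[] args) {
        VRegPairSketch m = new VRegPairSketch(4);
        m.storeWide(0);  // v0/v1 hold a wide pair, as after 'move-wide v0, p0'.
        m.storeInt(1);   // Writing the high half must also kill the low half.
        if (m.vregs[0] != Slot.INVALID) throw new AssertionError("stale low vreg");
        System.out.println(Arrays.toString(m.vregs));
      }
    }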
diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java
new file mode 100644
index 0000000..9b502df
--- /dev/null
+++ b/test/550-checker-regression-wide-store/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static int runTestCase(String name, long arg) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod(name, long.class);
+    int result = (Integer) m.invoke(null, arg);
+    return result;
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    assertEquals(42, runTestCase("invalidateLow", 42L));
+  }
+}
diff --git a/test/550-new-instance-clinit/expected.txt b/test/550-new-instance-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-new-instance-clinit/expected.txt
diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt
new file mode 100644
index 0000000..c5fa3c7
--- /dev/null
+++ b/test/550-new-instance-clinit/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used
+to treat HNewInstance as having no side effects even though
+it can invoke a clinit method.
diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java
new file mode 100644
index 0000000..45e259e
--- /dev/null
+++ b/test/550-new-instance-clinit/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    int foo = Main.a;
+    new Bar();
+    foo = Main.a;
+    if (foo != 43) {
+      throw new Error("Expected 43, got " + foo);
+    }
+  }
+  static int a = 42;
+}
+
+class Bar {
+  static {
+    Main.a++;
+  }
+}
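The shape of the miscompilation, spelled out: with HNewInstance modeled as side-effect free, the second read of Main.a could be replaced with the first one, effectively hoisting it past the allocation that triggers Bar's class initializer. A sketch of what the broken code computed (names invented for illustration):

    public class ClinitSketch {
      static int a = 42;

      public static void main(String[] args) {
        int first = ClinitSketch.a;  // Reads 42.
        new Helper();                // Helper's clinit runs here; a becomes 43.
        int second = first;          // WRONG: the cached read misses the clinit's write.
        System.out.println(second);  // Prints 42; a correct reload would print 43.
      }
    }

    class Helper {
      static { ClinitSketch.a++; }
    }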
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 0747712..47fc50f 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -28,6 +28,18 @@
   exit 1
 fi
 
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # For the moment, skip JDWP tests when read barriers are enabled, as
+  # they sometimes exhibit a deadlock issue with the concurrent
+  # copying collector in the read barrier configuration, between the
+  # HeapTaskDaemon and the JDWP thread (b/25800335).
+  #
+  # TODO: Re-enable the JDWP tests when this deadlock issue is fixed.
+  echo "JDWP tests are temporarily disabled in the read barrier configuration because of"
+  echo "a deadlock issue (b/25800335)."
+  exit 0
+fi
+
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
 args=$@