Merge "Clean up profiler options"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1afbdfc..c09116f 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -277,6 +277,7 @@
   compiler/optimizing/suspend_check_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/intrusive_forward_list_test.cc \
+  compiler/utils/string_reference_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
   compiler/utils/transform_array_ref_test.cc \
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a3e7efa..e52dda3 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2521,28 +2521,11 @@
                                                               true);
     }
     // Create the conflict tables.
-    FillIMTAndConflictTables(klass);
-    return true;
-  }
-
- private:
-  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!klass->ShouldHaveImt()) {
-      return;
-    }
-    if (visited_classes_.find(klass) != visited_classes_.end()) {
-      return;
-    }
-    if (klass->HasSuperClass()) {
-      FillIMTAndConflictTables(klass->GetSuperClass());
-    }
-    if (!klass->IsTemp()) {
+    if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) {
       Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
     }
-    visited_classes_.insert(klass);
+    return true;
   }
-
-  std::set<mirror::Class*> visited_classes_;
 };
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 26ab281..7f2e193 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -780,9 +780,9 @@
                               EF_MIPS_PIC       |
                               EF_MIPS_CPIC      |
                               EF_MIPS_ABI_O32   |
-                              features->AsMipsInstructionSetFeatures()->IsR6()
-                                  ? EF_MIPS_ARCH_32R6
-                                  : EF_MIPS_ARCH_32R2);
+                              (features->AsMipsInstructionSetFeatures()->IsR6()
+                                   ? EF_MIPS_ARCH_32R6
+                                   : EF_MIPS_ARCH_32R2));
         break;
       }
       case kMips64: {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 063eb11..da10568 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1232,10 +1232,9 @@
       }
       // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
       // live.
-      if (as_klass->ShouldHaveImt()) {
-        ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        for (size_t i = 0; i < ImTable::kSize; ++i) {
-          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
+      if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
+        for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+          ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
           DCHECK(imt_method != nullptr);
           if (imt_method->IsRuntimeMethod() &&
               !IsInBootImage(imt_method) &&
@@ -1244,11 +1243,6 @@
           }
         }
       }
-
-      if (as_klass->ShouldHaveImt()) {
-        ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        TryAssignImTableOffset(imt, oat_index);
-      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
       int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
@@ -1275,23 +1269,6 @@
   return native_object_relocations_.find(ptr) != native_object_relocations_.end();
 }
 
-void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
-  // No offset, or already assigned.
-  if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
-    return;
-  }
-  // If the method is a conflict method we also want to assign the conflict table offset.
-  ImageInfo& image_info = GetImageInfo(oat_index);
-  const size_t size = ImTable::SizeInBytes(target_ptr_size_);
-  native_object_relocations_.emplace(
-      imt,
-      NativeObjectRelocation {
-          oat_index,
-          image_info.bin_slot_sizes_[kBinImTable],
-          kNativeObjectRelocationTypeIMTable});
-  image_info.bin_slot_sizes_[kBinImTable] += size;
-}
-
 void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
   // No offset, or already assigned.
   if (table == nullptr || NativeRelocationAssigned(table)) {
@@ -1414,7 +1391,6 @@
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
-        case kBinImTable:
         case kBinIMTConflictTable: {
           bin_offset = RoundUp(bin_offset, target_ptr_size_);
           break;
@@ -1485,10 +1461,6 @@
       bin_slot_offsets_[kBinArtMethodClean],
       bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
 
-  // IMT section.
-  ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
-  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
-
   // Conflict tables section.
   ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
   *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
@@ -1613,13 +1585,6 @@
   ImageWriter* const image_writer_;
 };
 
-void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
-  for (size_t i = 0; i < ImTable::kSize; ++i) {
-    ArtMethod* method = orig->Get(i, target_ptr_size_);
-    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
-  }
-}
-
 void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
   const size_t count = orig->NumEntries(target_ptr_size_);
   for (size_t i = 0; i < count; ++i) {
@@ -1677,12 +1642,6 @@
       case kNativeObjectRelocationTypeDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
-      case kNativeObjectRelocationTypeIMTable: {
-        ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
-        ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
-        CopyAndFixupImTable(orig_imt, dest_imt);
-        break;
-      }
       case kNativeObjectRelocationTypeIMTConflictTable: {
         auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
         CopyAndFixupImtConflictTable(
@@ -1891,25 +1850,13 @@
 }
 
 template <typename T>
-std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  oss << ptr;
-  return oss.str();
-}
-
-template <>
-std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-  return PrettyMethod(method);
-}
-
-template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
     auto it = native_object_relocations_.find(obj);
-    CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj)
-        << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces();
+    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+        << Runtime::Current()->GetHeap()->DumpSpaces();
     const NativeObjectRelocation& relocation = it->second;
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
     return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
@@ -2263,8 +2210,6 @@
       return kBinDexCacheArray;
     case kNativeObjectRelocationTypeRuntimeMethod:
       return kBinRuntimeMethod;
-    case kNativeObjectRelocationTypeIMTable:
-      return kBinImTable;
     case kNativeObjectRelocationTypeIMTConflictTable:
       return kBinIMTConflictTable;
   }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 1efdc22..51976c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -169,8 +169,6 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
-    // IMT (clean)
-    kBinImTable,
     // Conflict tables (clean).
     kBinIMTConflictTable,
     // Runtime methods (always clean, do not have a length prefix array).
@@ -193,7 +191,6 @@
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
     kNativeObjectRelocationTypeRuntimeMethod,
-    kNativeObjectRelocationTypeIMTable,
     kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
@@ -404,7 +401,6 @@
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
@@ -437,8 +433,6 @@
                           size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
   // relocation.
   void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index b9466ba..5316d59 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -430,7 +430,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -493,8 +495,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -507,7 +513,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -555,7 +561,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddConstant(index_reg, index_reg, offset_);
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted left by 2 (the TIMES_4
+        // scaling applied in the case of ArrayGet), as it is actually
+        // an offset to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -1879,6 +1889,8 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -1904,14 +1916,10 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  __ LoadFromOffset(kLoadWord, temp, temp,
-        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
-  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
@@ -6203,8 +6211,9 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check);
+      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
 }
 
 void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6217,10 +6226,14 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check);
+      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
 
 void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6228,6 +6241,7 @@
                                                                  Register obj,
                                                                  uint32_t offset,
                                                                  Location index,
+                                                                 ScaleFactor scale_factor,
                                                                  Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
@@ -6282,17 +6296,22 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    // Load types involving an "index": ArrayGet and
+    // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
-          (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
       __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
     } else {
-      __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
+      // a register pair as index ("long offset"), of which only the low
+      // part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor));
       __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
     }
   } else {
@@ -6940,11 +6959,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
   } else {
-    __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(),
-        locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kArmPointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 4fce5af..477c4f1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -472,6 +472,16 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -527,16 +537,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4692a4a..fc2c2c3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -598,7 +598,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -661,8 +663,12 @@
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -680,7 +686,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
@@ -728,7 +734,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ Add(index_reg, index_reg, Operand(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted left by 2 (the component
+        // size shift applied in the case of ArrayGet), as it is actually
+        // an offset to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -3496,6 +3506,8 @@
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   LocationSummary* locations = invoke->GetLocations();
   Register temp = XRegisterFrom(locations->GetTemp(0));
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
@@ -3525,10 +3537,6 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
-  __ Ldr(temp,
-      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -5102,8 +5110,16 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+  size_t no_scale_factor = 0U;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -5120,10 +5136,21 @@
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -5131,15 +5158,16 @@
                                                                    vixl::Register obj,
                                                                    uint32_t offset,
                                                                    Location index,
+                                                                   size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
                                                                    bool use_load_acquire) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
-  // If `index` is a valid location, then we are emitting an array
-  // load, so we shouldn't be using a Load Acquire instruction.
-  // In other words: `index.IsValid()` => `!use_load_acquire`.
-  DCHECK(!index.IsValid() || !use_load_acquire);
+  // If we are emitting an array load, we should not be using a
+  // Load-Acquire instruction. In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -5196,20 +5224,33 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
-    const size_t shift_amount = Primitive::ComponentSizeShift(type);
-    if (index.IsConstant()) {
-      uint32_t computed_offset = offset + (Int64ConstantFrom(index) << shift_amount);
-      Load(type, ref_reg, HeapOperand(obj, computed_offset));
+    // Load types involving an "index".
+    if (use_load_acquire) {
+      // UnsafeGetObjectVolatile intrinsic case.
+      // Register `index` is not an index in an object array, but an
+      // offset to an object reference field within object `obj`.
+      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
+      DCHECK(instruction->GetLocations()->Intrinsified());
+      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
+          << instruction->AsInvoke()->GetIntrinsic();
+      DCHECK_EQ(offset, 0U);
+      DCHECK_EQ(scale_factor, 0U);
+      DCHECK_EQ(needs_null_check, 0U);
+      // /* HeapReference<Object> */ ref = *(obj + index)
+      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
     } else {
-      temp2 = temps.AcquireW();
-      __ Add(temp2, obj, offset);
-      Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, shift_amount));
-      temps.Release(temp2);
+      // ArrayGet and UnsafeGetObject intrinsics cases.
+      // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      if (index.IsConstant()) {
+        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+        Load(type, ref_reg, HeapOperand(obj, computed_offset));
+      } else {
+        temp2 = temps.AcquireW();
+        __ Add(temp2, obj, offset);
+        Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
+        temps.Release(temp2);
+      }
     }
   } else {
     // /* HeapReference<Object> */ ref = *(obj + offset)
@@ -5312,10 +5353,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
   } else {
-    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
-        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kArm64PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   }
   __ Ldr(XRegisterFrom(locations->Out()),
          MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index e6fd336..d4bf695 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -531,6 +531,17 @@
                                              Location index,
                                              vixl::Register temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 size_t scale_factor,
+                                                 vixl::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -586,17 +597,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 vixl::Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 vixl::Register temp,
-                                                 bool needs_null_check,
-                                                 bool use_load_acquire);
-
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 7381806..4d44c18 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3698,6 +3698,8 @@
 void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
@@ -3714,10 +3716,6 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ LoadFromOffset(kLoadWord, temp, temp,
-      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -4730,7 +4728,6 @@
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
-  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4739,7 +4736,9 @@
     Register dst_low = locations->Out().AsRegisterPairLow<Register>();
     Register src = locations->InAt(0).AsRegister<Register>();
 
-    __ Move(dst_low, src);
+    if (dst_low != src) {
+      __ Move(dst_low, src);
+    }
     __ Sra(dst_high, src, 31);
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
     Register dst = locations->Out().AsRegister<Register>();
@@ -4768,7 +4767,9 @@
         }
         break;
       case Primitive::kPrimInt:
-        __ Move(dst, src);
+        if (dst != src) {
+          __ Move(dst, src);
+        }
         break;
 
       default:
@@ -4925,11 +4926,7 @@
         uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
         __ LoadConst32(TMP, High32Bits(min_val));
         __ Mtc1(ZERO, FTMP);
-        if (fpu_32bit) {
-          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
-        } else {
-          __ Mthc1(TMP, FTMP);
-        }
+        __ MoveToFpuHigh(TMP, FTMP);
       }
 
       if (isR6) {
@@ -5168,12 +5165,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
   } else {
-    __ LoadFromOffset(kLoadWord,
-                      locations->Out().AsRegister<Register>(),
-                      locations->InAt(0).AsRegister<Register>(),
-                      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kMipsPointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5587351..2e78884 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2932,6 +2932,8 @@
 void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
@@ -2948,10 +2950,6 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ LoadFromOffset(kLoadDoubleword, temp, temp,
-      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -2983,19 +2981,6 @@
   }
 
   HandleInvoke(invoke);
-
-  // While SetupBlockedRegisters() blocks registers S2-S8 due to their
-  // clobbering somewhere else, reduce further register pressure by avoiding
-  // allocation of a register for the current method pointer like on x86 baseline.
-  // TODO: remove this once all the issues with register saving/restoring are
-  // sorted out.
-  if (invoke->HasCurrentMethodInput()) {
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 52868f4..1261619 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -448,7 +448,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -511,8 +513,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -525,7 +531,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -573,7 +579,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(index_reg, Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -2017,6 +2027,8 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -2043,12 +2055,7 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  // temp = temp->GetAddressOfIMT()
-  __ movl(temp,
-      Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
@@ -4068,12 +4075,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
   } else {
-    __ movl(locations->InAt(0).AsRegister<Register>(),
-        Address(locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
-    // temp = temp->GetImtEntryAt(method_offset);
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kX86PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   }
   __ movl(locations->Out().AsRegister<Register>(),
           Address(locations->InAt(0).AsRegister<Register>(), method_offset));
@@ -6977,6 +6980,9 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fb402be..2a9fb80 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -491,6 +491,14 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -561,15 +569,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9a3e8d2..5e30203 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -469,7 +469,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -532,8 +534,12 @@
     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -546,7 +552,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute real offset and store it in index_.
         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
@@ -594,7 +600,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -2247,6 +2257,8 @@
   LocationSummary* locations = invoke->GetLocations();
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
@@ -2272,12 +2284,6 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  // temp = temp->GetAddressOfIMT()
-  __ movq(temp,
-      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-  // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -4001,11 +4007,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   } else {
-    __ movq(locations->Out().AsRegister<CpuRegister>(),
-            Address(locations->InAt(0).AsRegister<CpuRegister>(),
-            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kX86_64PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   }
   __ movq(locations->Out().AsRegister<CpuRegister>(),
           Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
@@ -6430,6 +6433,9 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index cf4cc4c..d7cfd37 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -433,6 +433,14 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -535,15 +543,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 CpuRegister obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index d5e80b4..c67b2d5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -656,8 +656,8 @@
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
-          method_index, pointer_size);
+      new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry(
+          method_index % mirror::Class::kImtSize, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
         // interface table.
@@ -756,7 +756,15 @@
     invoke_instruction->ReplaceWith(return_replacement);
   }
   invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  FixUpReturnReferenceType(method, return_replacement);
+  if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
+    // Actual return value has a more specific type than the method's declared
+    // return type. Run RTP again on the outer graph to propagate it.
+    ReferenceTypePropagation(graph_,
+                             outer_compilation_unit_.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
   return true;
 }
 
@@ -1159,6 +1167,15 @@
     }
   }
 
+  // We have replaced formal arguments with actual arguments. If actual types
+  // are more specific than the declared ones, run RTP again on the inner graph.
+  if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
+    ReferenceTypePropagation(callee_graph,
+                             dex_compilation_unit.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
+
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   size_t number_of_inlined_instructions =
       RunOptimizations(callee_graph, code_item, dex_compilation_unit);
@@ -1332,13 +1349,87 @@
   return number_of_inlined_instructions;
 }
 
-void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                        ArtMethod* resolved_method,
-                                        HInstruction* return_replacement,
-                                        bool do_rtp) {
+static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
+                                      bool declared_can_be_null,
+                                      HInstruction* actual_obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (declared_can_be_null && !actual_obj->CanBeNull()) {
+    return true;
+  }
+
+  ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+  return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
+         declared_rti.IsStrictSupertypeOf(actual_rti);
+}
+
+ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
+  return ReferenceTypePropagation::IsAdmissible(klass)
+      ? ReferenceTypeInfo::Create(handles_->NewHandle(klass))
+      : graph_->GetInexactObjectRti();
+}
+
+bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) {
+  // If this is an instance call, test whether the type of the `this` argument
+  // is more specific than the class which declares the method.
+  if (!resolved_method->IsStatic()) {
+    if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
+                                  /* declared_can_be_null */ false,
+                                  invoke_instruction->InputAt(0u))) {
+      return true;
+    }
+  }
+
+  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+
+  // Iterate over the list of parameter types and test whether any of the
+  // actual inputs has a more specific reference type than the type declared in
+  // the signature.
+  const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
+  for (size_t param_idx = 0,
+              input_idx = resolved_method->IsStatic() ? 0 : 1,
+              e = (param_list == nullptr ? 0 : param_list->Size());
+       param_idx < e;
+       ++param_idx, ++input_idx) {
+    HInstruction* input = invoke_instruction->InputAt(input_idx);
+    if (input->GetType() == Primitive::kPrimNot) {
+      mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType(
+          param_list->GetTypeItem(param_idx).type_idx_,
+          pointer_size);
+      if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
+                                    /* declared_can_be_null */ true,
+                                    input)) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
+                                      HInstruction* return_replacement) {
   // Check the integrity of reference types and run another type propagation if needed.
   if (return_replacement != nullptr) {
     if (return_replacement->GetType() == Primitive::kPrimNot) {
+      // Test if the return type is a refinement of the declared return type.
+      if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
+                                    /* declared_can_be_null */ true,
+                                    return_replacement)) {
+        return true;
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      // Inlining InstanceOf into an If may put a tighter bound on reference types.
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
+                                        HInstruction* return_replacement) {
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
       if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
         // Make sure that we have a valid type for the return. We may get an invalid one when
         // we inline invokes with multiple branches and create a Phi for the result.
@@ -1347,36 +1438,7 @@
         DCHECK(return_replacement->IsPhi());
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
-        if (cls != nullptr && !cls->IsErroneous()) {
-          ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls);
-          return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-              return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
-        } else {
-          // Return inexact object type on failures.
-          return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti());
-        }
-      }
-
-      if (do_rtp) {
-        // If the return type is a refinement of the declared type run the type propagation again.
-        ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-        ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-        if (invoke_rti.IsStrictSupertypeOf(return_rti)
-            || (return_rti.IsExact() && !invoke_rti.IsExact())
-            || !return_replacement->CanBeNull()) {
-          ReferenceTypePropagation(graph_,
-                                   outer_compilation_unit_.GetDexCache(),
-                                   handles_,
-                                   /* is_first_run */ false).Run();
-        }
-      }
-    } else if (return_replacement->IsInstanceOf()) {
-      if (do_rtp) {
-        // Inlining InstanceOf into an If may put a tighter bound on reference types.
-        ReferenceTypePropagation(graph_,
-                                 outer_compilation_unit_.GetDexCache(),
-                                 handles_,
-                                 /* is_first_run */ false).Run();
+        return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
       }
     }
   }
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7cf1424..02d3a5f 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -124,10 +124,18 @@
                                            uint32_t dex_pc) const
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                ArtMethod* resolved_method,
-                                HInstruction* return_replacement,
-                                bool do_rtp)
+  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
+  // admissible (see ReferenceTypePropagation::IsAdmissible for details).
+  // Otherwise returns inexact Object RTI.
+  ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add a type guard on the given `receiver`. This will add to the graph:
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index afac5f9..b412529 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,7 +16,6 @@
 
 #include "instruction_builder.h"
 
-#include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "driver/compiler_options.h"
@@ -891,7 +890,7 @@
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           resolved_method->GetImtIndex());
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 62d6370..3041c4d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -236,22 +236,40 @@
 
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
-  HConstant* input_cst = instruction->GetConstantRight();
-  HInstruction* input_other = instruction->GetLeastConstantLeft();
+  HInstruction* shift_amount = instruction->GetRight();
+  HInstruction* value = instruction->GetLeft();
 
-  if (input_cst != nullptr) {
-    int64_t cst = Int64FromConstant(input_cst);
-    int64_t mask = (input_other->GetType() == Primitive::kPrimLong)
-        ? kMaxLongShiftDistance
-        : kMaxIntShiftDistance;
-    if ((cst & mask) == 0) {
+  int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong)
+      ? kMaxLongShiftDistance
+      : kMaxIntShiftDistance;
+
+  if (shift_amount->IsConstant()) {
+    int64_t cst = Int64FromConstant(shift_amount->AsConstant());
+    if ((cst & implicit_mask) == 0) {
       // Replace code looking like
-      //    SHL dst, src, 0
+      //    SHL dst, value, 0
       // with
-      //    src
-      instruction->ReplaceWith(input_other);
+      //    value
+      instruction->ReplaceWith(value);
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
+      return;
+    }
+  }
+
+  // Shift operations implicitly mask the shift amount according to the type width. Get rid of
+  // unnecessary explicit masking operations on the shift amount.
+  // Replace code looking like
+  //    AND masked_shift, shift, <superset of implicit mask>
+  //    SHL dst, value, masked_shift
+  // with
+  //    SHL dst, value, shift
+  if (shift_amount->IsAnd()) {
+    HAnd* and_insn = shift_amount->AsAnd();
+    HConstant* mask = and_insn->GetConstantRight();
+    if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) {
+      instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1);
+      RecordSimplification();
     }
   }
 }
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 93950d5..19629b1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -47,19 +47,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -524,8 +511,8 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
           if (is_volatile) {
             __ dmb(ISH);
           }
@@ -581,10 +568,11 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -919,9 +907,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCas).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCas and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -932,6 +921,15 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCas).
+  //
+  // TODO(rpl): Implement read barrier support in GenCas and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
@@ -1335,6 +1333,12 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   LocationSummary* locations = invoke->GetLocations();
   if (locations == nullptr) {
@@ -1419,11 +1423,11 @@
   }
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathARM
-// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1972,6 +1976,50 @@
   __ revsh(out, in);
 }
 
+static void GenBitCount(HInvoke* instr, bool is64bit, ArmAssembler* assembler) {
+  DCHECK(instr->GetType() == Primitive::kPrimInt);
+  DCHECK((is64bit && instr->InputAt(0)->GetType() == Primitive::kPrimLong) ||
+         (!is64bit && instr->InputAt(0)->GetType() == Primitive::kPrimInt));
+
+  LocationSummary* locations = instr->GetLocations();
+  Location     in = locations->InAt(0);
+  Register  src_0 = is64bit ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>();
+  Register  src_1 = is64bit ? in.AsRegisterPairHigh<Register>() : src_0;
+  SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+  DRegister tmp_d = FromLowSToD(tmp_s);
+  Register  out_r = locations->Out().AsRegister<Register>();
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
+  // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ vmovdrr(tmp_d, src_1, src_0);                         // Temp DReg |--src_1|--src_0|
+  __ vcntd(tmp_d, tmp_d);                                  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true);     // Temp DReg |--c|--c|--c|--c|
+  __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true);    // Temp DReg |------c|------c|
+  if (is64bit) {
+    __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true);  // Temp DReg |--------------c|
+  }
+  __ vmovrs(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is64bit */ false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is64bit */ true, GetAssembler());
+}
+
 void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
@@ -2112,8 +2160,6 @@
   __ Lsr(out, out, 5);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4da0843..1685cf9 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -149,19 +149,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -791,8 +778,15 @@
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
     UseScratchRegisterScope temps(masm);
     Register temp = temps.AcquireW();
-    codegen->GenerateArrayLoadWithBakerReadBarrier(
-        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                       trg_loc,
+                                                       base,
+                                                       /* offset */ 0U,
+                                                       /* index */ offset_loc,
+                                                       /* scale_factor */ 0U,
+                                                       temp,
+                                                       /* needs_null_check */ false,
+                                                       is_volatile);
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
@@ -821,7 +815,8 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1102,9 +1097,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCas).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCas and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -1119,6 +1115,15 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCas).
+  //
+  // TODO(rpl): Implement read barrier support in GenCas and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
@@ -2012,6 +2017,12 @@
 // We want to use two temporary registers in order to reduce the register pressure in arm64.
 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   // Check to see if we have known failures that will cause us to have to bail out
   // to the runtime, and just generate the runtime call directly.
   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
@@ -2064,6 +2075,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   vixl::MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4988398..031cd13 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -60,19 +60,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -1822,8 +1809,9 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1878,16 +1866,17 @@
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
     } else {
       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
     }
   } else {
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(),
+                      can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -2109,9 +2098,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2236,6 +2225,15 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 593c8f3..c5b44d4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -54,19 +54,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86_64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86_64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -1079,14 +1066,20 @@
 
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathX86_64
-// slow path.
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1910,8 +1903,9 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1948,10 +1942,11 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -2135,9 +2130,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2253,6 +2248,15 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -2441,7 +2445,7 @@
                       : CTZ(static_cast<uint32_t>(value));
     }
     if (is_long) {
-      codegen->Load64BitValue(out, 1L << value);
+      codegen->Load64BitValue(out, 1ULL << value);
     } else {
       codegen->Load32BitValue(out, 1 << value);
     }
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 8a75a90..7347686 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -65,6 +65,16 @@
         is_singleton_and_not_returned_ = false;
         return;
       }
+      if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
+        // The field is accessed in an unresolved way. We mark the object as a non-singleton to
+        // disable load/store optimizations on it.
+        // Note that we could optimize this case and still perform some optimizations until
+        // we hit the unresolved access, but disabling is the simplest.
+        is_singleton_ = false;
+        is_singleton_and_not_returned_ = false;
+        return;
+      }
       if (user->IsReturn()) {
         is_singleton_and_not_returned_ = false;
       }
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 63bbc2c..3f27c91 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -38,7 +38,13 @@
 class Location : public ValueObject {
  public:
   enum OutputOverlap {
+    // The liveness of the output overlaps the liveness of one or
+    // several input(s); the register allocator cannot reuse an
+    // input's location for the output's location.
     kOutputOverlap,
+    // The liveness of the output does not overlap the liveness of any
+    // input; the register allocator is allowed to reuse an input's
+    // location for the output's location.
     kNoOutputOverlap
   };
 
@@ -494,6 +500,10 @@
     return inputs_.size();
   }
 
+  // Set the output location.  Argument `overlaps` tells whether the
+  // output overlaps any of the inputs (if so, it cannot share the
+  // same register as one of the inputs); it is set to
+  // `Location::kOutputOverlap` by default for safety.
   void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
     DCHECK(output_.IsInvalid());
     output_overlaps_ = overlaps;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 455f4e3..6b2c33e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -172,6 +172,10 @@
 
   static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact);
 
+  static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes());
+  }
+
   static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) {
     return ReferenceTypeInfo(type_handle, is_exact);
   }
@@ -5025,7 +5029,7 @@
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   size_t ComputeHashCode() const OVERRIDE {
@@ -5072,7 +5076,7 @@
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   const FieldInfo& GetFieldInfo() const { return field_info_; }
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 764160a..05eb063 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,21 +32,21 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
-    0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
+    0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str x0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000004: str x20, [sp, #40]
 // 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: .cfi_offset: r21 at cfa-16
-// 0x00000008: str lr, [sp, #56]
+// 0x00000008: stp x21, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
 // 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -55,10 +55,10 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x20, x21, [sp, #40]
+// 0x00000014: ldr x20, [sp, #40]
 // 0x00000018: .cfi_restore: r20
-// 0x00000018: .cfi_restore: r21
-// 0x00000018: ldr lr, [sp, #56]
+// 0x00000018: ldp x21, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
 // 0x00000020: .cfi_def_cfa_offset: 0
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 3e6adcb..3dfd728 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -46,13 +46,6 @@
   return *cache;
 }
 
-// Returns true if klass is admissible to the propagation: non-null and resolved.
-// For an array type, we also check if the component type is admissible.
-static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
-  return klass != nullptr && klass->IsResolved() &&
-      (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
-}
-
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
   return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
 }
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 2106be6..edd83bf 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -42,6 +42,14 @@
 
   void Run() OVERRIDE;
 
+  // Returns true if klass is admissible to the propagation: non-null and resolved.
+  // For an array type, we also check if the component type is admissible.
+  static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return klass != nullptr &&
+           klass->IsResolved() &&
+           (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
+  }
+
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 274d0de..a571d14 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -671,6 +671,9 @@
   virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
   virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
 
+  virtual void vcntd(DRegister dd, DRegister dm) = 0;
+  virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0;
+
   virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 0a227b2..6f7119d 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1264,6 +1264,31 @@
   Emit(encoding);
 }
 
+void Arm32Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
+void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
 
 void Arm32Assembler::svc(uint32_t imm24) {
   CHECK(IsUint<24>(imm24)) << imm24;
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index bc6020e..8726ac8 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -205,6 +205,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index e570e22..b214062 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -899,4 +899,43 @@
   T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
 }
 
+TEST_F(AssemblerArm32Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  GetAssembler()->vcntd(arm::D0, arm::D1);
+  GetAssembler()->vcntd(arm::D19, arm::D20);
+  GetAssembler()->vcntd(arm::D0, arm::D9);
+  GetAssembler()->vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerArm32Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true);
+  GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false);
+  GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false);
+  GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 546dd65..a72ea41 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -3117,6 +3117,30 @@
   Emit32(encoding);
 }
 
+void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
+
+void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
 
 void Thumb2Assembler::svc(uint32_t imm8) {
   CHECK(IsUint<8>(imm8)) << imm8;
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index ce310a4..2ca74fc 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -250,6 +250,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index b5cafcb..7f1dc49 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1380,4 +1380,43 @@
   DriverStr(expected, "revsh");
 }
 
+TEST_F(AssemblerThumb2Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  __ vcntd(arm::D0, arm::D1);
+  __ vcntd(arm::D19, arm::D20);
+  __ vcntd(arm::D0, arm::D9);
+  __ vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerThumb2Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd,
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  __ vpaddld(arm::D0, arm::D0, 8, true);
+  __ vpaddld(arm::D20, arm::D20, 8, false);
+  __ vpaddld(arm::D0, arm::D20, 16, false);
+  __ vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 1842f00..54ed62b 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -648,6 +648,15 @@
 void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) {
   int size = registers.RegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
+  // Since we are operating on register pairs, we would like to align on
+  // double the standard size; on the other hand, we don't want to insert
+  // an extra store, which will happen if the number of registers is even.
+  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Str(dst0, MemOperand(sp, offset));
+    cfi_.RelOffset(DWARFReg(dst0), offset);
+    offset += size;
+  }
   while (registers.Count() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
@@ -667,6 +676,13 @@
 void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) {
   int size = registers.RegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
+  // Be consistent with the logic for spilling registers.
+  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Ldr(dst0, MemOperand(sp, offset));
+    cfi_.Restore(DWARFReg(dst0));
+    offset += size;
+  }
   while (registers.Count() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
diff --git a/compiler/utils/string_reference.h b/compiler/utils/string_reference.h
index 9e1058e..e4c34ca 100644
--- a/compiler/utils/string_reference.h
+++ b/compiler/utils/string_reference.h
@@ -20,16 +20,19 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "dex_file-inl.h"
 #include "utf-inl.h"
 
 namespace art {
 
-class DexFile;
-
 // A string is located by its DexFile and the string_ids_ table index into that DexFile.
 struct StringReference {
   StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
 
+  const char* GetStringData() const {
+    return dex_file->GetStringData(dex_file->GetStringId(string_index));
+  }
+
   const DexFile* dex_file;
   uint32_t string_index;
 };
@@ -46,15 +49,13 @@
       // Use the string order enforced by the dex file verifier.
       DCHECK_EQ(
           sr1.string_index < sr2.string_index,
-          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-              sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-              sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0);
+          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                  sr2.GetStringData()) < 0);
       return sr1.string_index < sr2.string_index;
     } else {
       // Cannot compare indexes, so do the string comparison.
-      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-          sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-          sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0;
+      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                     sr2.GetStringData()) < 0;
     }
   }
 };
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
new file mode 100644
index 0000000..df5080e
--- /dev/null
+++ b/compiler/utils/string_reference_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/string_reference.h"
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "utils/test_dex_file_builder.h"
+
+namespace art {
+
+TEST(StringReference, ValueComparator) {
+  // This is a regression test for the StringReferenceValueComparator using the wrong
+  // dex file to get the string data from a StringId. We construct two dex files with
+  // just a single string with the same length but different value. This creates dex
+  // files that have the same layout, so the byte offset read from the StringId in one
+  // dex file, when used in the other dex file still points to valid string data, except
+  // that it's the wrong string. Without the fix the strings would then compare equal.
+  TestDexFileBuilder builder1;
+  builder1.AddString("String1");
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(1u, dex_file1->NumStringIds());
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
+  StringReference sr1(dex_file1.get(), 0);
+
+  TestDexFileBuilder builder2;
+  builder2.AddString("String2");
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(1u, dex_file2->NumStringIds());
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
+  StringReference sr2(dex_file2.get(), 0);
+
+  StringReferenceValueComparator cmp;
+  EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
+  EXPECT_FALSE(cmp(sr2, sr1));  // "String2" < "String1" is false.
+}
+
+TEST(StringReference, ValueComparator2) {
+  const char* const kDexFile1Strings[] = {
+      "",
+      "abc",
+      "abcxyz",
+  };
+  const char* const kDexFile2Strings[] = {
+      "a",
+      "abc",
+      "abcdef",
+      "def",
+  };
+  const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = {
+      { true, true, true, true },
+      { false, false, true, true },
+      { false, false, false, true },
+  };
+  const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = {
+      { false, true, true },
+      { false, false, true },
+      { false, false, true },
+      { false, false, false },
+  };
+
+  TestDexFileBuilder builder1;
+  for (const char* s : kDexFile1Strings) {
+    builder1.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
+    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+  }
+
+  TestDexFileBuilder builder2;
+  for (const char* s : kDexFile2Strings) {
+    builder2.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 1");
+  ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
+    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+  }
+
+  StringReferenceValueComparator cmp;
+  for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
+    for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
+      StringReference sr1(dex_file1.get(), index1);
+      StringReference sr2(dex_file2.get(), index2);
+      EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
+      EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 286faf2..1a3e3f5 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -1497,6 +1497,25 @@
           }
           break;
         }
+        case 0x7B: case 0x7F: {
+          FpRegister d(instr, 12, 22);
+          FpRegister m(instr, 0, 5);
+          uint32_t sz = (instr >> 18) & 0x3;  // Decode size bits.
+          uint32_t size = (sz == 0) ? 8 : sz << 4;
+          uint32_t opc2 = (instr >> 7) & 0xF;
+          uint32_t Q = (instr >> 6) & 1;
+          if (Q == 0 && opc2 == 0xA && size == 8) {  // 1010, VCNT
+            opcode << "vcnt." << size;
+            args << d << ", " << m;
+          } else if (Q == 0 && (opc2 == 0x4 || opc2 == 0x5) && size <= 32) {  // 010x, VPADDL
+            bool op = HasBitSet(instr, 7);
+            opcode << "vpaddl." << (op ? "u" : "s") << size;
+            args << d << ", " << m;
+          } else {
+            opcode << "UNKNOWN " << op2;
+          }
+          break;
+        }
       default:      // more formats
         if ((op2 >> 4) == 2) {      // 010xxxx
           // data processing (register)
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 5bb61bb..0a7ffda 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -494,17 +494,6 @@
   image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
 }
 
-void PatchOat::PatchImTables(const ImageHeader* image_header) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
-  // We can safely walk target image since the conflict tables are independent.
-  image_header->VisitPackedImTables(
-      [this](ArtMethod* method) {
-        return RelocatedAddressOfPointer(method);
-      },
-      image_->Begin(),
-      pointer_size);
-}
-
 void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
   const size_t pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
@@ -647,7 +636,6 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
-  PatchImTables(image_header);
   PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
   PatchClassTable(image_header);
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 61ec695..3ef837f 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -117,7 +117,6 @@
   bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
-  void PatchImTables(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchImtConflictTables(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 462c397..cd0aa6f 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -61,17 +61,21 @@
     ASSERT_TRUE(file_info.Equals(info));
   }
 
-    // Runs test with given arguments.
-  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+  std::string GetProfmanCmd() {
     std::string file_path = GetTestAndroidRoot();
     file_path += "/bin/profman";
     if (kIsDebugBuild) {
       file_path += "d";
     }
-
-    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+    EXPECT_TRUE(OS::FileExists(file_path.c_str()))
+        << file_path << " should be a valid file path";
+    return file_path;
+  }
+  // Runs test with given arguments.
+  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+    std::string profman_cmd = GetProfmanCmd();
     std::vector<std::string> argv_str;
-    argv_str.push_back(file_path);
+    argv_str.push_back(profman_cmd);
     for (size_t k = 0; k < profiles_fd.size(); k++) {
       argv_str.push_back("--profile-file-fd=" + std::to_string(profiles_fd[k]));
     }
@@ -80,6 +84,15 @@
     std::string error;
     return ExecAndReturnCode(argv_str, &error);
   }
+
+  bool GenerateTestProfile(const std::string& filename) {  // True iff profman exits with 0.
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error) == 0;  // Exit status 0 means success.
+  }
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -282,4 +295,15 @@
   CheckProfileInfo(profile1, info1);
 }
 
+TEST_F(ProfileAssistantTest, TestProfileGeneration) {
+  ScratchFile profile;
+  // Generate a test profile.
+  GenerateTestProfile(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index d2c9cb2..a5fefa7 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -100,6 +100,14 @@
   UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
   UsageError("      accepts a file descriptor. Cannot be used together with");
   UsageError("      --reference-profile-file.");
+  UsageError("  --generate-test-profile=<filename>: generates a random profile file for testing.");
+  UsageError("  --generate-test-profile-num-dex=<number>: number of dex files that should be");
+  UsageError("      included in the generated profile. Defaults to 20.");
+  UsageError("  --generate-test-profile-method-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of methods that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("");
   UsageError("");
   UsageError("  --dex-location=<string>: location string to use with corresponding");
   UsageError("      apk-fd to find dex files");
@@ -111,12 +119,20 @@
   exit(EXIT_FAILURE);
 }
 
+// Note: make sure you update the Usage if you change these values.
+static constexpr uint16_t kDefaultTestProfileNumDex = 20;
+static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
+static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
+
 class ProfMan FINAL {
  public:
   ProfMan() :
       reference_profile_file_fd_(kInvalidFd),
       dump_only_(false),
       dump_output_to_fd_(kInvalidFd),
+      test_profile_num_dex_(kDefaultTestProfileNumDex),
+      test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
+      test_profile_class_ratio_(kDefaultTestProfileClassRatio),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -159,6 +175,23 @@
         dex_locations_.push_back(option.substr(strlen("--dex-location=")).ToString());
       } else if (option.starts_with("--apk-fd=")) {
         ParseFdForCollection(option, "--apk-fd", &apks_fd_);
+      } else if (option.starts_with("--generate-test-profile=")) {
+        test_profile_ = option.substr(strlen("--generate-test-profile=")).ToString();
+      } else if (option.starts_with("--generate-test-profile-num-dex=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-num-dex",
+                        &test_profile_num_dex_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-method-ratio=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-method-ratio",
+                        &test_profile_method_ratio_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-class-ratio=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-class-ratio",
+                        &test_profile_class_ratio_,
+                        Usage);
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -168,6 +201,15 @@
     bool has_reference_profile = !reference_profile_file_.empty() ||
         FdIsValid(reference_profile_file_fd_);
 
+    if (!test_profile_.empty()) {
+      if (test_profile_method_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-method-ratio");
+      }
+      if (test_profile_class_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-class-ratio");
+      }
+      return;
+    }
     // --dump-only may be specified with only --reference-profiles present.
     if (!dump_only_ && !has_profiles) {
       Usage("No profile files specified.");
@@ -317,6 +359,25 @@
     return dump_only_;
   }
 
+  int GenerateTestProfile() {  // Returns 0 on success, -1 on failure.
+    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    if (profile_test_fd < 0) {
+      std::cerr << "Cannot open " << test_profile_ << ": " << strerror(errno) << "\n";
+      return -1;
+    }
+
+    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                             test_profile_num_dex_,
+                                                             test_profile_method_ratio_,
+                                                             test_profile_class_ratio_);
+    close(profile_test_fd);  // ignore close result.
+    return result ? 0 : -1;
+  }
+
+  bool ShouldGenerateTestProfile() {
+    return !test_profile_.empty();
+  }
+
  private:
   static void ParseFdForCollection(const StringPiece& option,
                                    const char* arg_name,
@@ -350,6 +411,10 @@
   int reference_profile_file_fd_;
   bool dump_only_;
   int dump_output_to_fd_;
+  std::string test_profile_;
+  uint16_t test_profile_num_dex_;
+  uint16_t test_profile_method_ratio_;
+  uint16_t test_profile_class_ratio_;
   uint64_t start_ns_;
 };
 
@@ -360,6 +425,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  if (profman.ShouldGenerateTestProfile()) {
+    return profman.GenerateTestProfile();
+  }
   if (profman.ShouldOnlyDumpProfile()) {
     return profman.DumpProfileInfo();
   }
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index d81e0a9..d105c67 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 
 namespace art {
 
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_implicit_suspend();
 
@@ -107,8 +107,10 @@
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // ARM PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.  However the mapping table has
@@ -122,7 +124,10 @@
 
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 27a41f0..0797def 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -299,6 +299,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 3e9ad0d..f591fcc 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -29,7 +29,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 //
@@ -84,8 +84,10 @@
   *out_return_pc = sc->pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -95,7 +97,10 @@
 
   sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
 
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index a6490ae..10ee63f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -406,6 +406,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 8ea78eb..754284c 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips specific fault handler functions.
@@ -71,8 +71,10 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -81,8 +83,11 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
   sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal ($a0 is general register 4).
+  sc->sc_regs[4] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index bb89674..c1b8044 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -506,6 +506,18 @@
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
+
+    /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    la   $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
     /*
      * Called by managed code to create and deliver an ArithmeticException
      */
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 4abfcf1..c9a32ad 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips64 specific fault handler functions.
@@ -71,8 +71,11 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
+
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -81,8 +84,11 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
   sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
   sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal ($a0 is GPR 4; GPR 0 is $zero).
+  sc->sc_regs[4] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 78ac748..ae69620 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -593,6 +593,17 @@
 END art_quick_throw_null_pointer_exception
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    dla  $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index d7c4cb1..24e3a0d 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -36,6 +36,7 @@
 #define CTX_EIP uc_mcontext->__ss.__rip
 #define CTX_EAX uc_mcontext->__ss.__rax
 #define CTX_METHOD uc_mcontext->__ss.__rdi
+#define CTX_RDI uc_mcontext->__ss.__rdi
 #define CTX_JMP_BUF uc_mcontext->__ss.__rdi
 #else
 // 32 bit mac build.
@@ -71,12 +72,12 @@
 
 #if defined(__APPLE__) && defined(__x86_64__)
 // mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception();
+extern "C" void _art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void _art_quick_throw_stack_overflow();
 extern "C" void _art_quick_test_suspend();
 #define EXT_SYM(sym) _ ## sym
 #else
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
 #define EXT_SYM(sym) sym
@@ -292,7 +293,10 @@
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
-bool NullPointerHandler::Action(int, siginfo_t*, void* context) {
+bool NullPointerHandler::Action(int, siginfo_t* sig, void* context) {
+  if (!IsValidImplicitCheck(sig)) {
+    return false;
+  }
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
@@ -314,7 +318,15 @@
   *next_sp = retaddr;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_null_pointer_exception));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(
+      EXT_SYM(art_quick_throw_null_pointer_exception_from_signal));
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+#if defined(__x86_64__)
+  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
+#else
+  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
+#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index b3dd454..5851fbd 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -284,6 +284,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 205307c..e777e6c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -309,6 +309,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 32ae6ff..26450c4 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -120,10 +120,6 @@
   return dex_method_index_;
 }
 
-inline uint32_t ArtMethod::GetImtIndex() {
-  return GetDexMethodIndex() % ImTable::kSize;
-}
-
 inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
   return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
                                        pointer_size);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 849af97..2b025f8 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -99,22 +99,6 @@
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
-  // Return true if two conflict tables are the same.
-  bool Equals(ImtConflictTable* other, size_t pointer_size) const {
-    size_t num = NumEntries(pointer_size);
-    if (num != other->NumEntries(pointer_size)) {
-      return false;
-    }
-    for (size_t i = 0; i < num; ++i) {
-      if (GetInterfaceMethod(i, pointer_size) != other->GetInterfaceMethod(i, pointer_size) ||
-          GetImplementationMethod(i, pointer_size) !=
-              other->GetImplementationMethod(i, pointer_size)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
   // Visit all of the entries.
   // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
   // and also returns one. The order is <interface, implementation>.
@@ -419,8 +403,6 @@
 
   ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
-
   void SetDexMethodIndex(uint32_t new_idx) {
     // Not called within a transaction.
     dex_method_index_ = new_idx;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7c00315..fe7448f 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -857,13 +857,11 @@
     if (vtable != nullptr) {
       SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
-    if (klass->ShouldHaveImt()) {
-      ImTable* imt = klass->GetImt(pointer_size);
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
-        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+        SanityCheckArtMethod(
+            klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_spaces);
       }
-    }
-    if (klass->ShouldHaveEmbeddedVTable()) {
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
         SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
@@ -3458,11 +3456,16 @@
     new_class->SetClassFlags(mirror::kClassFlagObjectArray);
   }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
-  new_class->PopulateEmbeddedVTable(image_pointer_size_);
+  {
+    ArtMethod* imt[mirror::Class::kImtSize];
+    std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
+    new_class->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
+  }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
+
   // All arrays have java/lang/Cloneable and java/io/Serializable as
   // interfaces.  We need to set that up here, so that stuff like
   // "instanceof" works right.
@@ -5033,11 +5036,9 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  ArtMethod* imt_data[ImTable::kSize];
-  // If there are any new conflicts compared to super class.
-  bool new_conflict = false;
-  std::fill_n(imt_data, arraysize(imt_data), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, &new_conflict, imt_data)) {
+  ArtMethod* imt[mirror::Class::kImtSize];
+  std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
+  if (!LinkMethods(self, klass, interfaces, imt)) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
@@ -5050,45 +5051,15 @@
   CreateReferenceInstanceOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
-  ImTable* imt = nullptr;
-  if (klass->ShouldHaveImt()) {
-    // If there are any new conflicts compared to the super class we can not make a copy. There
-    // can be cases where both will have a conflict method at the same slot without having the same
-    // set of conflicts. In this case, we can not share the IMT since the conflict table slow path
-    // will possibly create a table that is incorrect for either of the classes.
-    // Same IMT with new_conflict does not happen very often.
-    if (!new_conflict && klass->HasSuperClass() && klass->GetSuperClass()->ShouldHaveImt()) {
-      ImTable* super_imt = klass->GetSuperClass()->GetImt(image_pointer_size_);
-      bool imt_equals = true;
-      for (size_t i = 0; i < ImTable::kSize && imt_equals; ++i) {
-        imt_equals = imt_equals && (super_imt->Get(i, image_pointer_size_) == imt_data[i]);
-      }
-      if (imt_equals) {
-        imt = super_imt;
-      }
-    }
-    if (imt == nullptr) {
-      LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
-      imt = reinterpret_cast<ImTable*>(
-          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
-      if (imt == nullptr) {
-        return false;
-      }
-      imt->Populate(imt_data, image_pointer_size_);
-    }
-  }
-
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
     CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
 
-    if (klass->ShouldHaveEmbeddedVTable()) {
-      klass->PopulateEmbeddedVTable(image_pointer_size_);
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      klass->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
     }
-    if (klass->ShouldHaveImt()) {
-      klass->SetImt(imt, image_pointer_size_);
-    }
+
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5480,7 +5451,6 @@
 bool ClassLinker::LinkMethods(Thread* self,
                               Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                              bool* out_new_conflict,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
@@ -5492,7 +5462,7 @@
   // any vtable entries with new default method implementations.
   return SetupInterfaceLookupTable(self, klass, interfaces)
           && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
-          && LinkInterfaceMethods(self, klass, default_translations, out_new_conflict, out_imt);
+          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -5650,7 +5620,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     MutableHandle<mirror::PointerArray> vtable;
-    if (super_class->ShouldHaveEmbeddedVTable()) {
+    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
       vtable = hs.NewHandle(AllocPointerArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
@@ -6050,7 +6020,6 @@
 void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
                             ArtMethod* imt_conflict_method,
                             ArtMethod* current_method,
-                            /*out*/bool* new_conflict,
                             /*out*/ArtMethod** imt_ref) {
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
@@ -6067,77 +6036,40 @@
       *imt_ref = current_method;
     } else {
       *imt_ref = imt_conflict_method;
-      *new_conflict = true;
     }
   } else {
     // Place the default conflict method. Note that there may be an existing conflict
     // method in the IMT, but it could be one tailored to the super class, with a
     // specific ImtConflictTable.
     *imt_ref = imt_conflict_method;
-    *new_conflict = true;
   }
 }
 
 void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
-  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
+  DCHECK(klass->ShouldHaveEmbeddedImtAndVTable()) << PrettyClass(klass);
   DCHECK(!klass->IsTemp()) << PrettyClass(klass);
-  ArtMethod* imt_data[ImTable::kSize];
+  ArtMethod* imt[mirror::Class::kImtSize];
   Runtime* const runtime = Runtime::Current();
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
   ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
-  std::fill_n(imt_data, arraysize(imt_data), unimplemented_method);
+  std::fill_n(imt, arraysize(imt), unimplemented_method);
   if (klass->GetIfTable() != nullptr) {
-    bool new_conflict = false;
     FillIMTFromIfTable(klass->GetIfTable(),
                        unimplemented_method,
                        conflict_method,
                        klass,
-                       /*create_conflict_tables*/true,
-                       /*ignore_copied_methods*/false,
-                       &new_conflict,
-                       &imt_data[0]);
+                       /*create_conflict_tables*/true,
+                       /*ignore_copied_methods*/false,
+                       /*out*/&imt[0]);
   }
-  if (!klass->ShouldHaveImt()) {
-    return;
+  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+    klass->SetEmbeddedImTableEntry(i, imt[i], image_pointer_size_);
   }
-  // Compare the IMT with the super class including the conflict methods. If they are equivalent,
-  // we can just use the same pointer.
-  ImTable* imt = nullptr;
-  mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class != nullptr && super_class->ShouldHaveImt()) {
-    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
-    bool same = true;
-    for (size_t i = 0; same && i < ImTable::kSize; ++i) {
-      ArtMethod* method = imt_data[i];
-      ArtMethod* super_method = super_imt->Get(i, image_pointer_size_);
-      if (method != super_method) {
-        bool is_conflict_table = method->IsRuntimeMethod() &&
-                                 method != unimplemented_method &&
-                                 method != conflict_method;
-        // Verify conflict contents.
-        bool super_conflict_table = super_method->IsRuntimeMethod() &&
-                                    super_method != unimplemented_method &&
-                                    super_method != conflict_method;
-        if (!is_conflict_table || !super_conflict_table) {
-          same = false;
-        } else {
-          ImtConflictTable* table1 = method->GetImtConflictTable(image_pointer_size_);
-          ImtConflictTable* table2 = super_method->GetImtConflictTable(image_pointer_size_);
-          same = same && table1->Equals(table2, image_pointer_size_);
-        }
-      }
-    }
-    if (same) {
-      imt = super_imt;
-    }
-  }
-  if (imt == nullptr) {
-    imt = klass->GetImt(image_pointer_size_);
-    DCHECK(imt != nullptr);
-    imt->Populate(imt_data, image_pointer_size_);
-  } else {
-    klass->SetImt(imt, image_pointer_size_);
-  }
+}
+
+static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
 }
 
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
@@ -6159,9 +6091,8 @@
                                      mirror::Class* klass,
                                      bool create_conflict_tables,
                                      bool ignore_copied_methods,
-                                     /*out*/bool* new_conflict,
-                                     /*out*/ArtMethod** imt) {
-  uint32_t conflict_counts[ImTable::kSize] = {};
+                                     ArtMethod** imt) {
+  uint32_t conflict_counts[mirror::Class::kImtSize] = {};
   for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
     mirror::Class* interface = if_table->GetInterface(i);
     const size_t num_virtuals = interface->NumVirtualMethods();
@@ -6191,7 +6122,7 @@
       // or interface methods in the IMT here they will not create extra conflicts since we compare
       // names and signatures in SetIMTRef.
       ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-      const uint32_t imt_index = interface_method->GetImtIndex();
+      const uint32_t imt_index = GetIMTIndex(interface_method);
 
       // There is only any conflicts if all of the interface methods for an IMT slot don't have
       // the same implementation method, keep track of this to avoid creating a conflict table in
@@ -6203,7 +6134,6 @@
       SetIMTRef(unimplemented_method,
                 imt_conflict_method,
                 implementation_method,
-                /*out*/new_conflict,
                 /*out*/&imt[imt_index]);
     }
   }
@@ -6211,7 +6141,7 @@
   if (create_conflict_tables) {
     // Create the conflict tables.
     LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
       size_t conflicts = conflict_counts[i];
       if (imt[i] == imt_conflict_method) {
         ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
@@ -6245,7 +6175,7 @@
         }
         DCHECK(implementation_method != nullptr);
         ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-        const uint32_t imt_index = interface_method->GetImtIndex();
+        const uint32_t imt_index = GetIMTIndex(interface_method);
         if (!imt[imt_index]->IsRuntimeMethod() ||
             imt[imt_index] == unimplemented_method ||
             imt[imt_index] == imt_conflict_method) {
@@ -6498,14 +6428,12 @@
 void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
                                         ArtMethod* unimplemented_method,
                                         ArtMethod* imt_conflict_method,
-                                        bool* new_conflict,
                                         ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
   mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class->ShouldHaveImt()) {
-    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
-      imt[i] = super_imt->Get(i, image_pointer_size_);
+  if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
@@ -6518,7 +6446,6 @@
                          klass.Get(),
                          /*create_conflict_table*/false,
                          /*ignore_copied_methods*/true,
-                         /*out*/new_conflict,
                          /*out*/imt);
     }
   }
@@ -6529,7 +6456,6 @@
     Thread* self,
     Handle<mirror::Class> klass,
     const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
-    bool* out_new_conflict,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
@@ -6565,7 +6491,6 @@
     FillImtFromSuperClass(klass,
                           unimplemented_method,
                           imt_conflict_method,
-                          out_new_conflict,
                           out_imt);
   }
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
@@ -6651,7 +6576,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = interface_method->GetImtIndex();
+        uint32_t imt_index = GetIMTIndex(interface_method);
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -6697,7 +6622,6 @@
                 SetIMTRef(unimplemented_method,
                           imt_conflict_method,
                           vtable_method,
-                          /*out*/out_new_conflict,
                           /*out*/imt_ptr);
               }
               break;
@@ -6840,7 +6764,6 @@
             SetIMTRef(unimplemented_method,
                       imt_conflict_method,
                       current_method,
-                      /*out*/out_new_conflict,
                       /*out*/imt_ptr);
           }
         }
@@ -7040,7 +6963,7 @@
       }
 
       // Fix up IMT next
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
         auto it = move_table.find(out_imt[i]);
         if (it != move_table.end()) {
           out_imt[i] = it->second;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index d6822c5..ca5af19 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -833,7 +833,6 @@
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                   bool* out_new_conflict,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -969,20 +968,19 @@
   // * kDefaultConflict - Conflicting method implementations were found when searching for
   //                      target_method. The value of *out_default_method is null.
   DefaultMethodSearchResult FindDefaultMethodImplementation(
-      Thread* self,
-      ArtMethod* target_method,
-      Handle<mirror::Class> klass,
-      /*out*/ArtMethod** out_default_method) const
+          Thread* self,
+          ArtMethod* target_method,
+          Handle<mirror::Class> klass,
+          /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
   bool LinkInterfaceMethods(
-      Thread* self,
-      Handle<mirror::Class> klass,
-      const std::unordered_map<size_t, MethodTranslation>& default_translations,
-      bool* out_new_conflict,
-      ArtMethod** out_imt)
+          Thread* self,
+          Handle<mirror::Class> klass,
+          const std::unordered_map<size_t, MethodTranslation>& default_translations,
+          ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -1098,7 +1096,6 @@
   void SetIMTRef(ArtMethod* unimplemented_method,
                  ArtMethod* imt_conflict_method,
                  ArtMethod* current_method,
-                 /*out*/bool* new_conflict,
                  /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillIMTFromIfTable(mirror::IfTable* if_table,
@@ -1107,13 +1104,11 @@
                           mirror::Class* klass,
                           bool create_conflict_tables,
                           bool ignore_copied_methods,
-                          /*out*/bool* new_conflict,
-                          /*out*/ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+                          ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillImtFromSuperClass(Handle<mirror::Class> klass,
                              ArtMethod* unimplemented_method,
                              ArtMethod* imt_conflict_method,
-                             bool* new_conflict,
                              ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 9b59f2b..488826b 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -148,8 +148,7 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_FALSE(array->ShouldHaveImt());
-    EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
+    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 4f705f2..60f45cd 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -402,12 +402,124 @@
                                                dex_file, type);
 }
 
-void ThrowNullPointerExceptionFromDexPC() {
+static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (!CanDoImplicitNullCheckOn(addr)) {
+    return false;
+  }
+
+  switch (instr.Opcode()) {
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      // Without inlining, we could just check that the offset is the class offset.
+      // However, when inlining, the compiler can (validly) merge the null check with a field access
+      // on the same object. Note that the stack map at the NPE will reflect the invoke's location,
+      // which is the caller.
+      return true;
+    }
+
+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT:
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      // Check that the fault address is at the offset of the field or null. The compiler
+      // can generate both.
+      ArtField* field =
+          Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false);
+      return (addr == 0) || (addr == field->GetOffset().Uint32Value());
+    }
+
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT_QUICK:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT_QUICK:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK: {
+      // Check that the fault address is at the offset in the quickened instruction or null.
+      // The compiler can generate both.
+      return (addr == 0u) || (addr == instr.VRegC_22c());
+    }
+
+    case Instruction::AGET:
+    case Instruction::AGET_WIDE:
+    case Instruction::AGET_OBJECT:
+    case Instruction::AGET_BOOLEAN:
+    case Instruction::AGET_BYTE:
+    case Instruction::AGET_CHAR:
+    case Instruction::AGET_SHORT:
+    case Instruction::APUT:
+    case Instruction::APUT_WIDE:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT: {
+      // The length access should crash. We currently do not do implicit checks on
+      // the array access itself.
+      return (addr == 0u) || (addr == mirror::Array::LengthOffset().Uint32Value());
+    }
+
+    case Instruction::FILL_ARRAY_DATA: {
+      // The length access should crash. We currently do not do implicit checks on
+      // the array access itself.
+      return (addr == 0u) || (addr == mirror::Array::LengthOffset().Uint32Value());
+    }
+
+    case Instruction::ARRAY_LENGTH: {
+      // The length access should crash.
+      return (addr == 0u) || (addr == mirror::Array::LengthOffset().Uint32Value());
+    }
+
+    default: {
+      // We have covered all the cases where an NPE could occur.
+      // Note that this must be kept in sync with the compiler, and adding
+      // any new way to do implicit checks in the compiler should also update
+      // this code.
+      return false;
+    }
+  }
+}
+
+void ThrowNullPointerExceptionFromDexPC(bool check_address, uintptr_t addr) {
   uint32_t throw_dex_pc;
   ArtMethod* method = Thread::Current()->GetCurrentMethod(&throw_dex_pc);
   const DexFile::CodeItem* code = method->GetCodeItem();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
+  if (check_address && !IsValidImplicitCheck(addr, method, *instr)) {
+    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    LOG(FATAL) << "Invalid address for an implicit NullPointerException check: "
+               << "0x" << std::hex << addr << std::dec
+               << ", at "
+               << instr->DumpString(dex_file)
+               << " in "
+               << PrettyMethod(method);
+  }
+
   switch (instr->Opcode()) {
     case Instruction::INVOKE_DIRECT:
       ThrowNullPointerExceptionForMethodAccess(instr->VRegB_35c(), kDirect);
@@ -530,14 +642,32 @@
       ThrowException("Ljava/lang/NullPointerException;", nullptr,
                      "Attempt to get length of null array");
       break;
+    case Instruction::FILL_ARRAY_DATA: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to write to null array");
+      break;
+    }
+    case Instruction::INVOKE_LAMBDA:
+    case Instruction::BOX_LAMBDA:
+    case Instruction::UNBOX_LAMBDA:
+    case Instruction::LIBERATE_VARIABLE: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Using a null lambda");
+      break;
+    }
+    case Instruction::MONITOR_ENTER:
+    case Instruction::MONITOR_EXIT: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to do a synchronize operation on a null object");
+      break;
+    }
     default: {
-      // TODO: We should have covered all the cases where we expect a NPE above, this
-      //       message/logging is so we can improve any cases we've missed in the future.
       const DexFile* dex_file =
           method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-      ThrowException("Ljava/lang/NullPointerException;", nullptr,
-                     StringPrintf("Null pointer exception during instruction '%s'",
-                                  instr->DumpString(dex_file).c_str()).c_str());
+      LOG(FATAL) << "NullPointerException at an unexpected instruction: "
+                 << instr->DumpString(dex_file)
+                 << " in "
+                 << PrettyMethod(method);
       break;
     }
   }
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 7a33585..cbd338d 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -195,7 +195,7 @@
                                               InvokeType type)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowNullPointerExceptionFromDexPC()
+void ThrowNullPointerExceptionFromDexPC(bool check_address = false, uintptr_t addr = 0)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNullPointerException(const char* msg)
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 4e6c3ca..108a5af 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -38,10 +38,88 @@
   return reinterpret_cast<const char*>(ptr);
 }
 
+inline const char* DexFile::GetStringData(const StringId& string_id) const {
+  uint32_t unused_utf16_length;  // Required out-parameter; the caller only wants the data.
+  return GetStringDataAndUtf16Length(string_id, &unused_utf16_length);
+}
+
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+                                                          uint32_t* utf16_length) const {
+  if (idx != kDexNoIndex) {
+    return GetStringDataAndUtf16Length(GetStringId(idx), utf16_length);
+  }
+  // kDexNoIndex names no string: report a zero length and no data.
+  *utf16_length = 0;
+  return nullptr;
+}
+
+inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+  uint32_t discarded_length;  // Only the string data is wanted here, not its length.
+  return StringDataAndUtf16LengthByIdx(idx, &discarded_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
+  // Look up the type id, then resolve the descriptor string it refers to.
+  return StringDataAndUtf16LengthByIdx(GetTypeId(idx).descriptor_idx_, unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx) const {
+  // Look up the type id, then resolve the descriptor string it refers to.
+  return StringDataByIdx(GetTypeId(idx).descriptor_idx_);
+}
+
+inline const char* DexFile::GetTypeDescriptor(const TypeId& type_id) const {
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetFieldTypeDescriptor(const FieldId& field_id) const {
+  // Go from the field's type index to its TypeId, then to that type's descriptor.
+  return GetTypeDescriptor(GetTypeId(field_id.type_idx_));
+}
+
+inline const char* DexFile::GetFieldName(const FieldId& field_id) const {
+  return StringDataByIdx(field_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
+  // Go from the method's class index to its TypeId, then to that type's descriptor.
+  return GetTypeDescriptor(GetTypeId(method_id.class_idx_));
+}
+
 inline const Signature DexFile::GetMethodSignature(const MethodId& method_id) const {
   return Signature(this, GetProtoId(method_id.proto_idx_));
 }
 
+inline const char* DexFile::GetMethodName(const MethodId& method_id) const {
+  return StringDataByIdx(method_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(uint32_t idx) const {
+  return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id) const {
+  return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
+  // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
+  return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
+}
+
+inline const char* DexFile::GetClassDescriptor(const ClassDef& class_def) const {
+  return StringByTypeIdx(class_def.class_idx_);
+}
+
+inline const char* DexFile::GetReturnTypeDescriptor(const ProtoId& proto_id) const {
+  return StringByTypeIdx(proto_id.return_type_idx_);
+}
+
+inline const char* DexFile::GetShorty(uint32_t proto_idx) const {
+  // The prototype id stores the string index of its shorty; resolve that string.
+  return StringDataByIdx(GetProtoId(proto_idx).shorty_idx_);
+}
+
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 759986e..3dffe4b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -528,25 +528,12 @@
   // as the string length of the string data.
   const char* GetStringDataAndUtf16Length(const StringId& string_id, uint32_t* utf16_length) const;
 
-  const char* GetStringData(const StringId& string_id) const {
-    uint32_t ignored;
-    return GetStringDataAndUtf16Length(string_id, &ignored);
-  }
+  const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const {
-    if (idx == kDexNoIndex) {
-      *utf16_length = 0;
-      return nullptr;
-    }
-    const StringId& string_id = GetStringId(idx);
-    return GetStringDataAndUtf16Length(string_id, utf16_length);
-  }
+  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const {
-    uint32_t unicode_length;
-    return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
-  }
+  const char* StringDataByIdx(uint32_t idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
@@ -577,20 +564,12 @@
   }
 
   // Get the descriptor string associated with a given type index.
-  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
-  }
+  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const;
 
-  const char* StringByTypeIdx(uint32_t idx) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* StringByTypeIdx(uint32_t idx) const;
 
   // Returns the type descriptor string of a type id.
-  const char* GetTypeDescriptor(const TypeId& type_id) const {
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
   const TypeId* FindTypeId(uint32_t string_idx) const;
@@ -625,15 +604,10 @@
   }
 
   // Returns the class descriptor string of a field id.
-  const char* GetFieldTypeDescriptor(const FieldId& field_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetFieldTypeDescriptor(const FieldId& field_id) const;
 
   // Returns the name of a field id.
-  const char* GetFieldName(const FieldId& field_id) const {
-    return StringDataByIdx(field_id.name_idx_);
-  }
+  const char* GetFieldName(const FieldId& field_id) const;
 
   // Returns the number of method identifiers in the .dex file.
   size_t NumMethodIds() const {
@@ -659,10 +633,7 @@
                                const DexFile::ProtoId& signature) const;
 
   // Returns the declaring class descriptor string of a method id.
-  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const;
 
   // Returns the prototype of a method id.
   const ProtoId& GetMethodPrototype(const MethodId& method_id) const {
@@ -673,23 +644,15 @@
   const Signature GetMethodSignature(const MethodId& method_id) const;
 
   // Returns the name of a method id.
-  const char* GetMethodName(const MethodId& method_id) const {
-    return StringDataByIdx(method_id.name_idx_);
-  }
+  const char* GetMethodName(const MethodId& method_id) const;
 
   // Returns the shorty of a method by its index.
-  const char* GetMethodShorty(uint32_t idx) const {
-    return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
-  }
+  const char* GetMethodShorty(uint32_t idx) const;
 
   // Returns the shorty of a method id.
-  const char* GetMethodShorty(const MethodId& method_id) const {
-    return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
-  }
-  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
-    // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
-    return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
-  }
+  const char* GetMethodShorty(const MethodId& method_id) const;
+  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const;
+
   // Returns the number of class definitions in the .dex file.
   uint32_t NumClassDefs() const {
     DCHECK(header_ != nullptr) << GetLocation();
@@ -709,9 +672,7 @@
   }
 
   // Returns the class descriptor string of a class definition.
-  const char* GetClassDescriptor(const ClassDef& class_def) const {
-    return StringByTypeIdx(class_def.class_idx_);
-  }
+  const char* GetClassDescriptor(const ClassDef& class_def) const;
 
   // Looks up a class definition by its class descriptor. Hash must be
   // ComputeModifiedUtf8Hash(descriptor).
@@ -749,9 +710,7 @@
     }
   }
 
-  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
-    return StringByTypeIdx(proto_id.return_type_idx_);
-  }
+  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const;
 
   // Returns the number of prototype identifiers in the .dex file.
   size_t NumProtoIds() const {
@@ -788,10 +747,7 @@
   const Signature CreateSignature(const StringPiece& signature) const;
 
   // Returns the short form method descriptor for the given prototype.
-  const char* GetShorty(uint32_t proto_idx) const {
-    const ProtoId& proto_id = GetProtoId(proto_idx);
-    return StringDataByIdx(proto_id.shorty_idx_);
-  }
+  const char* GetShorty(uint32_t proto_idx) const;
 
   const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
     if (proto_id.parameters_off_ == 0) {
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index db3f88f..fc62573 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,7 @@
 
 #include "entrypoint_utils.h"
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -559,10 +559,9 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetImtIndex();
-      size_t pointer_size = class_linker->GetImagePointerSize();
-      ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
-          Get(imt_index, pointer_size);
+      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+      ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(
+          imt_index, class_linker->GetImagePointerSize());
       if (!imt_method->IsRuntimeMethod()) {
         if (kIsDebugBuild) {
           mirror::Class* klass = (*this_object)->GetClass();
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index 771e143..d0dad34 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -120,6 +120,7 @@
 extern "C" void art_quick_throw_div_zero();
 extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
 extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal(uintptr_t address);
 extern "C" void art_quick_throw_stack_overflow(void*);
 extern "C" void art_quick_throw_string_bounds(int32_t index, int32_t limit);
 
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 2778e32..ea9f7b0 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -52,8 +52,18 @@
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  // We come from an explicit check in the generated code. This path is triggered
+  // only if the object is indeed null.
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ false, 0U);
+  self->QuickDeliverException();
+}
+
+// Installed by a signal handler to throw a NullPointerException.
+extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
-  ThrowNullPointerExceptionFromDexPC();
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
   self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 0a70be1..03771aa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2169,12 +2169,13 @@
       dex_method_idx, sizeof(void*));
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
   ArtMethod* method = nullptr;
-  ImTable* imt = cls->GetImt(sizeof(void*));
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
-    ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*));
+    uint32_t imt_index = interface_method->GetDexMethodIndex();
+    ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
+        imt_index % mirror::Class::kImtSize, sizeof(void*));
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
       ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
       DCHECK(current_table != nullptr);
@@ -2225,8 +2226,9 @@
 
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
-  uint32_t imt_index = interface_method->GetImtIndex();
-  ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*));
+  uint32_t imt_index = interface_method->GetDexMethodIndex();
+  ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
+      imt_index % mirror::Class::kImtSize, sizeof(void*));
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2237,9 +2239,9 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      imt->Set(imt_index,
-               new_conflict_method,
-               sizeof(void*));
+      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
+                                  new_conflict_method,
+                                  sizeof(void*));
     }
   }
 
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 625b1e8..56e0fb7 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -96,6 +96,14 @@
 
   bool Action(int sig, siginfo_t* siginfo, void* context) OVERRIDE;
 
+  static bool IsValidImplicitCheck(siginfo_t* siginfo) {
+    // Cheap filter on the faulting address (siginfo->si_addr): our implicit NPE
+    // checks always limit the range to a page. The runtime will do more exhaustive
+    // checks (that we cannot reasonably do in signal processing code) based on the
+    // faulting dex instruction.
+    return CanDoImplicitNullCheckOn(reinterpret_cast<uintptr_t>(siginfo->si_addr));
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(NullPointerHandler);
 };
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 8cadc2e..e896c7a 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1130,10 +1130,6 @@
       image_header.VisitPackedArtFields(&field_visitor, target_base);
     }
     {
-      TimingLogger::ScopedTiming timing("Fixup imt", &logger);
-      image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
-    }
-    {
       TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
       image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
     }
diff --git a/runtime/globals.h b/runtime/globals.h
index 477cbdf..0b44c47 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -40,6 +40,12 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Returns whether the given memory offset can be used for generating
+// an implicit null check, i.e. whether it is strictly below kPageSize.
+static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
+  return offset < kPageSize;
+}
+
 // Required object alignment
 static constexpr size_t kObjectAlignment = 8;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index cd0557a..ea75a62 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -20,7 +20,6 @@
 #include "image.h"
 
 #include "art_method.h"
-#include "imtable.h"
 
 namespace art {
 
@@ -46,24 +45,6 @@
 }
 
 template <typename Visitor>
-inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
-                                             uint8_t* base,
-                                             size_t pointer_size) const {
-  const ImageSection& section = GetImageSection(kSectionImTables);
-  for (size_t pos = 0; pos < section.Size();) {
-    ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
-      ArtMethod* orig = imt->Get(i, pointer_size);
-      ArtMethod* updated = visitor(orig);
-      if (updated != orig) {
-        imt->Set(i, updated, pointer_size);
-      }
-    }
-    pos += ImTable::SizeInBytes(pointer_size);
-  }
-}
-
-template <typename Visitor>
 inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
                                                       uint8_t* base,
                                                       size_t pointer_size) const {
diff --git a/runtime/image.cc b/runtime/image.cc
index 2362a92..a9552c2 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '0', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '9', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 06f06ee..2ea9af7 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -195,7 +195,6 @@
     kSectionArtFields,
     kSectionArtMethods,
     kSectionRuntimeMethods,
-    kSectionImTables,
     kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
@@ -280,11 +279,6 @@
   void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
 
   template <typename Visitor>
-  void VisitPackedImTables(const Visitor& visitor,
-                           uint8_t* base,
-                           size_t pointer_size) const;
-
-  template <typename Visitor>
   void VisitPackedImtConflictTables(const Visitor& visitor,
                                     uint8_t* base,
                                     size_t pointer_size) const;
diff --git a/runtime/imtable.h b/runtime/imtable.h
deleted file mode 100644
index 51faf70..0000000
--- a/runtime/imtable.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_IMTABLE_H_
-#define ART_RUNTIME_IMTABLE_H_
-
-#ifndef IMT_SIZE
-#error IMT_SIZE not defined
-#endif
-
-namespace art {
-
-class ArtMethod;
-
-class ImTable {
- public:
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kSize = IMT_SIZE;
-
-  ArtMethod* Get(size_t index, size_t pointer_size) {
-    DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
-      uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
-      return reinterpret_cast<ArtMethod*>(value);
-    } else {
-      uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
-      return reinterpret_cast<ArtMethod*>(value);
-    }
-  }
-
-  void Set(size_t index, ArtMethod* method, size_t pointer_size) {
-    DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
-      uintptr_t value = reinterpret_cast<uintptr_t>(method);
-      DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we dont lose any non 0 bits.
-      *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
-    } else {
-      *reinterpret_cast<uint64_t*>(ptr) = reinterpret_cast<uint64_t>(method);
-    }
-  }
-
-  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
-    return index * pointer_size;
-  }
-
-  void Populate(ArtMethod** data, size_t pointer_size) {
-    for (size_t i = 0; i < kSize; ++i) {
-      Set(i, data[i], pointer_size);
-    }
-  }
-
-  constexpr static size_t SizeInBytes(size_t pointer_size) {
-    return kSize * pointer_size;
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_IMTABLE_H_
-
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 3750b7a..cc470f3 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -679,7 +679,7 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
       vtable_idx, sizeof(void*));
   if (UNLIKELY(called_method == nullptr)) {
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index c99d363..5039d2d 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -19,6 +19,7 @@
 #include "errno.h"
 #include <limits.h>
 #include <vector>
+#include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -40,6 +41,11 @@
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
+// Debug flag to ignore checksums when testing if a method or a class is present in the profile.
+// Used to facilitate testing profile guided compilation across a large number of apps
+// using the same test profile.
+static constexpr bool kDebugIgnoreChecksum = false;
+
 // Transform the actual dex location into relative paths.
 // Note: this is OK because we don't store profiles of different apps into the same file.
 // Apps with split apks don't cause trouble because each split has a different name and will not
@@ -547,10 +553,14 @@
   return true;
 }
 
+static bool ChecksumMatch(const DexFile& dex_file, uint32_t checksum) {
+  return kDebugIgnoreChecksum || dex_file.GetLocationChecksum() == checksum;
+}
+
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
   auto info_it = info_.find(GetProfileDexFileKey(method_ref.dex_file->GetLocation()));
   if (info_it != info_.end()) {
-    if (method_ref.dex_file->GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(*method_ref.dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& methods = info_it->second.method_set;
@@ -562,7 +572,7 @@
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
   auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
   if (info_it != info_.end()) {
-    if (dex_file.GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& classes = info_it->second.class_set;
@@ -659,4 +669,47 @@
   }
 }
 
+// Naive generator of a random profile for testing; the result is saved to `fd`.
+bool ProfileCompilationInfo::GenerateTestProfile(int fd,
+                                                 uint16_t number_of_dex_files,
+                                                 uint16_t method_ratio,
+                                                 uint16_t class_ratio) {
+  const std::string base_dex_location = "base.apk";
+  ProfileCompilationInfo info;
+  // The limits are defined by the dex specification. The ratios are percentages of them.
+  uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  uint16_t number_of_methods = max_method * method_ratio / 100;
+  uint16_t number_of_classes = max_classes * class_ratio / 100;
+
+  srand(MicroTime());
+
+  // Make sure we generate more samples with a low index value: the first 1/kFavorSplit of the
+  // samples is folded below kFavorFirstN. This makes valid indices more likely in small apps.
+  const uint16_t kFavorFirstN = 10000;
+  const uint16_t kFavorSplit = 2;
+
+  for (uint16_t i = 0; i < number_of_dex_files; i++) {
+    std::string dex_location = DexFile::GetMultiDexLocation(i, base_dex_location.c_str());
+    std::string profile_key = GetProfileDexFileKey(dex_location);
+
+    for (uint16_t m = 0; m < number_of_methods; m++) {
+      uint16_t method_idx = rand() % max_method;
+      if (m < (number_of_methods / kFavorSplit)) {
+        method_idx %= kFavorFirstN;
+      }
+      info.AddMethodIndex(profile_key, 0, method_idx);
+    }
+
+    for (uint16_t c = 0; c < number_of_classes; c++) {
+      uint16_t class_idx = rand() % max_classes;
+      if (c < (number_of_classes / kFavorSplit)) {
+        class_idx %= kFavorFirstN;
+      }
+      info.AddClassIndex(profile_key, 0, class_idx);
+    }
+  }
+  return info.Save(fd);
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 5a07da7..0b26f9b 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -87,6 +87,11 @@
   // Clears the resolved classes from the current object.
   void ClearResolvedClasses();
 
+  static bool GenerateTestProfile(int fd,
+                                  uint16_t number_of_dex_files,
+                                  uint16_t method_ratio,
+                                  uint16_t class_ratio);
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadIOError,
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index b783a01..cefd9f0 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -247,19 +247,38 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
+inline MemberOffset Class::EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size) {
+  DCHECK_LT(i, kImtSize);  // The index must be within the embedded IMT.
+  const auto imt_begin = EmbeddedImTableOffset(pointer_size).Uint32Value();
+  return MemberOffset(imt_begin + i * ImTableEntrySize(pointer_size));
+}
+
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
+  const MemberOffset entry_offset = EmbeddedImTableEntryOffset(i, pointer_size);
+  return GetFieldPtrWithSize<ArtMethod*>(entry_offset, pointer_size);
+}
+
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
+  SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
+}
+
 inline bool Class::HasVTable() {
-  return GetVTable() != nullptr || ShouldHaveEmbeddedVTable();
+  return GetVTable() != nullptr || ShouldHaveEmbeddedImtAndVTable();
 }
 
 inline int32_t Class::GetVTableLength() {
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     return GetEmbeddedVTableLength();
   }
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
 inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
   auto* vtable = GetVTable();
@@ -275,14 +294,6 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
-inline ImTable* Class::GetImt(size_t pointer_size) {
-  return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
-}
-
-inline void Class::SetImt(ImTable* imt, size_t pointer_size) {
-  return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
-}
-
 inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
@@ -530,7 +541,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
+  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -541,7 +552,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
@@ -700,7 +711,7 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
-inline uint32_t Class::ComputeClassSize(bool has_embedded_vtable,
+inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
                                         uint32_t num_vtable_entries,
                                         uint32_t num_8bit_static_fields,
                                         uint32_t num_16bit_static_fields,
@@ -711,10 +722,11 @@
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
-  if (has_embedded_vtable) {
-    size = RoundUp(size + sizeof(uint32_t), pointer_size);
-    size += pointer_size;  // size of pointer to IMT
-    size += num_vtable_entries * VTableEntrySize(pointer_size);
+  if (has_embedded_tables) {
+    const uint32_t embedded_imt_size = kImtSize * ImTableEntrySize(pointer_size);
+    const uint32_t embedded_vtable_size = num_vtable_entries * VTableEntrySize(pointer_size);
+    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */, pointer_size) +
+        embedded_imt_size + embedded_vtable_size;
   }
 
   // Space used by reference statics.
@@ -978,9 +990,18 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
+inline MemberOffset Class::EmbeddedImTableOffset(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  // Round up so the embedded imt and vtable are pointer-size aligned when pointers are 64 bits.
+  // Add 32 bits for embedded vtable length.
+  return MemberOffset(
+      RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+}
+
 inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + pointer_size);
+  return MemberOffset(EmbeddedImTableOffset(pointer_size).Uint32Value() +
+                      kImtSize * ImTableEntrySize(pointer_size));
 }
 
 inline void Class::CheckPointerSize(size_t pointer_size) {
@@ -1065,7 +1086,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
+  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1073,9 +1094,16 @@
         dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
       }
     }
-  }
-  if (!IsTemp() && ShouldHaveImt<kVerifyNone, kReadBarrierOption>()) {
-    dest->SetImt(visitor(GetImt(pointer_size)), pointer_size);
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                                    pointer_size);
+      ArtMethod* new_method = visitor(method);
+      if (method != new_method) {
+        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                        new_method,
+                                                                        pointer_size);
+      }
+    }
   }
 }
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..b4a23ba 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -914,7 +914,13 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedVTable(size_t pointer_size) {
+void Class::PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize],
+                                         size_t pointer_size) {
+  for (size_t i = 0; i < kImtSize; i++) {
+    auto method = methods[i];
+    DCHECK(method != nullptr);
+    SetEmbeddedImTableEntry(i, method, pointer_size);
+  }
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -961,7 +967,7 @@
 class CopyClassVisitor {
  public:
   CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ImTable* imt,
+                   size_t copy_bytes, ArtMethod* const (&imt)[mirror::Class::kImtSize],
                    size_t pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
@@ -973,8 +979,7 @@
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
     mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
-    h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
-    h_new_class_obj->SetImt(imt_, pointer_size_);
+    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
@@ -987,13 +992,13 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
-  ImTable* imt_;
+  ArtMethod* const (&imt_)[mirror::Class::kImtSize];
   const size_t pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
 Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ImTable* imt, size_t pointer_size) {
+                     ArtMethod* const (&imt)[mirror::Class::kImtSize], size_t pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2adf54a..5235a3e 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -22,7 +22,6 @@
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
-#include "imtable.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
@@ -34,6 +33,10 @@
 #include "thread.h"
 #include "utils.h"
 
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
 namespace art {
 
 class ArtField;
@@ -63,6 +66,11 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of classes that implement
+  // (non-marker) interfaces.
+  static constexpr size_t kImtSize = IMT_SIZE;
+
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -343,7 +351,7 @@
   // be replaced with a class with the right size for embedded imt/vtable.
   bool IsTemp() SHARED_REQUIRES(Locks::mutator_lock_) {
     Status s = GetStatus();
-    return s < Status::kStatusResolving && ShouldHaveEmbeddedVTable();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
   }
 
   String* GetName() SHARED_REQUIRES(Locks::mutator_lock_);  // Returns the cached name.
@@ -549,7 +557,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Compute how many bytes would be used a class with the given elements.
-  static uint32_t ComputeClassSize(bool has_embedded_vtable,
+  static uint32_t ComputeClassSize(bool has_embedded_tables,
                                    uint32_t num_vtable_entries,
                                    uint32_t num_8bit_static_fields,
                                    uint32_t num_16bit_static_fields,
@@ -822,29 +830,28 @@
     return MemberOffset(sizeof(Class));
   }
 
-  static MemberOffset ImtPtrOffset(size_t pointer_size) {
-    return MemberOffset(
-        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
-  }
-
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveImt() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>() &&
-        GetIfTable<kVerifyFlags, kReadBarrierOption>() != nullptr &&
-        !IsArrayClass<kVerifyFlags, kReadBarrierOption>();
-  }
-
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveEmbeddedVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static MemberOffset EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size);
+
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
@@ -854,10 +861,6 @@
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ImTable* GetImt(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void SetImt(ImTable* imt, size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-
   ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -867,7 +870,7 @@
   inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedVTable(size_t pointer_size)
+  void PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize], size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
@@ -1192,7 +1195,7 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
+  Class* CopyOf(Thread* self, int32_t new_length, ArtMethod* const (&imt)[mirror::Class::kImtSize],
                 size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -1319,7 +1322,10 @@
 
   // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
+
+  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
+
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index cf9b8eb..96f2098 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -34,7 +34,7 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 56;
+  uint32_t vtable_entries = Object::kVTableLength + 57;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 9b6a20e..ab24625 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1060,6 +1060,8 @@
     return tlsPtr_.mterp_alt_ibase;
   }
 
+  // Notify that a signal is being handled. This is to protect us from doing recursive
+  // NPE handling after a SIGSEGV.
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
diff --git a/test/004-ThreadStress/check b/test/004-ThreadStress/check
index ffbb8cf..77e4cdb 100755
--- a/test/004-ThreadStress/check
+++ b/test/004-ThreadStress/check
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Only compare the last line.
-tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
+# Do not compare numbers, so replace numbers with 'N'.
+sed '-es/[0-9][0-9]*/N/g' "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
index a26fb4f..772faf6 100644
--- a/test/004-ThreadStress/expected.txt
+++ b/test/004-ThreadStress/expected.txt
@@ -1 +1,11 @@
+JNI_OnLoad called
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
 Finishing worker
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index acd8e8b..5cae398 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -93,9 +93,7 @@
 
                 killTemp = osClass.getDeclaredMethod("kill", int.class, int.class);
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
-                }
+                Main.printThrowable(e);
             }
 
             pid = pidTemp;
@@ -109,8 +107,8 @@
                 kill.invoke(null, pid, sigquit);
             } catch (OutOfMemoryError e) {
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
+                if (!e.getClass().getName().equals(Main.errnoExceptionName)) {
+                    Main.printThrowable(e);
                 }
             }
             return true;
@@ -268,6 +266,7 @@
     }
 
     public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
         parseAndRun(args);
     }
 
@@ -399,12 +398,21 @@
             System.out.println(frequencyMap);
         }
 
-        runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        try {
+            runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        } catch (Throwable t) {
+            // In this case, the output should not contain all the required
+            // "Finishing worker" lines.
+            Main.printThrowable(t);
+        }
     }
 
     public static void runTest(final int numberOfThreads, final int numberOfDaemons,
                                final int operationsPerThread, final Object lock,
                                Map<Operation, Double> frequencyMap) throws Exception {
+        final Thread mainThread = Thread.currentThread();
+        final Barrier startBarrier = new Barrier(numberOfThreads + numberOfDaemons + 1);
+
         // Each normal thread is going to do operationsPerThread
         // operations. Each daemon thread will loop over all
         // the operations and will not stop.
@@ -438,8 +446,9 @@
             }
             // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = t < numberOfThreads ? new Main(lock, t, operations) :
-                                                      new Daemon(lock, t, operations);
+            threadStresses[t] = (t < numberOfThreads)
+                    ? new Main(lock, t, operations)
+                    : new Daemon(lock, t, operations, mainThread, startBarrier);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -474,32 +483,41 @@
             runners[r] = new Thread("Runner thread " + r) {
                 final Main threadStress = ts;
                 public void run() {
-                    int id = threadStress.id;
-                    System.out.println("Starting worker for " + id);
-                    while (threadStress.nextOperation < operationsPerThread) {
-                        try {
-                            Thread thread = new Thread(ts, "Worker thread " + id);
-                            thread.start();
+                    try {
+                        int id = threadStress.id;
+                        // No memory-hungry tasks are running yet, so println() should succeed.
+                        System.out.println("Starting worker for " + id);
+                        // Wait until all runners and daemons reach the starting point.
+                        startBarrier.await();
+                        // Run the stress tasks.
+                        while (threadStress.nextOperation < operationsPerThread) {
                             try {
+                                Thread thread = new Thread(ts, "Worker thread " + id);
+                                thread.start();
                                 thread.join();
-                            } catch (InterruptedException e) {
-                            }
 
-                            System.out.println("Thread exited for " + id + " with "
-                                               + (operationsPerThread - threadStress.nextOperation)
-                                               + " operations remaining.");
-                        } catch (OutOfMemoryError e) {
-                            // Ignore OOME since we need to print "Finishing worker" for the test
-                            // to pass.
+                                if (DEBUG) {
+                                    System.out.println(
+                                        "Thread exited for " + id + " with " +
+                                        (operationsPerThread - threadStress.nextOperation) +
+                                        " operations remaining.");
+                                }
+                            } catch (OutOfMemoryError e) {
+                                // Ignore OOME since we need to print "Finishing worker"
+                                // for the test to pass. This OOM can come from creating
+                                // the Thread or from the DEBUG output.
+                                // Note that the Thread creation may fail repeatedly,
+                                // preventing the runner from making any progress,
+                                // especially if the number of daemons is too high.
+                            }
                         }
-                    }
-                    // Keep trying to print "Finishing worker" until it succeeds.
-                    while (true) {
-                        try {
-                            System.out.println("Finishing worker");
-                            break;
-                        } catch (OutOfMemoryError e) {
-                        }
+                        // Print "Finishing worker" through JNI to avoid OOME.
+                        Main.printString(Main.finishingWorkerMessage);
+                    } catch (Throwable t) {
+                        Main.printThrowable(t);
+                        // Interrupt the main thread, so that it can orderly shut down
+                        // instead of waiting indefinitely for some Barrier.
+                        mainThread.interrupt();
                     }
                 }
             };
@@ -532,6 +550,9 @@
         for (int r = 0; r < runners.length; r++) {
             runners[r].start();
         }
+        // Wait for all threads to reach the starting point.
+        startBarrier.await();
+        // Wait for runners to finish.
         for (int r = 0; r < runners.length; r++) {
             runners[r].join();
         }
@@ -574,8 +595,14 @@
     }
 
     private static class Daemon extends Main {
-        private Daemon(Object lock, int id, Operation[] operations) {
+        private Daemon(Object lock,
+                       int id,
+                       Operation[] operations,
+                       Thread mainThread,
+                       Barrier startBarrier) {
             super(lock, id, operations);
+            this.mainThread = mainThread;
+            this.startBarrier = startBarrier;
         }
 
         public void run() {
@@ -583,26 +610,74 @@
                 if (DEBUG) {
                     System.out.println("Starting ThreadStress Daemon " + id);
                 }
-                int i = 0;
-                while (true) {
-                    Operation operation = operations[i];
-                    if (DEBUG) {
-                        System.out.println("ThreadStress Daemon " + id
-                                           + " operation " + i
-                                           + " is " + operation);
+                startBarrier.await();
+                try {
+                    int i = 0;
+                    while (true) {
+                        Operation operation = operations[i];
+                        if (DEBUG) {
+                            System.out.println("ThreadStress Daemon " + id
+                                               + " operation " + i
+                                               + " is " + operation);
+                        }
+                        operation.perform();
+                        i = (i + 1) % operations.length;
                     }
-                    operation.perform();
-                    i = (i + 1) % operations.length;
+                } catch (OutOfMemoryError e) {
+                    // Catch OutOfMemoryErrors since these can cause the test to fail if they print
+                    // the stack trace after "Finishing worker". Note that operations should catch
+                    // their own OOME; this guards only against OOME in the DEBUG output.
                 }
-            } catch (OutOfMemoryError e) {
-                // Catch OutOfMemoryErrors since these can cause the test to fail it they print
-                // the stack trace after "Finishing worker".
-            } finally {
                 if (DEBUG) {
                     System.out.println("Finishing ThreadStress Daemon for " + id);
                 }
+            } catch (Throwable t) {
+                Main.printThrowable(t);
+                // Interrupt the main thread, so that it can orderly shut down
+                // instead of waiting indefinitely for some Barrier.
+                mainThread.interrupt();
             }
         }
+
+        final Thread mainThread;
+        final Barrier startBarrier;
     }
 
+    // Note: java.util.concurrent.CyclicBarrier.await() allocates memory and may throw OOM.
+    // That is highly undesirable in this test, so we use our own simple barrier class.
+    // The only memory allocation that can happen here is the lock inflation which uses
+    // a native allocation. As such, it should succeed even if the Java heap is full.
+    // If the native allocation surprisingly fails, the program shall abort().
+    private static class Barrier {
+        public Barrier(int initialCount) {
+            count = initialCount;
+        }
+
+        public synchronized void await() throws InterruptedException {
+            --count;
+            if (count != 0) {
+                do {
+                    wait();
+                } while (count != 0);  // Check for spurious wakeup.
+            } else {
+                notifyAll();
+            }
+        }
+
+        private int count;
+    }
+
+    // Printing a String/Throwable through JNI requires only native memory and space
+    // in the local reference table, so it should succeed even if the Java heap is full.
+    private static native void printString(String s);
+    private static native void printThrowable(Throwable t);
+
+    static final String finishingWorkerMessage;
+    static final String errnoExceptionName;
+    static {
+        // We pre-allocate the strings in the class initializer to avoid const-string
+        // instructions in code using these strings later, as they may throw OOME.
+        finishingWorkerMessage = "Finishing worker\n";
+        errnoExceptionName = "ErrnoException";
+    }
 }
diff --git a/test/004-ThreadStress/thread_stress.cc b/test/004-ThreadStress/thread_stress.cc
new file mode 100644
index 0000000..573c352
--- /dev/null
+++ b/test/004-ThreadStress/thread_stress.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "jni.h"
+#include "mirror/string.h"
+#include "mirror/throwable.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printString(JNIEnv*, jclass, jstring s) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::String*>(s)->ToModifiedUtf8();
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printThrowable(JNIEnv*, jclass, jthrowable t) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::Throwable*>(t)->Dump();
+}
+
+}  // namespace art
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index b2f905e..9d4618a 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -39,16 +39,24 @@
     }
   }
 
-  private static Unsafe getUnsafe() throws Exception {
+  private static Unsafe getUnsafe() throws NoSuchFieldException, IllegalAccessException {
     Class<?> unsafeClass = Unsafe.class;
     Field f = unsafeClass.getDeclaredField("theUnsafe");
     f.setAccessible(true);
     return (Unsafe) f.get(null);
   }
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException {
     System.loadLibrary(args[0]);
     Unsafe unsafe = getUnsafe();
+
+    testArrayBaseOffset(unsafe);
+    testArrayIndexScale(unsafe);
+    testGetAndPutAndCAS(unsafe);
+    testGetAndPutVolatile(unsafe);
+  }
+
+  private static void testArrayBaseOffset(Unsafe unsafe) {
     check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
         "Unsafe.arrayBaseOffset(boolean[])");
     check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
@@ -65,7 +73,9 @@
         "Unsafe.arrayBaseOffset(long[])");
     check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
         "Unsafe.arrayBaseOffset(Object[])");
+  }
 
+  private static void testArrayIndexScale(Unsafe unsafe) {
     check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
         "Unsafe.arrayIndexScale(boolean[])");
     check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
@@ -82,7 +92,9 @@
         "Unsafe.arrayIndexScale(long[])");
     check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
         "Unsafe.arrayIndexScale(Object[])");
+  }
 
+  private static void testGetAndPutAndCAS(Unsafe unsafe) throws NoSuchFieldException {
     TestClass t = new TestClass();
 
     int intValue = 12345678;
@@ -185,12 +197,58 @@
     }
   }
 
+  private static void testGetAndPutVolatile(Unsafe unsafe) throws NoSuchFieldException {
+    TestVolatileClass tv = new TestVolatileClass();
+
+    int intValue = 12345678;
+    Field volatileIntField = TestVolatileClass.class.getDeclaredField("volatileIntVar");
+    long volatileIntOffset = unsafe.objectFieldOffset(volatileIntField);
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          0,
+          "Unsafe.getIntVolatile(Object, long) - initial");
+    unsafe.putIntVolatile(tv, volatileIntOffset, intValue);
+    check(tv.volatileIntVar, intValue, "Unsafe.putIntVolatile(Object, long, int)");
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          intValue,
+          "Unsafe.getIntVolatile(Object, long)");
+
+    long longValue = 1234567887654321L;
+    Field volatileLongField = TestVolatileClass.class.getDeclaredField("volatileLongVar");
+    long volatileLongOffset = unsafe.objectFieldOffset(volatileLongField);
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          0,
+          "Unsafe.getLongVolatile(Object, long) - initial");
+    unsafe.putLongVolatile(tv, volatileLongOffset, longValue);
+    check(tv.volatileLongVar, longValue, "Unsafe.putLongVolatile(Object, long, long)");
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          longValue,
+          "Unsafe.getLongVolatile(Object, long)");
+
+    Object objectValue = new Object();
+    Field volatileObjectField = TestVolatileClass.class.getDeclaredField("volatileObjectVar");
+    long volatileObjectOffset = unsafe.objectFieldOffset(volatileObjectField);
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          null,
+          "Unsafe.getObjectVolatile(Object, long) - initial");
+    unsafe.putObjectVolatile(tv, volatileObjectOffset, objectValue);
+    check(tv.volatileObjectVar, objectValue, "Unsafe.putObjectVolatile(Object, long, Object)");
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          objectValue,
+          "Unsafe.getObjectVolatile(Object, long)");
+  }
+
   private static class TestClass {
     public int intVar = 0;
     public long longVar = 0;
     public Object objectVar = null;
   }
 
+  private static class TestVolatileClass {
+    public volatile int volatileIntVar = 0;
+    public volatile long volatileLongVar = 0;
+    public volatile Object volatileObjectVar = null;
+  }
+
   private static native int vmArrayBaseOffset(Class clazz);
   private static native int vmArrayIndexScale(Class clazz);
 }
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index d878e69..dd89d64 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -33,7 +33,7 @@
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
 [private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int 
java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String 
java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int 
java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static 
java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 52d4259..f0bb6dd 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -461,7 +461,7 @@
       "hello there".substring(9,14);
       fail();
     } catch (StringIndexOutOfBoundsException ex) {
-      assertEquals("length=11; regionStart=9; regionLength=5", ex.getMessage());
+      assertEquals("length=11; index=14", ex.getMessage());
     }
   }
 }
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 08b6cec..36f14d8 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -30,6 +30,11 @@
   public void $noinline$f() {
     throw new RuntimeException();
   }
+
+  public int $inline$h(boolean cond) {
+    Super obj = (cond ? this : null);
+    return obj.hashCode();
+  }
 }
 
 class SubclassA extends Super {
@@ -620,6 +625,46 @@
     o.mainField = 0;
   }
 
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:                    InvokeVirtual [<<Arg>>,{{z\d+}}] method_name:Super.$inline$h
+
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:     <<Null:l\d+>>  NullConstant
+  /// CHECK-DAG:     <<Phi:l\d+>>   Phi [<<Arg>>,<<Null>>] klass:SubclassA
+  /// CHECK-DAG:     <<NCPhi:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK-DAG:                    InvokeVirtual [<<NCPhi>>] method_name:Super.hashCode
+
+  public void testThisArgumentMoreSpecific(boolean cond) {
+    // Inlining method from Super will build it with `this` typed as Super.
+    // Running RTP will sharpen it to SubclassA.
+    SubclassA obj = new SubclassA();
+    ((Super) obj).$inline$h(cond);
+  }
+
+  public static int $inline$hashCode(Super obj) {
+    return obj.hashCode();
+  }
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                    InvokeStaticOrDirect [<<Arg>>{{(,[ij]\d+)?}}] method_name:Main.$inline$hashCode
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  /// CHECK-DAG:     <<NCArg:l\d+>> NullCheck [<<Arg>>] klass:SubclassA
+  /// CHECK-DAG:                    InvokeVirtual [<<NCArg>>] method_name:Super.hashCode
+
+  public void testExplicitArgumentMoreSpecific(SubclassA obj) {
+    // Inlining a method will build it with reference types from its signature,
+    // here the callee graph is built with Super as the type of its only argument.
+    // Running RTP after its ParameterValue instructions are replaced with actual
+    // arguments will type the inner graph more precisely.
+    $inline$hashCode(obj);
+  }
+
   /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (before)
   /// CHECK:      <<Int:i\d+>>       IntConstant 0
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main exact:false
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 53c2e0b..ffce49d 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -18,6 +18,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   public static void assertBooleanEquals(boolean expected, boolean result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -58,41 +60,43 @@
    * Tiny programs exercising optimizations of arithmetic identities.
    */
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>  LongConstant 0
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Add
 
-  public static long Add0(long arg) {
+  public static long $noinline$Add0(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 + arg;
   }
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<And:i\d+>>     And [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                      Return [<<And>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                      And
 
-  public static int AndAllOnes(int arg) {
+  public static int $noinline$AndAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg & -1;
   }
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const15:i\d+>>  IntConstant 15
@@ -100,20 +104,21 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static int UShr28And15(int arg) {
+  public static int $noinline$UShr28And15(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 15;
   }
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const15:j\d+>>  LongConstant 15
@@ -121,20 +126,21 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static long UShr60And15(long arg) {
+  public static long $noinline$UShr60And15(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 15;
   }
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -142,7 +148,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -150,11 +156,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int UShr28And7(int arg) {
+  public static int $noinline$UShr28And7(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 7;
   }
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -162,7 +169,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -170,11 +177,12 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long UShr60And7(long arg) {
+  public static long $noinline$UShr60And7(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 7;
   }
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const255:i\d+>> IntConstant 255
@@ -182,21 +190,22 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const24>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static int Shr24And255(int arg) {
+  public static int $noinline$Shr24And255(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 255;
   }
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const255:j\d+>> LongConstant 255
@@ -204,21 +213,22 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const56>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static long Shr56And255(long arg) {
+  public static long $noinline$Shr56And255(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 255;
   }
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -226,7 +236,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -234,11 +244,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int Shr24And127(int arg) {
+  public static int $noinline$Shr24And127(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 127;
   }
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -246,7 +257,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -254,267 +265,283 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long Shr56And127(long arg) {
+  public static long $noinline$Shr56And127(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 127;
   }
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Div:j\d+>>     Div [<<Arg>>,<<Const1>>]
   /// CHECK-DAG:                      Return [<<Div>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-NOT:                      Div
 
-  public static long Div1(long arg) {
+  public static long $noinline$Div1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 1;
   }
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Div
 
-  public static int DivN1(int arg) {
+  public static int $noinline$DivN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -1;
   }
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static long Mul1(long arg) {
+  public static long $noinline$Mul1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 1;
   }
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Mul:i\d+>>      Mul [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static int MulN1(int arg) {
+  public static int $noinline$MulN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * -1;
   }
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
   /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
   /// CHECK-DAG:     <<Shl:j\d+>>       Shl [<<Arg>>,<<Const7>>]
   /// CHECK-DAG:                        Return [<<Shl>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Mul
 
-  public static long MulPowerOfTwo128(long arg) {
+  public static long $noinline$MulPowerOfTwo128(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 128;
   }
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Or
 
-  public static int Or0(int arg) {
+  public static int $noinline$Or0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg | 0;
   }
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Or:j\d+>>        Or [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
-  public static long OrSame(long arg) {
+  public static long $noinline$OrSame(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg | arg;
   }
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shl
 
-  public static int Shl0(int arg) {
+  public static int $noinline$Shl0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg << 0;
   }
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr0(long arg) {
+  public static long $noinline$Shr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 0;
   }
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const64:i\d+>>  IntConstant 64
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const64>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr64(long arg) {
+  public static long $noinline$Shr64(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 64;
   }
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long Sub0(long arg) {
+  public static long $noinline$Sub0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg - 0;
   }
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubAliasNeg(int arg) {
+  public static int $noinline$SubAliasNeg(int arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - arg;
   }
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       UShr
 
-  public static long UShr0(long arg) {
+  public static long $noinline$UShr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >>> 0;
   }
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int Xor0(int arg) {
+  public static int $noinline$Xor0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ 0;
   }
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Not>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int XorAllOnes(int arg) {
+  public static int $noinline$XorAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ -1;
   }
 
@@ -525,7 +552,7 @@
    * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
    */
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -533,7 +560,7 @@
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-NOT:                       Neg
@@ -541,7 +568,8 @@
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  public static int AddNegs1(int arg1, int arg2) {
+  public static int $noinline$AddNegs1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + -arg2;
   }
 
@@ -556,7 +584,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -566,7 +594,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -577,7 +605,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) GVN (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -586,7 +614,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add>>,<<Add>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int AddNegs2(int arg1, int arg2) {
+  public static int $noinline$AddNegs2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp1 = -arg1;
     int temp2 = -arg2;
     return (temp1 + temp2) | (temp1 + temp2);
@@ -600,7 +629,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -612,7 +641,7 @@
   /// CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -625,7 +654,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long AddNegs3(long arg1, long arg2) {
+  public static long $noinline$AddNegs3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long n_arg1 = -arg1;
     long n_arg2 = -arg2;
@@ -641,24 +671,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Add:j\d+>>      Add [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  public static long AddNeg1(long arg1, long arg2) {
+  public static long $noinline$AddNeg1(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + arg2;
   }
 
@@ -671,7 +702,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -680,7 +711,7 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -689,10 +720,11 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long AddNeg2(long arg1, long arg2) {
+  public static long $noinline$AddNeg2(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long temp = -arg2;
     return (arg1 + temp) | (arg1 + temp);
   }
@@ -702,20 +734,21 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Neg2>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static long NegNeg1(long arg) {
+  public static long $noinline$NegNeg1(long arg) {
+    if (doThrow) { throw new Error(); }
     return -(-arg);
   }
 
@@ -726,29 +759,30 @@
    * and in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  /// CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding_after_inlining (after)
   /// CHECK:         <<Const0:i\d+>>   IntConstant 0
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
   /// CHECK:                           Return [<<Const0>>]
 
-  public static int NegNeg2(int arg) {
+  public static int $noinline$NegNeg2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg;
     return temp + -temp;
   }
@@ -760,22 +794,23 @@
    * and in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Const0>>,<<Neg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Sub
 
-  public static long NegNeg3(long arg) {
+  public static long $noinline$NegNeg3(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - -arg;
   }
 
@@ -785,23 +820,24 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Sub>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static int NegSub1(int arg1, int arg2) {
+  public static int $noinline$NegSub1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -(arg1 - arg2);
   }
 
@@ -815,7 +851,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -824,7 +860,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -833,7 +869,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int NegSub2(int arg1, int arg2) {
+  public static int $noinline$NegSub2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = arg1 - arg2;
     return -temp | -temp;
   }
@@ -843,41 +880,43 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
    */
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:j\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:j\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:                       Return [<<Not2>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Not
 
-  public static long NotNot1(long arg) {
+  public static long $noinline$NotNot1(long arg) {
+    if (doThrow) { throw new Error(); }
     return ~~arg;
   }
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:i\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:i\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not2>>,<<Not1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK:                           Not
   /// CHECK-NOT:                       Not
 
-  public static int NotNot2(int arg) {
+  public static int $noinline$NotNot2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = ~arg;
     return temp + ~temp;
   }
@@ -887,24 +926,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubNeg1(int arg1, int arg2) {
+  public static int $noinline$SubNeg1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 - arg2;
   }
 
@@ -918,7 +958,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -927,7 +967,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -936,10 +976,11 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Add
 
-  public static int SubNeg2(int arg1, int arg2) {
+  public static int $noinline$SubNeg2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg1;
     return (temp - arg2) | (temp - arg2);
   }
@@ -951,7 +992,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -962,7 +1003,7 @@
   /// CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -974,7 +1015,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long SubNeg3(long arg1, long arg2) {
+  public static long $noinline$SubNeg3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long temp = -arg1;
     for (long i = 0; i < 1; i++) {
@@ -983,7 +1025,7 @@
     return res;
   }
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -993,15 +1035,16 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
-  public static boolean EqualBoolVsIntConst(boolean arg) {
+  public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return (arg ? 0 : 1) != 2;
   }
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1011,11 +1054,12 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
   /// CHECK-DAG:                       Return [<<False>>]
 
-  public static boolean NotEqualBoolVsIntConst(boolean arg) {
+  public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return (arg ? 0 : 1) == 2;
   }
 
@@ -1025,7 +1069,7 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
@@ -1033,7 +1077,7 @@
   /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
@@ -1041,81 +1085,86 @@
     return !arg;
   }
 
-  public static boolean NotNotBool(boolean arg) {
+  public static boolean $noinline$NotNotBool(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return !(NegateValue(arg));
   }
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:f\d+>>   FloatConstant 2
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:f\d+>>  FloatConstant 0.5
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float Div2(float arg) {
+  public static float $noinline$Div2(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0f;
   }
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:d\d+>>   DoubleConstant 2
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:d\d+>>  DoubleConstant 0.5
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double Div2(double arg) {
+  public static double $noinline$Div2(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0;
   }
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:f\d+>>   FloatConstant -0.25
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:f\d+>>  FloatConstant -4
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float DivMP25(float arg) {
+  public static float $noinline$DivMP25(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:d\d+>>   DoubleConstant -0.25
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:d\d+>>  DoubleConstant -4
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double DivMP25(double arg) {
+  public static double $noinline$DivMP25(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
@@ -1123,18 +1172,19 @@
    * Test strength reduction of factors of the form (2^n + 1).
    */
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
   /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
   /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
   /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
 
-  public static int mulPow2Plus1(int arg) {
+  public static int $noinline$mulPow2Plus1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 9;
   }
 
@@ -1142,62 +1192,69 @@
    * Test strength reduction of factors of the form (2^n - 1).
    */
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
   /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
   /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
   /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
 
-  public static long mulPow2Minus1(long arg) {
+  public static long $noinline$mulPow2Minus1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 31;
   }
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldNotEqualOne() {
+  public static int $noinline$booleanFieldNotEqualOne() {
+    if (doThrow) { throw new Error(); }
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<Field>>,<<Const0>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldEqualZero() {
+  public static int $noinline$booleanFieldEqualZero() {
+    if (doThrow) { throw new Error(); }
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1210,7 +1267,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1221,11 +1278,12 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionNotEqualOne(int i) {
+  public static int $noinline$intConditionNotEqualOne(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1238,7 +1296,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1249,16 +1307,17 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionEqualZero(int i) {
+  public static int $noinline$intConditionEqualZero(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) != $inline$false()) ? 13 : 54;
   }
 
   // Test that conditions on float/double are not flipped.
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) builder (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1267,14 +1326,15 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int floatConditionNotEqualOne(float f) {
+  public static int $noinline$floatConditionNotEqualOne(float f) {
+    if (doThrow) { throw new Error(); }
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) builder (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1283,42 +1343,45 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int doubleConditionEqualZero(double d) {
+  public static int $noinline$doubleConditionEqualZero(double d) {
+    if (doThrow) { throw new Error(); }
     return ((d > 42.0) != false) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int intToDoubleToInt(int value) {
+  public static int $noinline$intToDoubleToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      {{d\d+}}          TypeConversion [<<Arg>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static String intToDoubleToIntPrint(int value) {
+  public static String $noinline$intToDoubleToIntPrint(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back
     // with another use of the intermediate result.
     double d = (double) value;
@@ -1326,55 +1389,58 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int byteToDoubleToInt(byte value) {
+  public static int $noinline$byteToDoubleToInt(byte value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion, use implicit conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static int floatToDoubleToInt(float value) {
+  public static int $noinline$floatToDoubleToInt(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  public static String floatToDoubleToIntPrint(float value) {
+  public static String $noinline$floatToDoubleToIntPrint(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion with
     // an extra use of the intermediate result.
     double d = (double) value;
@@ -1382,176 +1448,186 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static short byteToDoubleToShort(byte value) {
+  public static short $noinline$byteToDoubleToShort(byte value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is byte->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to byte->int which we omit as it's an implicit
     // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static short charToDoubleToShort(char value) {
+  public static short $noinline$charToDoubleToShort(char value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is char->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to char->int which we omit as it's an implicit
     // conversion. Then we are left with the resulting char->short conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short floatToIntToShort(float value) {
+  public static short $noinline$floatToIntToShort(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossy FP to integral conversion followed by another conversion: no simplification.
     return (short) value;
   }
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  public static int intToFloatToInt(int value) {
+  public static int $noinline$intToFloatToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossy integral to FP conversion followed another conversion: no simplification.
     return (int) (float) value;
   }
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double longToIntToDouble(long value) {
+  public static double $noinline$longToIntToDouble(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed an integral to FP conversion: no simplification.
     return (double) (int) value;
   }
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  public static long longToIntToLong(long value) {
+  public static long $noinline$longToIntToLong(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed an int-to-long conversion: no simplification.
     return (long) (int) value;
   }
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static short shortToCharToShort(short value) {
+  public static short $noinline$shortToCharToShort(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion to original type.
     return (short) (char) value;
   }
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Long>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static int shortToLongToInt(short value) {
+  public static int $noinline$shortToLongToInt(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion, use implicit conversion.
     return (int) (long) value;
   }
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  public static byte shortToCharToByte(short value) {
+  public static byte $noinline$shortToCharToByte(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type. Simplify to use only one conversion.
     return (byte) (char) value;
   }
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  public static String shortToCharToBytePrint(short value) {
+  public static String $noinline$shortToCharToBytePrint(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type with an extra use of the intermediate result.
     char c = (char) value;
@@ -1559,7 +1635,7 @@
     return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
   }
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
   /// CHECK-DAG:      <<And:j\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1567,58 +1643,61 @@
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static byte longAnd0xffToByte(long value) {
+  public static byte $noinline$longAnd0xffToByte(long value) {
+    if (doThrow) { throw new Error(); }
     return (byte) (value & 0xff);
   }
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (before)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 131071
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static char intAnd0x1ffffToChar(int value) {
+  public static char $noinline$intAnd0x1ffffToChar(int value) {
+    if (doThrow) { throw new Error(); }
     // Keeping all significant bits and one more.
     return (char) (value & 0x1ffff);
   }
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short intAnd0x17fffToShort(int value) {
+  public static short $noinline$intAnd0x17fffToShort(int value) {
+    if (doThrow) { throw new Error(); }
     // No simplification: clearing a significant bit.
     return (short) (value & 0x17fff);
   }
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 65535
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1626,45 +1705,49 @@
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Same>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double shortAnd0xffffToShortToDouble(short value) {
+  public static double $noinline$shortAnd0xffffToShortToDouble(short value) {
+    if (doThrow) { throw new Error(); }
     short same = (short) (value & 0xffff);
     return (double) same;
   }
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
 
-  public static int intReverseCondition(int i) {
+  public static int $noinline$intReverseCondition(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 > i) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
 
-  public static int intReverseConditionNaN(int i) {
+  public static int $noinline$intReverseConditionNaN(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 != Math.sqrt(i)) ? 13 : 54;
   }
 
-  public static int runSmaliTest(String name, boolean input) {
+  public static int $noinline$runSmaliTest(String name, boolean input) {
+    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, new Class[] { boolean.class });
@@ -1674,155 +1757,270 @@
     }
   }
 
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static int $noinline$intUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 31);
+  }
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const63:i\d+>>  IntConstant 63
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const63>>]
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  public static long $noinline$longUnnecessaryShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >> (shift & 63);
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const255:i\d+>> IntConstant 255
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const255>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  public static int $noinline$intUnnecessaryWiderShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >>> (shift & 0xff);
+  }
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static long $noinline$longSmallerShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 3);
+  }
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$otherUseOfUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    int temp = shift & 31;
+    return (value >> temp) + temp;
+  }
+
 public static void main(String[] args) {
     int arg = 123456;
 
-    assertLongEquals(Add0(arg), arg);
-    assertIntEquals(AndAllOnes(arg), arg);
-    assertLongEquals(Div1(arg), arg);
-    assertIntEquals(DivN1(arg), -arg);
-    assertLongEquals(Mul1(arg), arg);
-    assertIntEquals(MulN1(arg), -arg);
-    assertLongEquals(MulPowerOfTwo128(arg), (128 * arg));
-    assertIntEquals(Or0(arg), arg);
-    assertLongEquals(OrSame(arg), arg);
-    assertIntEquals(Shl0(arg), arg);
-    assertLongEquals(Shr0(arg), arg);
-    assertLongEquals(Shr64(arg), arg);
-    assertLongEquals(Sub0(arg), arg);
-    assertIntEquals(SubAliasNeg(arg), -arg);
-    assertLongEquals(UShr0(arg), arg);
-    assertIntEquals(Xor0(arg), arg);
-    assertIntEquals(XorAllOnes(arg), ~arg);
-    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
-    assertLongEquals(AddNeg1(arg, arg + 1), 1);
-    assertLongEquals(AddNeg2(arg, arg + 1), -1);
-    assertLongEquals(NegNeg1(arg), arg);
-    assertIntEquals(NegNeg2(arg), 0);
-    assertLongEquals(NegNeg3(arg), arg);
-    assertIntEquals(NegSub1(arg, arg + 1), 1);
-    assertIntEquals(NegSub2(arg, arg + 1), 1);
-    assertLongEquals(NotNot1(arg), arg);
-    assertIntEquals(NotNot2(arg), -1);
-    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotNotBool(true), true);
-    assertBooleanEquals(NotNotBool(false), false);
-    assertFloatEquals(Div2(100.0f), 50.0f);
-    assertDoubleEquals(Div2(150.0), 75.0);
-    assertFloatEquals(DivMP25(100.0f), -400.0f);
-    assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertIntEquals(UShr28And15(0xc1234567), 0xc);
-    assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
-    assertIntEquals(UShr28And7(0xc1234567), 0x4);
-    assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L);
-    assertIntEquals(Shr24And255(0xc1234567), 0xc1);
-    assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
-    assertIntEquals(Shr24And127(0xc1234567), 0x41);
-    assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
-    assertIntEquals(0, mulPow2Plus1(0));
-    assertIntEquals(9, mulPow2Plus1(1));
-    assertIntEquals(18, mulPow2Plus1(2));
-    assertIntEquals(900, mulPow2Plus1(100));
-    assertIntEquals(111105, mulPow2Plus1(12345));
-    assertLongEquals(0, mulPow2Minus1(0));
-    assertLongEquals(31, mulPow2Minus1(1));
-    assertLongEquals(62, mulPow2Minus1(2));
-    assertLongEquals(3100, mulPow2Minus1(100));
-    assertLongEquals(382695, mulPow2Minus1(12345));
+    assertLongEquals(arg, $noinline$Add0(arg));
+    assertIntEquals(arg, $noinline$AndAllOnes(arg));
+    assertLongEquals(arg, $noinline$Div1(arg));
+    assertIntEquals(-arg, $noinline$DivN1(arg));
+    assertLongEquals(arg, $noinline$Mul1(arg));
+    assertIntEquals(-arg, $noinline$MulN1(arg));
+    assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg));
+    assertIntEquals(arg, $noinline$Or0(arg));
+    assertLongEquals(arg, $noinline$OrSame(arg));
+    assertIntEquals(arg, $noinline$Shl0(arg));
+    assertLongEquals(arg, $noinline$Shr0(arg));
+    assertLongEquals(arg, $noinline$Shr64(arg));
+    assertLongEquals(arg, $noinline$Sub0(arg));
+    assertIntEquals(-arg, $noinline$SubAliasNeg(arg));
+    assertLongEquals(arg, $noinline$UShr0(arg));
+    assertIntEquals(arg, $noinline$Xor0(arg));
+    assertIntEquals(~arg, $noinline$XorAllOnes(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$AddNegs3(arg, arg + 1));
+    assertLongEquals(1, $noinline$AddNeg1(arg, arg + 1));
+    assertLongEquals(-1, $noinline$AddNeg2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NegNeg1(arg));
+    assertIntEquals(0, $noinline$NegNeg2(arg));
+    assertLongEquals(arg, $noinline$NegNeg3(arg));
+    assertIntEquals(1, $noinline$NegSub1(arg, arg + 1));
+    assertIntEquals(1, $noinline$NegSub2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NotNot1(arg));
+    assertIntEquals(-1, $noinline$NotNot2(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$SubNeg3(arg, arg + 1));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(true, $noinline$NotNotBool(true));
+    assertBooleanEquals(false, $noinline$NotNotBool(false));
+    assertFloatEquals(50.0f, $noinline$Div2(100.0f));
+    assertDoubleEquals(75.0, $noinline$Div2(150.0));
+    assertFloatEquals(-400.0f, $noinline$DivMP25(100.0f));
+    assertDoubleEquals(-600.0, $noinline$DivMP25(150.0));
+    assertIntEquals(0xc, $noinline$UShr28And15(0xc1234567));
+    assertLongEquals(0xcL, $noinline$UShr60And15(0xc123456787654321L));
+    assertIntEquals(0x4, $noinline$UShr28And7(0xc1234567));
+    assertLongEquals(0x4L, $noinline$UShr60And7(0xc123456787654321L));
+    assertIntEquals(0xc1, $noinline$Shr24And255(0xc1234567));
+    assertLongEquals(0xc1L, $noinline$Shr56And255(0xc123456787654321L));
+    assertIntEquals(0x41, $noinline$Shr24And127(0xc1234567));
+    assertLongEquals(0x41L, $noinline$Shr56And127(0xc123456787654321L));
+    assertIntEquals(0, $noinline$mulPow2Plus1(0));
+    assertIntEquals(9, $noinline$mulPow2Plus1(1));
+    assertIntEquals(18, $noinline$mulPow2Plus1(2));
+    assertIntEquals(900, $noinline$mulPow2Plus1(100));
+    assertIntEquals(111105, $noinline$mulPow2Plus1(12345));
+    assertLongEquals(0, $noinline$mulPow2Minus1(0));
+    assertLongEquals(31, $noinline$mulPow2Minus1(1));
+    assertLongEquals(62, $noinline$mulPow2Minus1(2));
+    assertLongEquals(3100, $noinline$mulPow2Minus1(100));
+    assertLongEquals(382695, $noinline$mulPow2Minus1(12345));
 
     booleanField = false;
-    assertIntEquals(booleanFieldNotEqualOne(), 54);
-    assertIntEquals(booleanFieldEqualZero(), 54);
+    assertIntEquals(54, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(54, $noinline$booleanFieldEqualZero());
     booleanField = true;
-    assertIntEquals(booleanFieldNotEqualOne(), 13);
-    assertIntEquals(booleanFieldEqualZero(), 13);
-    assertIntEquals(intConditionNotEqualOne(6), 54);
-    assertIntEquals(intConditionNotEqualOne(43), 13);
-    assertIntEquals(intConditionEqualZero(6), 54);
-    assertIntEquals(intConditionEqualZero(43), 13);
-    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
-    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
-    assertIntEquals(doubleConditionEqualZero(6.0), 54);
-    assertIntEquals(doubleConditionEqualZero(43.0), 13);
+    assertIntEquals(13, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(13, $noinline$booleanFieldEqualZero());
+    assertIntEquals(54, $noinline$intConditionNotEqualOne(6));
+    assertIntEquals(13, $noinline$intConditionNotEqualOne(43));
+    assertIntEquals(54, $noinline$intConditionEqualZero(6));
+    assertIntEquals(13, $noinline$intConditionEqualZero(43));
+    assertIntEquals(54, $noinline$floatConditionNotEqualOne(6.0f));
+    assertIntEquals(13, $noinline$floatConditionNotEqualOne(43.0f));
+    assertIntEquals(54, $noinline$doubleConditionEqualZero(6.0));
+    assertIntEquals(13, $noinline$doubleConditionEqualZero(43.0));
 
-    assertIntEquals(1234567, intToDoubleToInt(1234567));
-    assertIntEquals(Integer.MIN_VALUE, intToDoubleToInt(Integer.MIN_VALUE));
-    assertIntEquals(Integer.MAX_VALUE, intToDoubleToInt(Integer.MAX_VALUE));
-    assertStringEquals("d=7654321.0, i=7654321", intToDoubleToIntPrint(7654321));
-    assertIntEquals(12, byteToDoubleToInt((byte) 12));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToInt(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToInt(Byte.MAX_VALUE));
-    assertIntEquals(11, floatToDoubleToInt(11.3f));
-    assertStringEquals("d=12.25, i=12", floatToDoubleToIntPrint(12.25f));
-    assertIntEquals(123, byteToDoubleToShort((byte) 123));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToShort(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToShort(Byte.MAX_VALUE));
-    assertIntEquals(1234, charToDoubleToShort((char) 1234));
-    assertIntEquals(Character.MIN_VALUE, charToDoubleToShort(Character.MIN_VALUE));
-    assertIntEquals(/* sign-extended */ -1, charToDoubleToShort(Character.MAX_VALUE));
-    assertIntEquals(12345, floatToIntToShort(12345.75f));
-    assertIntEquals(Short.MAX_VALUE, floatToIntToShort((float)(Short.MIN_VALUE - 1)));
-    assertIntEquals(Short.MIN_VALUE, floatToIntToShort((float)(Short.MAX_VALUE + 1)));
-    assertIntEquals(-54321, intToFloatToInt(-54321));
-    assertDoubleEquals((double) 0x12345678, longToIntToDouble(0x1234567812345678L));
-    assertDoubleEquals(0.0, longToIntToDouble(Long.MIN_VALUE));
-    assertDoubleEquals(-1.0, longToIntToDouble(Long.MAX_VALUE));
-    assertLongEquals(0x0000000012345678L, longToIntToLong(0x1234567812345678L));
-    assertLongEquals(0xffffffff87654321L, longToIntToLong(0x1234567887654321L));
-    assertLongEquals(0L, longToIntToLong(Long.MIN_VALUE));
-    assertLongEquals(-1L, longToIntToLong(Long.MAX_VALUE));
-    assertIntEquals((short) -5678, shortToCharToShort((short) -5678));
-    assertIntEquals(Short.MIN_VALUE, shortToCharToShort(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToCharToShort(Short.MAX_VALUE));
-    assertIntEquals(5678, shortToLongToInt((short) 5678));
-    assertIntEquals(Short.MIN_VALUE, shortToLongToInt(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToLongToInt(Short.MAX_VALUE));
-    assertIntEquals(0x34, shortToCharToByte((short) 0x1234));
-    assertIntEquals(-0x10, shortToCharToByte((short) 0x12f0));
-    assertIntEquals(0, shortToCharToByte(Short.MIN_VALUE));
-    assertIntEquals(-1, shortToCharToByte(Short.MAX_VALUE));
-    assertStringEquals("c=1025, b=1", shortToCharToBytePrint((short) 1025));
-    assertStringEquals("c=1023, b=-1", shortToCharToBytePrint((short) 1023));
-    assertStringEquals("c=65535, b=-1", shortToCharToBytePrint((short) -1));
+    assertIntEquals(1234567, $noinline$intToDoubleToInt(1234567));
+    assertIntEquals(Integer.MIN_VALUE, $noinline$intToDoubleToInt(Integer.MIN_VALUE));
+    assertIntEquals(Integer.MAX_VALUE, $noinline$intToDoubleToInt(Integer.MAX_VALUE));
+    assertStringEquals("d=7654321.0, i=7654321", $noinline$intToDoubleToIntPrint(7654321));
+    assertIntEquals(12, $noinline$byteToDoubleToInt((byte) 12));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToInt(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToInt(Byte.MAX_VALUE));
+    assertIntEquals(11, $noinline$floatToDoubleToInt(11.3f));
+    assertStringEquals("d=12.25, i=12", $noinline$floatToDoubleToIntPrint(12.25f));
+    assertIntEquals(123, $noinline$byteToDoubleToShort((byte) 123));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToShort(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToShort(Byte.MAX_VALUE));
+    assertIntEquals(1234, $noinline$charToDoubleToShort((char) 1234));
+    assertIntEquals(Character.MIN_VALUE, $noinline$charToDoubleToShort(Character.MIN_VALUE));
+    assertIntEquals(/* sign-extended */ -1, $noinline$charToDoubleToShort(Character.MAX_VALUE));
+    assertIntEquals(12345, $noinline$floatToIntToShort(12345.75f));
+    assertIntEquals(Short.MAX_VALUE, $noinline$floatToIntToShort((float)(Short.MIN_VALUE - 1)));
+    assertIntEquals(Short.MIN_VALUE, $noinline$floatToIntToShort((float)(Short.MAX_VALUE + 1)));
+    assertIntEquals(-54321, $noinline$intToFloatToInt(-54321));
+    assertDoubleEquals((double) 0x12345678, $noinline$longToIntToDouble(0x1234567812345678L));
+    assertDoubleEquals(0.0, $noinline$longToIntToDouble(Long.MIN_VALUE));
+    assertDoubleEquals(-1.0, $noinline$longToIntToDouble(Long.MAX_VALUE));
+    assertLongEquals(0x0000000012345678L, $noinline$longToIntToLong(0x1234567812345678L));
+    assertLongEquals(0xffffffff87654321L, $noinline$longToIntToLong(0x1234567887654321L));
+    assertLongEquals(0L, $noinline$longToIntToLong(Long.MIN_VALUE));
+    assertLongEquals(-1L, $noinline$longToIntToLong(Long.MAX_VALUE));
+    assertIntEquals((short) -5678, $noinline$shortToCharToShort((short) -5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToCharToShort(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToCharToShort(Short.MAX_VALUE));
+    assertIntEquals(5678, $noinline$shortToLongToInt((short) 5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToLongToInt(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToLongToInt(Short.MAX_VALUE));
+    assertIntEquals(0x34, $noinline$shortToCharToByte((short) 0x1234));
+    assertIntEquals(-0x10, $noinline$shortToCharToByte((short) 0x12f0));
+    assertIntEquals(0, $noinline$shortToCharToByte(Short.MIN_VALUE));
+    assertIntEquals(-1, $noinline$shortToCharToByte(Short.MAX_VALUE));
+    assertStringEquals("c=1025, b=1", $noinline$shortToCharToBytePrint((short) 1025));
+    assertStringEquals("c=1023, b=-1", $noinline$shortToCharToBytePrint((short) 1023));
+    assertStringEquals("c=65535, b=-1", $noinline$shortToCharToBytePrint((short) -1));
 
-    assertIntEquals(0x21, longAnd0xffToByte(0x1234432112344321L));
-    assertIntEquals(0, longAnd0xffToByte(Long.MIN_VALUE));
-    assertIntEquals(-1, longAnd0xffToByte(Long.MAX_VALUE));
-    assertIntEquals(0x1234, intAnd0x1ffffToChar(0x43211234));
-    assertIntEquals(0, intAnd0x1ffffToChar(Integer.MIN_VALUE));
-    assertIntEquals(Character.MAX_VALUE, intAnd0x1ffffToChar(Integer.MAX_VALUE));
-    assertIntEquals(0x4321, intAnd0x17fffToShort(0x87654321));
-    assertIntEquals(0x0888, intAnd0x17fffToShort(0x88888888));
-    assertIntEquals(0, intAnd0x17fffToShort(Integer.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, intAnd0x17fffToShort(Integer.MAX_VALUE));
+    assertIntEquals(0x21, $noinline$longAnd0xffToByte(0x1234432112344321L));
+    assertIntEquals(0, $noinline$longAnd0xffToByte(Long.MIN_VALUE));
+    assertIntEquals(-1, $noinline$longAnd0xffToByte(Long.MAX_VALUE));
+    assertIntEquals(0x1234, $noinline$intAnd0x1ffffToChar(0x43211234));
+    assertIntEquals(0, $noinline$intAnd0x1ffffToChar(Integer.MIN_VALUE));
+    assertIntEquals(Character.MAX_VALUE, $noinline$intAnd0x1ffffToChar(Integer.MAX_VALUE));
+    assertIntEquals(0x4321, $noinline$intAnd0x17fffToShort(0x87654321));
+    assertIntEquals(0x0888, $noinline$intAnd0x17fffToShort(0x88888888));
+    assertIntEquals(0, $noinline$intAnd0x17fffToShort(Integer.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$intAnd0x17fffToShort(Integer.MAX_VALUE));
 
-    assertDoubleEquals(0.0, shortAnd0xffffToShortToDouble((short) 0));
-    assertDoubleEquals(1.0, shortAnd0xffffToShortToDouble((short) 1));
-    assertDoubleEquals(-2.0, shortAnd0xffffToShortToDouble((short) -2));
-    assertDoubleEquals(12345.0, shortAnd0xffffToShortToDouble((short) 12345));
-    assertDoubleEquals((double)Short.MAX_VALUE, shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
-    assertDoubleEquals((double)Short.MIN_VALUE, shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
+    assertDoubleEquals(0.0, $noinline$shortAnd0xffffToShortToDouble((short) 0));
+    assertDoubleEquals(1.0, $noinline$shortAnd0xffffToShortToDouble((short) 1));
+    assertDoubleEquals(-2.0, $noinline$shortAnd0xffffToShortToDouble((short) -2));
+    assertDoubleEquals(12345.0, $noinline$shortAnd0xffffToShortToDouble((short) 12345));
+    assertDoubleEquals((double)Short.MAX_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
+    assertDoubleEquals((double)Short.MIN_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
 
-    assertIntEquals(intReverseCondition(41), 13);
-    assertIntEquals(intReverseConditionNaN(-5), 13);
+    assertIntEquals(13, $noinline$intReverseCondition(41));
+    assertIntEquals(13, $noinline$intReverseConditionNaN(-5));
 
     for (String condition : new String[] { "Equal", "NotEqual" }) {
       for (String constant : new String[] { "True", "False" }) {
         for (String side : new String[] { "Rhs", "Lhs" }) {
           String name = condition + constant + side;
-          assertIntEquals(runSmaliTest(name, true), 5);
-          assertIntEquals(runSmaliTest(name, false), 3);
+          assertIntEquals(5, $noinline$runSmaliTest(name, true));
+          assertIntEquals(3, $noinline$runSmaliTest(name, false));
         }
       }
     }
+
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10 + 128));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
   }
 
   private static boolean $inline$true() { return true; }
diff --git a/test/608-checker-unresolved-lse/expected.txt b/test/608-checker-unresolved-lse/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/608-checker-unresolved-lse/expected.txt
diff --git a/test/608-checker-unresolved-lse/info.txt b/test/608-checker-unresolved-lse/info.txt
new file mode 100644
index 0000000..466d5f4
--- /dev/null
+++ b/test/608-checker-unresolved-lse/info.txt
@@ -0,0 +1,3 @@
+Regression test for the load store elimination optimization,
+which used to wrongly remove field stores in the presence of
+unresolved accesses.
diff --git a/test/608-checker-unresolved-lse/run b/test/608-checker-unresolved-lse/run
new file mode 100644
index 0000000..226891f
--- /dev/null
+++ b/test/608-checker-unresolved-lse/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Pass --secondary so that the secondary dex file is added to the class path.
+exec ${RUN} "$@" --secondary
diff --git a/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
new file mode 100644
index 0000000..b11b9be
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class MissingSuperClass {  // Intentionally empty; only exists to be Main's super class.
+}
diff --git a/test/608-checker-unresolved-lse/src/Main.java b/test/608-checker-unresolved-lse/src/Main.java
new file mode 100644
index 0000000..c6f8854
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src/Main.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// We make Main extend an unresolved super class. This will lead to
+// unresolved accesses to Foo.iField and Foo.sField, as we won't know
+// if Main can access a package private field.
+public class Main extends MissingSuperClass {
+
+  public static void main(String[] args) {
+    instanceFieldTest();
+    staticFieldTest();
+    instanceFieldTest2();
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+
+  // Load store elimination used to remove the InstanceFieldSet, thinking
+  // that the UnresolvedInstanceFieldGet was not related. However, inlining
+  // can put you in a situation where the UnresolvedInstanceFieldGet resolves
+  // to the same field as the one in InstanceFieldSet. So the InstanceFieldSet
+  // must be preserved.
+
+  /// CHECK-START: void Main.instanceFieldTest() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+  public static void instanceFieldTest() {
+    Foo f = new Foo();
+    if (f.iField != 42) {
+      throw new Error("Expected 42, got " + f.iField);
+    }
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+
+  // Load store elimination will eliminate the first InstanceFieldGet because
+  // it simply follows an InstanceFieldSet. It must however not eliminate the second
+  // InstanceFieldGet, as the UnresolvedInstanceFieldSet might resolve to the same
+  // field.
+
+  /// CHECK-START: void Main.instanceFieldTest2() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  public static void instanceFieldTest2() {
+    Foo f = new Foo();
+    int a = f.$inline$GetInstanceField();
+    f.iField = 43;
+    a = f.$inline$GetInstanceField();
+    if (a != 43) {
+      throw new Error("Expected 43, got " + a);
+    }
+  }
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (before)
+  /// CHECK-NOT:    StaticFieldSet
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+
+  /// CHECK-START: void Main.staticFieldTest() load_store_elimination (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+  public static void staticFieldTest() {
+    // Ensure Foo is initialized.
+    Foo f = new Foo();
+    f.$inline$StaticSet42();
+    f.$inline$StaticSet43();
+    if (Foo.sField != 43) {
+      throw new Error("Expected 43, got " + Foo.sField);
+    }
+  }
+}
+
+class Foo {
+  // Fields need to be package-private to make the accesses from Main's
+  // test methods unresolved.
+  int iField;
+  static int sField;
+
+  public void $inline$StaticSet42() {
+    sField = 42;
+  }
+
+  public void $inline$StaticSet43() {
+    sField = 43;
+  }
+
+  public int $inline$GetInstanceField() {
+    return iField;
+  }
+
+  // Constructor needs to be public to get it resolved in Main's test
+  // methods and therefore inlined.
+  public Foo() {
+    iField = 42;
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 8598474..01790ae 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -25,6 +25,7 @@
   004-SignalTest/signaltest.cc \
   004-ReferenceMap/stack_walk_refmap_jni.cc \
   004-StackWalk/stack_walk_jni.cc \
+  004-ThreadStress/thread_stress.cc \
   004-UnsafeTest/unsafe_test.cc \
   044-proxy/native_proxy.cc \
   051-thread/thread_test.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index f118a76..dd6b6f3 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -294,7 +294,8 @@
   147-stripped-dex-fallback \
   554-jit-profile-file \
   529-checker-unresolved \
-  555-checker-regression-x86const
+  555-checker-regression-x86const \
+  608-checker-unresolved-lse
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \