Merge "Rename kCall to kCallOnMainOnly"
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index a4a603a..17ea4da 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -56,7 +56,7 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 123bcaa..bd13d16 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -34,7 +34,7 @@
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
 
-# Set this to change what opt level Art is built at.
+# Set this to change what opt level ART is built at.
 ART_DEBUG_OPT_FLAG ?= -O2
 ART_NDEBUG_OPT_FLAG ?= -O3
 
@@ -336,6 +336,12 @@
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
   -UNDEBUG
 
+# Assembler flags for non-debug ART and ART tools.
+art_non_debug_asflags :=
+
+# Assembler flags for debug ART and ART tools.
+art_debug_asflags := -UNDEBUG
+
 art_host_non_debug_cflags := $(art_non_debug_cflags)
 art_target_non_debug_cflags := $(art_non_debug_cflags)
 
@@ -386,6 +392,11 @@
 ART_HOST_DEBUG_CFLAGS := $(art_debug_cflags)
 ART_TARGET_DEBUG_CFLAGS := $(art_debug_cflags)
 
+ART_HOST_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_TARGET_NON_DEBUG_ASFLAGS := $(art_non_debug_asflags)
+ART_HOST_DEBUG_ASFLAGS := $(art_debug_asflags)
+ART_TARGET_DEBUG_ASFLAGS := $(art_debug_asflags)
+
 ifndef LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA
   LIBART_IMG_HOST_MIN_BASE_ADDRESS_DELTA=-0x1000000
 endif
@@ -414,6 +425,8 @@
 art_target_cflags :=
 art_debug_cflags :=
 art_non_debug_cflags :=
+art_debug_asflags :=
+art_non_debug_asflags :=
 art_host_non_debug_cflags :=
 art_target_non_debug_cflags :=
 art_default_gc_type_cflags :=
@@ -435,8 +448,10 @@
   art_target_cflags_ndebug_or_debug := $(1)
   ifeq ($$(art_target_cflags_ndebug_or_debug),debug)
     LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_DEBUG_ASFLAGS)
   else
     LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_TARGET_NON_DEBUG_ASFLAGS)
   endif
 
   LOCAL_CLANG_CFLAGS := $(ART_TARGET_CLANG_CFLAGS)
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index cb6d340..157500b 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -90,8 +90,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     ifeq ($$(art_static_or_shared),static)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 009933d..74c3033 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -94,17 +94,17 @@
 
 ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
-  $(HOST_CORE_IMAGE_default_no-pic_32)
+  $(HOST_CORE_IMAGE_default_no-pic_32) \
+  $(HOST_OUT_EXECUTABLES)/patchoatd
 ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
-  $(TARGET_CORE_IMAGE_default_no-pic_32)
+  $(TARGET_CORE_IMAGE_default_no-pic_32) \
+  $(TARGET_OUT_EXECUTABLES)/patchoatd
 
 ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS) \
-   $(HOST_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
-   $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS) \
-   $(TARGET_OUT_EXECUTABLES)/patchoatd
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
 
 
 ART_GTEST_dex2oat_test_HOST_DEPS := \
@@ -679,7 +679,7 @@
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4ec7d72..02c176c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -262,8 +262,10 @@
     endif
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 474530a..4c0095d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2521,11 +2521,28 @@
                                                               true);
     }
     // Create the conflict tables.
-    if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) {
-      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
-    }
+    FillIMTAndConflictTables(klass);
     return true;
   }
+
+ private:
+  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!klass->ShouldHaveImt()) {
+      return;
+    }
+    if (visited_classes_.find(klass) != visited_classes_.end()) {  // Already processed.
+      return;
+    }
+    if (klass->HasSuperClass()) {
+      FillIMTAndConflictTables(klass->GetSuperClass());  // Process the superclass chain first.
+    }
+    if (!klass->IsTemp()) {
+      Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
+    }
+    visited_classes_.insert(klass);  // Recorded even for temp classes that were not filled.
+  }
+
+  std::set<mirror::Class*> visited_classes_;
 };
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index da10568..063eb11 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1232,9 +1232,10 @@
       }
       // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
       // live.
-      if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
-        for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-          ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
+      if (as_klass->ShouldHaveImt()) {
+        ImTable* imt = as_klass->GetImt(target_ptr_size_);
+        for (size_t i = 0; i < ImTable::kSize; ++i) {
+          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
           DCHECK(imt_method != nullptr);
           if (imt_method->IsRuntimeMethod() &&
               !IsInBootImage(imt_method) &&
@@ -1243,6 +1244,11 @@
           }
         }
       }
+
+      if (as_klass->ShouldHaveImt()) {
+        ImTable* imt = as_klass->GetImt(target_ptr_size_);
+        TryAssignImTableOffset(imt, oat_index);
+      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
       int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
@@ -1269,6 +1275,23 @@
   return native_object_relocations_.find(ptr) != native_object_relocations_.end();
 }
 
+void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
+  // Nothing to do for a null table, one in the boot image, or one already assigned.
+  if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
+    return;
+  }
+  // Record a relocation at the current end of the IMT bin, then grow the bin by the table size.
+  ImageInfo& image_info = GetImageInfo(oat_index);
+  const size_t size = ImTable::SizeInBytes(target_ptr_size_);
+  native_object_relocations_.emplace(
+      imt,
+      NativeObjectRelocation {
+          oat_index,
+          image_info.bin_slot_sizes_[kBinImTable],
+          kNativeObjectRelocationTypeIMTable});
+  image_info.bin_slot_sizes_[kBinImTable] += size;
+}
+
 void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
   // No offset, or already assigned.
   if (table == nullptr || NativeRelocationAssigned(table)) {
@@ -1391,6 +1414,7 @@
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
+        case kBinImTable:
         case kBinIMTConflictTable: {
           bin_offset = RoundUp(bin_offset, target_ptr_size_);
           break;
@@ -1461,6 +1485,10 @@
       bin_slot_offsets_[kBinArtMethodClean],
       bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
 
+  // IMT section.
+  ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
+  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
+
   // Conflict tables section.
   ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
   *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
@@ -1585,6 +1613,13 @@
   ImageWriter* const image_writer_;
 };
 
+void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
+  for (size_t i = 0; i < ImTable::kSize; ++i) {  // Rewrite every slot of the copy.
+    ArtMethod* method = orig->Get(i, target_ptr_size_);
+    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
+  }
+}
+
 void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
   const size_t count = orig->NumEntries(target_ptr_size_);
   for (size_t i = 0; i < count; ++i) {
@@ -1642,6 +1677,12 @@
       case kNativeObjectRelocationTypeDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
+      case kNativeObjectRelocationTypeIMTable: {
+        ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
+        ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
+        CopyAndFixupImTable(orig_imt, dest_imt);
+        break;
+      }
       case kNativeObjectRelocationTypeIMTConflictTable: {
         auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
         CopyAndFixupImtConflictTable(
@@ -1850,13 +1891,25 @@
 }
 
 template <typename T>
+std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::ostringstream oss;
+  oss << ptr;
+  return oss.str();
+}
+
+template <>
+std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return PrettyMethod(method);
+}
+
+template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
     auto it = native_object_relocations_.find(obj);
-    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
-        << Runtime::Current()->GetHeap()->DumpSpaces();
+    CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj)
+        << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces();
     const NativeObjectRelocation& relocation = it->second;
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
     return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
@@ -2210,6 +2263,8 @@
       return kBinDexCacheArray;
     case kNativeObjectRelocationTypeRuntimeMethod:
       return kBinRuntimeMethod;
+    case kNativeObjectRelocationTypeIMTable:
+      return kBinImTable;
     case kNativeObjectRelocationTypeIMTConflictTable:
       return kBinIMTConflictTable;
   }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 51976c5..1efdc22 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -169,6 +169,8 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
+    // IMT (clean)
+    kBinImTable,
     // Conflict tables (clean).
     kBinIMTConflictTable,
     // Runtime methods (always clean, do not have a length prefix array).
@@ -191,6 +193,7 @@
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
     kNativeObjectRelocationTypeRuntimeMethod,
+    kNativeObjectRelocationTypeIMTable,
     kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
@@ -401,6 +404,7 @@
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
@@ -433,6 +437,8 @@
                           size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
   // relocation.
   void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9b49a44..0ac621f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1889,8 +1889,6 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -1916,10 +1914,14 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  __ LoadFromOffset(kLoadWord, temp, temp,
+        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
-  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
@@ -2290,8 +2292,7 @@
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
           SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          __ vmovs(temp, in.AsFpuRegister<SRegister>());
-          __ vcvtis(temp, temp);
+          __ vcvtis(temp, in.AsFpuRegister<SRegister>());
           __ vmovrs(out.AsRegister<Register>(), temp);
           break;
         }
@@ -2299,9 +2300,7 @@
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
           SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
-          DRegister temp_d = FromLowSToD(temp_s);
-          __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
-          __ vcvtid(temp_s, temp_d);
+          __ vcvtid(temp_s, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
           __ vmovrs(out.AsRegister<Register>(), temp_s);
           break;
         }
@@ -6959,8 +6958,11 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+    __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(),
+        locations->InAt(0).AsRegister<Register>(),
+        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kArmPointerSize));
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 02f45ab..84cb049 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3506,8 +3506,6 @@
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   LocationSummary* locations = invoke->GetLocations();
   Register temp = XRegisterFrom(locations->GetTemp(0));
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
@@ -3537,6 +3535,10 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+  __ Ldr(temp,
+      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -5354,8 +5356,10 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
+        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
+    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kArm64PointerSize));
   }
   __ Ldr(XRegisterFrom(locations->Out()),
          MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e49d6f2..1038b2d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3772,8 +3772,6 @@
 void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
@@ -3790,6 +3788,10 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadWord, temp, temp,
+      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -5383,8 +5385,12 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
+    __ LoadFromOffset(kLoadWord,
+                      locations->Out().AsRegister<Register>(),
+                      locations->InAt(0).AsRegister<Register>(),
+                      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
+    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kMipsPointerSize));
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 621cd1e..aa1ba84 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2932,8 +2932,6 @@
 void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
@@ -2950,6 +2948,10 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadDoubleword, temp, temp,
+      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c45739d..2ded562 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2027,8 +2027,6 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -2055,7 +2053,12 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movl(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
@@ -4075,8 +4078,12 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
+    __ movl(locations->InAt(0).AsRegister<Register>(),
+        Address(locations->InAt(0).AsRegister<Register>(),
+        mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
+    // InAt(0) now holds the IMT pointer (input clobbered -- NOTE(review): confirm this is safe).
+    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kX86PointerSize));
   }
   __ movl(locations->Out().AsRegister<Register>(),
           Address(locations->InAt(0).AsRegister<Register>(), method_offset));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f05dbba..fd7d483 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2257,8 +2257,6 @@
   LocationSummary* locations = invoke->GetLocations();
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
-  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
@@ -2284,6 +2282,12 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
+  // temp = temp->GetAddressOfIMT()
+  __ movq(temp,
+      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+  // method_offset = offset of the selected entry within the IMT.
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -4007,8 +4011,11 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   } else {
-    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
-        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
+    __ movq(locations->Out().AsRegister<CpuRegister>(),
+            Address(locations->InAt(0).AsRegister<CpuRegister>(),
+            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
+    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
   }
   __ movq(locations->Out().AsRegister<CpuRegister>(),
           Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f9e78b0..6c1292c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -656,8 +656,8 @@
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry(
-          method_index % mirror::Class::kImtSize, pointer_size);
+      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
+          method_index % ImTable::kSize, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
         // interface table.
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index dda779c..16438a7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -608,54 +608,66 @@
   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
 }
 
-static void CreateFPToIntPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresFpuRegister());
   locations->SetOut(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
-static void GenMathRound(LocationSummary* locations,
-                         bool is_double,
-                         vixl::MacroAssembler* masm) {
-  FPRegister in_reg = is_double ?
-      DRegisterFrom(locations->InAt(0)) : SRegisterFrom(locations->InAt(0));
-  Register out_reg = is_double ?
-      XRegisterFrom(locations->Out()) : WRegisterFrom(locations->Out());
-  UseScratchRegisterScope temps(masm);
-  FPRegister temp1_reg = temps.AcquireSameSizeAs(in_reg);
+static void GenMathRound(HInvoke* invoke, bool is_double, vixl::MacroAssembler* masm) {
+  // Java 8 API definition for Math.round():
+  // Return the closest long or int to the argument, with ties rounding to positive infinity.
+  //
+  // There is no single instruction in ARMv8 that implements the above definition.
+  // We choose to use FCVTAS here, because it has the closest semantics.
+  // FCVTAS performs rounding to nearest integer, ties away from zero.
+  // For most inputs (positive values, zero or NaN), this instruction is enough.
+  // We only need a little extra handling code after FCVTAS if the input is a negative tie.
+  //
+  // The reason why we didn't choose FCVTPS instruction here is that
+  // although it performs rounding toward positive infinity, it doesn't perform rounding to nearest.
+  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
+  // If we were using this instruction, for most inputs, more handling code would be needed.
+  LocationSummary* l = invoke->GetLocations();
+  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
+  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
+  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
+  vixl::Label done;
 
-  // 0.5 can be encoded as an immediate, so use fmov.
-  if (is_double) {
-    __ Fmov(temp1_reg, static_cast<double>(0.5));
-  } else {
-    __ Fmov(temp1_reg, static_cast<float>(0.5));
-  }
-  __ Fadd(temp1_reg, in_reg, temp1_reg);
-  __ Fcvtms(out_reg, temp1_reg);
+  // Round to nearest integer, ties away from zero.
+  __ Fcvtas(out_reg, in_reg);
+
+  // For positive values, zero or NaN inputs, rounding is done.
+  __ Tbz(out_reg, out_reg.size() - 1, &done);
+
+  // Handle input < 0 cases.
+  // If input is negative but not a tie, previous result (round to nearest) is valid.
+  // If input is a negative tie, out_reg += 1.
+  __ Frinta(tmp_fp, in_reg);
+  __ Fsub(tmp_fp, in_reg, tmp_fp);
+  __ Fcmp(tmp_fp, 0.5);
+  __ Cinc(out_reg, out_reg, eq);
+
+  __ Bind(&done);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  // See intrinsics.h.
-  if (kRoundIsPlusPointFive) {
-    CreateFPToIntPlusTempLocations(arena_, invoke);
-  }
+  CreateFPToIntPlusFPTempLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
+  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b3b31e8..a9807bd 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -876,6 +876,151 @@
   GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler());
 }
 
+static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
+
+  Mips64Label done;
+  Mips64Label finite;
+  Mips64Label add;
+
+  // if (in.isNaN) {
+  //   return 0;
+  // }
+  //
+  // out = floor(in);
+  //
+  // /*
+  //  * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
+  //  *
+  //  * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
+  //  * numbers which are too large to be represented in a 32-/64-bit
+  //  * signed integer will be processed by floor.X.Y to output
+  //  * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
+  //  * processed by this "if" statement.
+  //  *
+  //  * However, this bug in the 64-bit MIPS emulator causes the
+  //  * behavior of floor.X.Y to be the same as pre-R6 implementations
+  //  * of MIPS64.  When that bug is fixed this logic should be amended.
+  //  */
+  // if (out == MAX_VALUE) {
+  //   TMP = (in < 0.0) ? 1 : 0;
+  //   /*
+  //    * If TMP is 1, then adding it to out will wrap its value from
+  //    * MAX_VALUE to MIN_VALUE.
+  //    */
+  //   return out += TMP;
+  // }
+  //
+  // /*
+  //  * For negative values not handled by the previous "if" statement the
+  //  * test here will correctly set the value of TMP.
+  //  */
+  // TMP = ((in - out) >= 0.5) ? 1 : 0;
+  // return out += TMP;
+
+  // Test for NaN.
+  if (type == Primitive::kPrimDouble) {
+    __ CmpUnD(FTMP, in, in);
+  } else {
+    __ CmpUnS(FTMP, in, in);
+  }
+
+  // Return zero for NaN.
+  __ Move(out, ZERO);
+  __ Bc1nez(FTMP, &done);
+
+  // out = floor(in);
+  if (type == Primitive::kPrimDouble) {
+    __ FloorLD(FTMP, in);
+    __ Dmfc1(out, FTMP);
+  } else {
+    __ FloorWS(FTMP, in);
+    __ Mfc1(out, FTMP);
+  }
+
+  // Branch to "finite" unless out is Long.MAX_VALUE (double) or Integer.MAX_VALUE (float).
+  if (type == Primitive::kPrimDouble) {
+    __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
+  } else {
+    __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+  }
+  __ Bnec(AT, out, &finite);
+
+  if (type == Primitive::kPrimDouble) {
+    __ Dmtc1(ZERO, FTMP);
+    __ CmpLtD(FTMP, in, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Mtc1(ZERO, FTMP);
+    __ CmpLtS(FTMP, in, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bc(&add);
+
+  __ Bind(&finite);
+
+  // TMP = (0.5 <= (in - out)) ? -1 : 0;
+  if (type == Primitive::kPrimDouble) {
+    __ Cvtdl(FTMP, FTMP);  // Convert output of floor.l.d back to "double".
+    __ LoadConst64(AT, bit_cast<int64_t, double>(0.5));
+    __ SubD(FTMP, in, FTMP);
+    __ Dmtc1(AT, half);
+    __ CmpLeD(FTMP, half, FTMP);
+    __ Dmfc1(AT, FTMP);
+  } else {
+    __ Cvtsw(FTMP, FTMP);  // Convert output of floor.w.s back to "float".
+    __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+    __ SubS(FTMP, in, FTMP);
+    __ Mtc1(AT, half);
+    __ CmpLeS(FTMP, half, FTMP);
+    __ Mfc1(AT, FTMP);
+  }
+
+  __ Bind(&add);
+
+  // Return out -= TMP.
+  if (type == Primitive::kPrimDouble) {
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Subu(out, out, AT);
+  }
+
+  __ Bind(&done);
+}
+
+// int java.lang.Math.round(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat);
+}
+
+// long java.lang.Math.round(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->AddTemp(Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) {
+  GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble);
+}
+
 // byte libcore.io.Memory.peekByte(long address)
 void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
@@ -1734,9 +1879,6 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathRoundFloat)
-
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index a571d14..9cf72a2 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -754,32 +754,7 @@
     }
   }
 
-  void LoadDImmediate(DRegister sd, double value, Condition cond = AL) {
-    if (!vmovd(sd, value, cond)) {
-      uint64_t int_value = bit_cast<uint64_t, double>(value);
-      if (int_value == bit_cast<uint64_t, double>(0.0)) {
-        // 0.0 is quite common, so we special case it by loading
-        // 2.0 in `sd` and then substracting it.
-        bool success = vmovd(sd, 2.0, cond);
-        CHECK(success);
-        vsubd(sd, sd, sd, cond);
-      } else {
-        if (sd < 16) {
-          SRegister low = static_cast<SRegister>(sd << 1);
-          SRegister high = static_cast<SRegister>(low + 1);
-          LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
-          if (High32Bits(int_value) == Low32Bits(int_value)) {
-            vmovs(high, low);
-          } else {
-            LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
-          }
-        } else {
-          LOG(FATAL) << "Unimplemented loading of double into a D register "
-                     << "that cannot be split into two S registers";
-        }
-      }
-    }
-  }
+  virtual void LoadDImmediate(DRegister dd, double value, Condition cond = AL) = 0;
 
   virtual void MarkExceptionHandler(Label* label) = 0;
   virtual void LoadFromOffset(LoadOperandType type,
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6f7119d..c95dfa8 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1486,6 +1486,34 @@
   }
 }
 
+void Arm32Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      if (dd < 16) {
+        // Note: Depending on the particular CPU, this may cause a register
+        // forwarding hazard, negatively impacting performance.
+        SRegister low = static_cast<SRegister>(dd << 1);
+        SRegister high = static_cast<SRegister>(low + 1);
+        LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
+        if (High32Bits(int_value) == Low32Bits(int_value)) {
+          vmovs(high, low);
+        } else {
+          LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
+        }
+      } else {
+        LOG(FATAL) << "Unimplemented loading of double into a D register "
+                   << "that cannot be split into two S registers";
+      }
+    }
+  }
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetArm.
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 8726ac8..554dd23 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -270,6 +270,7 @@
 
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a72ea41..8747dad 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1917,7 +1917,7 @@
 
     case kLongOrFPLiteral1KiB:
       return 4u;
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
       return 10u;
     case kLongOrFPLiteralFar:
       return 14u;
@@ -1989,7 +1989,7 @@
       break;
     case kLiteral1MiB:
     case kLiteral64KiB:
-    case kLongOrFPLiteral256KiB:
+    case kLongOrFPLiteral64KiB:
     case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 4;        // One extra 32-bit MOV.
@@ -2105,10 +2105,10 @@
       if (IsUint<10>(GetOffset(current_code_size))) {
         break;
       }
-      current_code_size += IncreaseSize(kLongOrFPLiteral256KiB);
+      current_code_size += IncreaseSize(kLongOrFPLiteral64KiB);
       FALLTHROUGH_INTENDED;
-    case kLongOrFPLiteral256KiB:
-      if (IsUint<18>(GetOffset(current_code_size))) {
+    case kLongOrFPLiteral64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
         break;
       }
       current_code_size += IncreaseSize(kLongOrFPLiteralFar);
@@ -2269,11 +2269,10 @@
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
       break;
     }
-    case kLongOrFPLiteral256KiB: {
-      int32_t offset = GetOffset(code_size);
-      int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff);
+    case kLongOrFPLiteral64KiB: {
+      int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size));
       int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC);
-      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff);    // DCHECKs type_.
+      int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u);    // DCHECKs type_.
       buffer->Store<int16_t>(location_, mov_encoding >> 16);
       buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
       buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
@@ -3598,6 +3597,24 @@
   }
 }
 
+void Thumb2Assembler::LoadDImmediate(DRegister dd, double value, Condition cond) {
+  if (!vmovd(dd, value, cond)) {
+    uint64_t int_value = bit_cast<uint64_t, double>(value);
+    if (int_value == bit_cast<uint64_t, double>(0.0)) {
+      // 0.0 is quite common, so we special case it by loading
+      // 2.0 in `dd` and then subtracting it.
+      bool success = vmovd(dd, 2.0, cond);
+      CHECK(success);
+      vsubd(dd, dd, dd, cond);
+    } else {
+      Literal* literal = literal64_dedupe_map_.GetOrCreate(
+          int_value,
+          [this, int_value]() { return NewLiteral<uint64_t>(int_value); });
+      LoadLiteral(dd, literal);
+    }
+  }
+}
+
 int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
   switch (type) {
     case kLoadSignedByte:
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 2ca74fc..4ee23c0 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -43,6 +43,7 @@
         fixups_(arena->Adapter(kArenaAllocAssembler)),
         fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
         literals_(arena->Adapter(kArenaAllocAssembler)),
+        literal64_dedupe_map_(std::less<uint64_t>(), arena->Adapter(kArenaAllocAssembler)),
         jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0u),
         last_old_position_(0u),
@@ -319,6 +320,7 @@
 
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadDImmediate(DRegister dd, double value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
   void LoadFromOffset(LoadOperandType type,
                       Register reg,
@@ -464,8 +466,8 @@
       // Load long or FP literal variants.
       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
       kLongOrFPLiteral1KiB,
-      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
-      kLongOrFPLiteral256KiB,
+      // MOV ip, imm16 + ADD ip, pc + VLDR s/dX, [IP, #0]; up to 64KiB offset; 10 bytes.
+      kLongOrFPLiteral64KiB,
       // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
       kLongOrFPLiteralFar,
     };
@@ -500,7 +502,7 @@
     // Load wide literal.
     static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                  Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
       return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
@@ -510,7 +512,7 @@
     // Load FP single literal.
     static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                    AL, kLoadFPLiteralSingle, size, location);
@@ -519,7 +521,7 @@
     // Load FP double literal.
     static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                    Size size = kLongOrFPLiteral1KiB) {
-      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral64KiB ||
              size == kLongOrFPLiteralFar);
       return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                    AL, kLoadFPLiteralDouble, size, location);
@@ -870,6 +872,9 @@
   // without invalidating pointers and references to existing elements.
   ArenaDeque<Literal> literals_;
 
+  // Deduplication map for 64-bit literals, used for LoadDImmediate().
+  ArenaSafeMap<uint64_t, Literal*> literal64_dedupe_map_;
+
   // Jump table list.
   ArenaDeque<JumpTable> jump_tables_;
 
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 7f1dc49..f3fa72c 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -869,10 +869,11 @@
   }
 
   std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x408 - 0x4 - 4)\n"
       "1:\n"
       "add ip, pc\n"
-      "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
+      "ldrd r1, r3, [ip, #0]\n" +
       RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
       ".align 2, 0\n"
       "2:\n"
@@ -884,48 +885,78 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
-TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) {
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB) {
   // The literal size must match but the type doesn't, so use an int32_t rather than float.
   arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
   __ LoadLiteral(arm::S3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 3u;
-  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
-    __ ldr(arm::R0, arm::Address(arm::R0));
-  }
-
-  std::string expected =
-      "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n"
-      "1:\n"
-      "add ip, pc\n"
-      "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" +
-      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
-      ".align 2, 0\n"
-      "2:\n"
-      ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralSingleMax256KiB");
-
-  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
-            __ GetAdjustedPosition(label.Position()));
-}
-
-TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) {
-  // The literal size must match but the type doesn't, so use an int64_t rather than double.
-  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
-  __ LoadLiteral(arm::D3, literal);
-  Label label;
-  __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 3u;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #(0x40000 & 0xffff)\n"
+      "movw ip, #(0x10004 - 0x4 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax64KiB_UnalignedPC) {
+  // The literal size must match but the type doesn't, so use an int32_t rather than float.
+  arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678);
+  __ ldr(arm::R0, arm::Address(arm::R0));
+  __ LoadLiteral(arm::S3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 4u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      "ldr r0, [r0]\n"
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #(0x10004 - 0x6 - 4)\n"
+      "1:\n"
+      "add ip, pc\n"
+      "vldr s3, [ip, #0]\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2, 0\n"
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadLiteralSingleMax64KiB_UnalignedPC");
+
+  EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u,
+            __ GetAdjustedPosition(label.Position()));
+}
+
+TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax64KiB) {
+  // The literal size must match but the type doesn't, so use an int64_t rather than double.
+  arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321));
+  __ LoadLiteral(arm::D3, literal);
+  Label label;
+  __ Bind(&label);
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  std::string expected =
+      // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
+      "movw ip, #((0x1000c - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #(0x40000 >> 16)\n"
+      "movt ip, #((0x1000c - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
@@ -934,7 +965,7 @@
       "2:\n"
       ".word 0x87654321\n"
       ".word 0x12345678\n";
-  DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB");
+  DriverStr(expected, "LoadLiteralDoubleBeyondMax64KiB");
 
   EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u,
             __ GetAdjustedPosition(label.Position()));
@@ -946,16 +977,16 @@
   __ LoadLiteral(arm::D3, literal);
   Label label;
   __ Bind(&label);
-  constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234;
+  constexpr size_t kLdrR0R0Count = (1 << 15) - 2u + 0x1234;
   for (size_t i = 0; i != kLdrR0R0Count; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
 
   std::string expected =
       // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw.
-      "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n"
+      "movw ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) & 0xffff)\n"
       // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt.
-      "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n"
+      "movt ip, #((0x1000c + 2 * 0x1234 - 0x8 - 4) >> 16)\n"
       "1:\n"
       "add ip, pc\n"
       "vldr d3, [ip, #0]\n" +
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 48b773e..565a8f0 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -777,15 +777,13 @@
 
 /*
  * Helper for dumpInstruction(), which builds the string
- * representation for the index in the given instruction. This will
- * first try to use the given buffer, but if the result won't fit,
- * then this will allocate a new buffer to hold the result. A pointer
- * to the buffer which holds the full result is always returned, and
- * this can be compared with the one passed in, to see if the result
- * needs to be free()d.
+ * representation for the index in the given instruction.
+ * Returns a pointer to a buffer of sufficient size.
  */
-static char* indexString(const DexFile* pDexFile,
-                         const Instruction* pDecInsn, char* buf, size_t bufSize) {
+static std::unique_ptr<char[]> indexString(const DexFile* pDexFile,
+                                           const Instruction* pDecInsn,
+                                           size_t bufSize) {
+  std::unique_ptr<char[]> buf(new char[bufSize]);
   // Determine index and width of the string.
   u4 index = 0;
   u4 width = 4;
@@ -821,27 +819,27 @@
     case Instruction::kIndexUnknown:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<unknown-index>");
+      outSize = snprintf(buf.get(), bufSize, "<unknown-index>");
       break;
     case Instruction::kIndexNone:
       // This function should never get called for this type, but do
       // something sensible here, just to help with debugging.
-      outSize = snprintf(buf, bufSize, "<no-index>");
+      outSize = snprintf(buf.get(), bufSize, "<no-index>");
       break;
     case Instruction::kIndexTypeRef:
       if (index < pDexFile->GetHeader().type_ids_size_) {
         const char* tp = pDexFile->StringByTypeIdx(index);
-        outSize = snprintf(buf, bufSize, "%s // type@%0*x", tp, width, index);
+        outSize = snprintf(buf.get(), bufSize, "%s // type@%0*x", tp, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<type?> // type@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<type?> // type@%0*x", width, index);
       }
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
         const char* st = pDexFile->StringDataByIdx(index);
-        outSize = snprintf(buf, bufSize, "\"%s\" // string@%0*x", st, width, index);
+        outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<string?> // string@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
       }
       break;
     case Instruction::kIndexMethodRef:
@@ -850,10 +848,10 @@
         const char* name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
         const Signature signature = pDexFile->GetMethodSignature(pMethodId);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // method@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // method@%0*x",
                            backDescriptor, name, signature.ToString().c_str(), width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<method?> // method@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<method?> // method@%0*x", width, index);
       }
       break;
     case Instruction::kIndexFieldRef:
@@ -862,38 +860,33 @@
         const char* name = pDexFile->StringDataByIdx(pFieldId.name_idx_);
         const char* typeDescriptor = pDexFile->StringByTypeIdx(pFieldId.type_idx_);
         const char* backDescriptor = pDexFile->StringByTypeIdx(pFieldId.class_idx_);
-        outSize = snprintf(buf, bufSize, "%s.%s:%s // field@%0*x",
+        outSize = snprintf(buf.get(), bufSize, "%s.%s:%s // field@%0*x",
                            backDescriptor, name, typeDescriptor, width, index);
       } else {
-        outSize = snprintf(buf, bufSize, "<field?> // field@%0*x", width, index);
+        outSize = snprintf(buf.get(), bufSize, "<field?> // field@%0*x", width, index);
       }
       break;
     case Instruction::kIndexVtableOffset:
-      outSize = snprintf(buf, bufSize, "[%0*x] // vtable #%0*x",
+      outSize = snprintf(buf.get(), bufSize, "[%0*x] // vtable #%0*x",
                          width, index, width, index);
       break;
     case Instruction::kIndexFieldOffset:
-      outSize = snprintf(buf, bufSize, "[obj+%0*x]", width, index);
+      outSize = snprintf(buf.get(), bufSize, "[obj+%0*x]", width, index);
       break;
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
     default:
-      outSize = snprintf(buf, bufSize, "<?>");
+      outSize = snprintf(buf.get(), bufSize, "<?>");
       break;
   }  // switch
 
   // Determine success of string construction.
   if (outSize >= bufSize) {
-    // The buffer wasn't big enough; allocate and retry. Note:
-    // snprintf() doesn't count the '\0' as part of its returned
-    // size, so we add explicit space for it here.
-    outSize++;
-    buf = reinterpret_cast<char*>(malloc(outSize));
-    if (buf == nullptr) {
-      return nullptr;
-    }
-    return indexString(pDexFile, pDecInsn, buf, outSize);
+    // The buffer wasn't big enough; retry with computed size. Note: snprintf()
+    // doesn't count the '\0' as part of its returned size, so we add explicit
+    // space for it here.
+    return indexString(pDexFile, pDecInsn, outSize + 1);
   }
   return buf;
 }
@@ -941,11 +934,9 @@
   }
 
   // Set up additional argument.
-  char indexBufChars[200];
-  char *indexBuf = indexBufChars;
+  std::unique_ptr<char[]> indexBuf;
   if (Instruction::IndexTypeOf(pDecInsn->Opcode()) != Instruction::kIndexNone) {
-    indexBuf = indexString(pDexFile, pDecInsn,
-                           indexBufChars, sizeof(indexBufChars));
+    indexBuf = indexString(pDexFile, pDecInsn, 200);
   }
 
   // Dump the instruction.
@@ -1003,7 +994,7 @@
       break;
     case Instruction::k21c:        // op vAA, thing@BBBB
     case Instruction::k31c:        // op vAA, thing@BBBBBBBB
-      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf);
+      fprintf(gOutFile, " v%d, %s", pDecInsn->VRegA(), indexBuf.get());
       break;
     case Instruction::k23x:        // op vAA, vBB, vCC
       fprintf(gOutFile, " v%d, v%d, v%d",
@@ -1032,7 +1023,7 @@
     // NOT SUPPORTED:
     // case Instruction::k22cs:    // [opt] op vA, vB, field offset CCCC
       fprintf(gOutFile, " v%d, v%d, %s",
-              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf);
+              pDecInsn->VRegA(), pDecInsn->VRegB(), indexBuf.get());
       break;
     case Instruction::k30t:
       fprintf(gOutFile, " #%08x", pDecInsn->VRegA());
@@ -1069,7 +1060,7 @@
           fprintf(gOutFile, ", v%d", arg[i]);
         }
       }  // for
-      fprintf(gOutFile, "}, %s", indexBuf);
+      fprintf(gOutFile, "}, %s", indexBuf.get());
       break;
     }
     case Instruction::k25x: {      // op vC, {vD, vE, vF, vG} (B: count)
@@ -1101,7 +1092,7 @@
             fprintf(gOutFile, ", v%d", pDecInsn->VRegC() + i);
           }
         }  // for
-        fprintf(gOutFile, "}, %s", indexBuf);
+        fprintf(gOutFile, "}, %s", indexBuf.get());
       }
       break;
     case Instruction::k51l: {      // op vAA, #+BBBBBBBBBBBBBBBB
@@ -1124,10 +1115,6 @@
   }  // switch
 
   fputc('\n', gOutFile);
-
-  if (indexBuf != indexBufChars) {
-    free(indexBuf);
-  }
 }
 
 /*
@@ -1274,7 +1261,7 @@
         // Primitive char, copy it.
         if (strchr("ZBCSIFJD", *base) == NULL) {
           fprintf(stderr, "ERROR: bad method signature '%s'\n", base);
-          goto bail;
+          break;  // while
         }
         *cp++ = *base++;
       }
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index bf563c7..d76bbb8 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -68,8 +68,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 0a7ffda..5bb61bb 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -494,6 +494,17 @@
   image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
 }
 
+void PatchOat::PatchImTables(const ImageHeader* image_header) {
+  const size_t pointer_size = InstructionSetPointerSize(isa_);
+  // We can safely walk target image since the conflict tables are independent.
+  image_header->VisitPackedImTables(
+      [this](ArtMethod* method) {
+        return RelocatedAddressOfPointer(method);
+      },
+      image_->Begin(),
+      pointer_size);
+}
+
 void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
   const size_t pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
@@ -636,6 +647,7 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
+  PatchImTables(image_header);
   PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
   PatchClassTable(image_header);
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 3ef837f..61ec695 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -117,6 +117,7 @@
   bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchImTables(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchImtConflictTables(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1c442fc..99c4a82 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -515,8 +515,10 @@
 
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $$(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $$(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $$(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
     LOCAL_MULTILIB := both
   endif
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 2b025f8..90b2406 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -99,6 +99,22 @@
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
+  // Return true if two conflict tables are the same.
+  bool Equals(ImtConflictTable* other, size_t pointer_size) const {
+    size_t num = NumEntries(pointer_size);
+    if (num != other->NumEntries(pointer_size)) {
+      return false;
+    }
+    for (size_t i = 0; i < num; ++i) {
+      if (GetInterfaceMethod(i, pointer_size) != other->GetInterfaceMethod(i, pointer_size) ||
+          GetImplementationMethod(i, pointer_size) !=
+              other->GetImplementationMethod(i, pointer_size)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   // Visit all of the entries.
   // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
   // and also returns one. The order is <interface, implementation>.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 2d702f6..da68923 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -140,7 +140,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
             art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 3ec8f21..cb97faa 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -857,11 +857,13 @@
     if (vtable != nullptr) {
       SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        SanityCheckArtMethod(
-            klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_spaces);
+    if (klass->ShouldHaveImt()) {
+      ImTable* imt = klass->GetImt(pointer_size);
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
+        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
       }
+    }
+    if (klass->ShouldHaveEmbeddedVTable()) {
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
         SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
@@ -3460,16 +3462,13 @@
     new_class->SetClassFlags(mirror::kClassFlagObjectArray);
   }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
-  {
-    ArtMethod* imt[mirror::Class::kImtSize];
-    std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-    new_class->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
-  }
+  new_class->PopulateEmbeddedVTable(image_pointer_size_);
+  ImTable* object_imt = java_lang_Object->GetImt(image_pointer_size_);
+  new_class->SetImt(object_imt, image_pointer_size_);
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
-
   // All arrays have java/lang/Cloneable and java/io/Serializable as
   // interfaces.  We need to set that up here, so that stuff like
   // "instanceof" works right.
@@ -5030,6 +5029,17 @@
   return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
 }
 
+static ImTable* FindSuperImt(mirror::Class* klass, size_t pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  while (klass->HasSuperClass()) {
+    klass = klass->GetSuperClass();
+    if (klass->ShouldHaveImt()) {
+      return klass->GetImt(pointer_size);
+    }
+  }
+  return nullptr;
+}
+
 bool ClassLinker::LinkClass(Thread* self,
                             const char* descriptor,
                             Handle<mirror::Class> klass,
@@ -5040,9 +5050,11 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  ArtMethod* imt[mirror::Class::kImtSize];
-  std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, imt)) {
+  ArtMethod* imt_data[ImTable::kSize];
+  // Whether there are any new conflicts compared to the super class.
+  bool new_conflict = false;
+  std::fill_n(imt_data, arraysize(imt_data), Runtime::Current()->GetImtUnimplementedMethod());
+  if (!LinkMethods(self, klass, interfaces, &new_conflict, imt_data)) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
@@ -5055,15 +5067,47 @@
   CreateReferenceInstanceOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
+  ImTable* imt = nullptr;
+  if (klass->ShouldHaveImt()) {
+    // If there are any new conflicts compared to the super class we can not make a copy. There
+    // can be cases where both will have a conflict method at the same slot without having the same
+    // set of conflicts. In this case, we can not share the IMT since the conflict table slow path
+    // will possibly create a table that is incorrect for either of the classes.
+    // An identical IMT with new_conflict set does not happen very often.
+    if (!new_conflict) {
+      ImTable* super_imt = FindSuperImt(klass.Get(), image_pointer_size_);
+      if (super_imt != nullptr) {
+        bool imt_equals = true;
+        for (size_t i = 0; i < ImTable::kSize && imt_equals; ++i) {
+          imt_equals = imt_equals && (super_imt->Get(i, image_pointer_size_) == imt_data[i]);
+        }
+        if (imt_equals) {
+          imt = super_imt;
+        }
+      }
+    }
+    if (imt == nullptr) {
+      LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
+      imt = reinterpret_cast<ImTable*>(
+          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
+      if (imt == nullptr) {
+        return false;
+      }
+      imt->Populate(imt_data, image_pointer_size_);
+    }
+  }
+
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
     CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
 
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      klass->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
+    if (klass->ShouldHaveEmbeddedVTable()) {
+      klass->PopulateEmbeddedVTable(image_pointer_size_);
     }
-
+    if (klass->ShouldHaveImt()) {
+      klass->SetImt(imt, image_pointer_size_);
+    }
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5455,6 +5499,7 @@
 bool ClassLinker::LinkMethods(Thread* self,
                               Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                              bool* out_new_conflict,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
@@ -5466,7 +5511,7 @@
   // any vtable entries with new default method implementations.
   return SetupInterfaceLookupTable(self, klass, interfaces)
           && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
-          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
+          && LinkInterfaceMethods(self, klass, default_translations, out_new_conflict, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -5624,7 +5669,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     MutableHandle<mirror::PointerArray> vtable;
-    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    if (super_class->ShouldHaveEmbeddedVTable()) {
       vtable = hs.NewHandle(AllocPointerArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
@@ -6024,6 +6069,7 @@
 void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
                             ArtMethod* imt_conflict_method,
                             ArtMethod* current_method,
+                            /*out*/bool* new_conflict,
                             /*out*/ArtMethod** imt_ref) {
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
@@ -6040,40 +6086,82 @@
       *imt_ref = current_method;
     } else {
       *imt_ref = imt_conflict_method;
+      *new_conflict = true;
     }
   } else {
     // Place the default conflict method. Note that there may be an existing conflict
     // method in the IMT, but it could be one tailored to the super class, with a
     // specific ImtConflictTable.
     *imt_ref = imt_conflict_method;
+    *new_conflict = true;
   }
 }
 
 void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
-  DCHECK(klass->ShouldHaveEmbeddedImtAndVTable()) << PrettyClass(klass);
+  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
   DCHECK(!klass->IsTemp()) << PrettyClass(klass);
-  ArtMethod* imt[mirror::Class::kImtSize];
+  ArtMethod* imt_data[ImTable::kSize];
   Runtime* const runtime = Runtime::Current();
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
   ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
-  std::fill_n(imt, arraysize(imt), unimplemented_method);
+  std::fill_n(imt_data, arraysize(imt_data), unimplemented_method);
   if (klass->GetIfTable() != nullptr) {
+    bool new_conflict = false;
     FillIMTFromIfTable(klass->GetIfTable(),
                        unimplemented_method,
                        conflict_method,
                        klass,
-                       true,
-                       false,
-                       &imt[0]);
+                       /*create_conflict_tables*/true,
+                       /*ignore_copied_methods*/false,
+                       &new_conflict,
+                       &imt_data[0]);
   }
-  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-    klass->SetEmbeddedImTableEntry(i, imt[i], image_pointer_size_);
+  if (!klass->ShouldHaveImt()) {
+    return;
+  }
+  // Compare the IMT with the super class including the conflict methods. If they are equivalent,
+  // we can just use the same pointer.
+  ImTable* imt = nullptr;
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class != nullptr && super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
+    bool same = true;
+    for (size_t i = 0; same && i < ImTable::kSize; ++i) {
+      ArtMethod* method = imt_data[i];
+      ArtMethod* super_method = super_imt->Get(i, image_pointer_size_);
+      if (method != super_method) {
+        bool is_conflict_table = method->IsRuntimeMethod() &&
+                                 method != unimplemented_method &&
+                                 method != conflict_method;
+        // Verify conflict contents.
+        bool super_conflict_table = super_method->IsRuntimeMethod() &&
+                                    super_method != unimplemented_method &&
+                                    super_method != conflict_method;
+        if (!is_conflict_table || !super_conflict_table) {
+          same = false;
+        } else {
+          ImtConflictTable* table1 = method->GetImtConflictTable(image_pointer_size_);
+          ImtConflictTable* table2 = super_method->GetImtConflictTable(image_pointer_size_);
+          same = same && table1->Equals(table2, image_pointer_size_);
+        }
+      }
+    }
+    if (same) {
+      imt = super_imt;
+    }
+  }
+  if (imt == nullptr) {
+    imt = klass->GetImt(image_pointer_size_);
+    DCHECK(imt != nullptr);
+    imt->Populate(imt_data, image_pointer_size_);
+  } else {
+    klass->SetImt(imt, image_pointer_size_);
   }
 }
 
 static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+  return interface_method->GetDexMethodIndex() % ImTable::kSize;
 }
 
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
@@ -6095,8 +6183,9 @@
                                      mirror::Class* klass,
                                      bool create_conflict_tables,
                                      bool ignore_copied_methods,
-                                     ArtMethod** imt) {
-  uint32_t conflict_counts[mirror::Class::kImtSize] = {};
+                                     /*out*/bool* new_conflict,
+                                     /*out*/ArtMethod** imt) {
+  uint32_t conflict_counts[ImTable::kSize] = {};
   for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
     mirror::Class* interface = if_table->GetInterface(i);
     const size_t num_virtuals = interface->NumVirtualMethods();
@@ -6138,6 +6227,7 @@
       SetIMTRef(unimplemented_method,
                 imt_conflict_method,
                 implementation_method,
+                /*out*/new_conflict,
                 /*out*/&imt[imt_index]);
     }
   }
@@ -6145,7 +6235,7 @@
   if (create_conflict_tables) {
     // Create the conflict tables.
     LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
       size_t conflicts = conflict_counts[i];
       if (imt[i] == imt_conflict_method) {
         ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
@@ -6432,12 +6522,14 @@
 void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
                                         ArtMethod* unimplemented_method,
                                         ArtMethod* imt_conflict_method,
+                                        bool* new_conflict,
                                         ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
   mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
+  if (super_class->ShouldHaveImt()) {
+    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      imt[i] = super_imt->Get(i, image_pointer_size_);
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
@@ -6450,6 +6542,7 @@
                          klass.Get(),
                          /*create_conflict_table*/false,
                          /*ignore_copied_methods*/true,
+                         /*out*/new_conflict,
                          /*out*/imt);
     }
   }
@@ -6460,6 +6553,7 @@
     Thread* self,
     Handle<mirror::Class> klass,
     const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
+    bool* out_new_conflict,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
@@ -6495,6 +6589,7 @@
     FillImtFromSuperClass(klass,
                           unimplemented_method,
                           imt_conflict_method,
+                          out_new_conflict,
                           out_imt);
   }
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
@@ -6626,6 +6721,7 @@
                 SetIMTRef(unimplemented_method,
                           imt_conflict_method,
                           vtable_method,
+                          /*out*/out_new_conflict,
                           /*out*/imt_ptr);
               }
               break;
@@ -6768,6 +6864,7 @@
             SetIMTRef(unimplemented_method,
                       imt_conflict_method,
                       current_method,
+                      /*out*/out_new_conflict,
                       /*out*/imt_ptr);
           }
         }
@@ -6967,7 +7064,7 @@
       }
 
       // Fix up IMT next
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      for (size_t i = 0; i < ImTable::kSize; ++i) {
         auto it = move_table.find(out_imt[i]);
         if (it != move_table.end()) {
           out_imt[i] = it->second;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index ca5af19..d6822c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -833,6 +833,7 @@
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
+                   bool* out_new_conflict,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -968,19 +969,20 @@
   // * kDefaultConflict - Conflicting method implementations were found when searching for
   //                      target_method. The value of *out_default_method is null.
   DefaultMethodSearchResult FindDefaultMethodImplementation(
-          Thread* self,
-          ArtMethod* target_method,
-          Handle<mirror::Class> klass,
-          /*out*/ArtMethod** out_default_method) const
+      Thread* self,
+      ArtMethod* target_method,
+      Handle<mirror::Class> klass,
+      /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
   bool LinkInterfaceMethods(
-          Thread* self,
-          Handle<mirror::Class> klass,
-          const std::unordered_map<size_t, MethodTranslation>& default_translations,
-          ArtMethod** out_imt)
+      Thread* self,
+      Handle<mirror::Class> klass,
+      const std::unordered_map<size_t, MethodTranslation>& default_translations,
+      bool* out_new_conflict,
+      ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -1096,6 +1098,7 @@
   void SetIMTRef(ArtMethod* unimplemented_method,
                  ArtMethod* imt_conflict_method,
                  ArtMethod* current_method,
+                 /*out*/bool* new_conflict,
                  /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillIMTFromIfTable(mirror::IfTable* if_table,
@@ -1104,11 +1107,13 @@
                           mirror::Class* klass,
                           bool create_conflict_tables,
                           bool ignore_copied_methods,
-                          ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+                          /*out*/bool* new_conflict,
+                          /*out*/ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillImtFromSuperClass(Handle<mirror::Class> klass,
                              ArtMethod* unimplemented_method,
                              ArtMethod* imt_conflict_method,
+                             bool* new_conflict,
                              ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 488826b..48b6316 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -100,6 +100,62 @@
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
   }
 
+  void AssertObjectClass(mirror::Class* JavaLangObject)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ASSERT_TRUE(JavaLangObject != nullptr);
+    ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
+    ASSERT_EQ(JavaLangObject->GetClass(),
+              JavaLangObject->GetClass()->GetClass());
+    EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
+    std::string temp;
+    ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
+    EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
+    EXPECT_FALSE(JavaLangObject->HasSuperClass());
+    EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
+    EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
+    EXPECT_FALSE(JavaLangObject->IsErroneous());
+    EXPECT_TRUE(JavaLangObject->IsLoaded());
+    EXPECT_TRUE(JavaLangObject->IsResolved());
+    EXPECT_TRUE(JavaLangObject->IsVerified());
+    EXPECT_TRUE(JavaLangObject->IsInitialized());
+    EXPECT_FALSE(JavaLangObject->IsArrayInstance());
+    EXPECT_FALSE(JavaLangObject->IsArrayClass());
+    EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
+    EXPECT_FALSE(JavaLangObject->IsInterface());
+    EXPECT_TRUE(JavaLangObject->IsPublic());
+    EXPECT_FALSE(JavaLangObject->IsFinal());
+    EXPECT_FALSE(JavaLangObject->IsPrimitive());
+    EXPECT_FALSE(JavaLangObject->IsSynthetic());
+    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
+    if (!kUseBrooksReadBarrier) {
+      EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
+    } else {
+      EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
+    }
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(),
+                 "shadow$_klass_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(),
+                 "shadow$_monitor_");
+    if (kUseBrooksReadBarrier) {
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(),
+                   "shadow$_x_rb_ptr_");
+      EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(),
+                   "shadow$_x_xpadding_");
+    }
+
+    EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
+    EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+
+    size_t pointer_size = class_linker_->GetImagePointerSize();
+    ArtMethod* unimplemented = runtime_->GetImtUnimplementedMethod();
+    ImTable* imt = JavaLangObject->GetImt(pointer_size);
+    ASSERT_NE(nullptr, imt);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ASSERT_EQ(unimplemented, imt->Get(i, pointer_size));
+    }
+  }
+
   void AssertArrayClass(const std::string& array_descriptor,
                         const std::string& component_type,
                         mirror::ClassLoader* class_loader)
@@ -148,7 +204,8 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
+    EXPECT_TRUE(array->ShouldHaveImt());
+    EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
@@ -158,6 +215,13 @@
     EXPECT_STREQ(direct_interface1->GetDescriptor(&temp), "Ljava/io/Serializable;");
     mirror::Class* array_ptr = array->GetComponentType();
     EXPECT_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
+
+    size_t pointer_size = class_linker_->GetImagePointerSize();
+    mirror::Class* JavaLangObject =
+        class_linker_->FindSystemClass(self, "Ljava/lang/Object;");
+    ImTable* JavaLangObject_imt = JavaLangObject->GetImt(pointer_size);
+    // The IMT of an array class should be shared with the IMT of java.lang.Object.
+    ASSERT_EQ(JavaLangObject_imt, array->GetImt(pointer_size));
   }
 
   void AssertMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -713,45 +777,7 @@
 TEST_F(ClassLinkerTest, FindClass) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::Class* JavaLangObject = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
-  ASSERT_TRUE(JavaLangObject != nullptr);
-  ASSERT_TRUE(JavaLangObject->GetClass() != nullptr);
-  ASSERT_EQ(JavaLangObject->GetClass(), JavaLangObject->GetClass()->GetClass());
-  EXPECT_EQ(JavaLangObject, JavaLangObject->GetClass()->GetSuperClass());
-  std::string temp;
-  ASSERT_STREQ(JavaLangObject->GetDescriptor(&temp), "Ljava/lang/Object;");
-  EXPECT_TRUE(JavaLangObject->GetSuperClass() == nullptr);
-  EXPECT_FALSE(JavaLangObject->HasSuperClass());
-  EXPECT_TRUE(JavaLangObject->GetClassLoader() == nullptr);
-  EXPECT_EQ(mirror::Class::kStatusInitialized, JavaLangObject->GetStatus());
-  EXPECT_FALSE(JavaLangObject->IsErroneous());
-  EXPECT_TRUE(JavaLangObject->IsLoaded());
-  EXPECT_TRUE(JavaLangObject->IsResolved());
-  EXPECT_TRUE(JavaLangObject->IsVerified());
-  EXPECT_TRUE(JavaLangObject->IsInitialized());
-  EXPECT_FALSE(JavaLangObject->IsArrayInstance());
-  EXPECT_FALSE(JavaLangObject->IsArrayClass());
-  EXPECT_TRUE(JavaLangObject->GetComponentType() == nullptr);
-  EXPECT_FALSE(JavaLangObject->IsInterface());
-  EXPECT_TRUE(JavaLangObject->IsPublic());
-  EXPECT_FALSE(JavaLangObject->IsFinal());
-  EXPECT_FALSE(JavaLangObject->IsPrimitive());
-  EXPECT_FALSE(JavaLangObject->IsSynthetic());
-  EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
-  EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
-  if (!kUseBrooksReadBarrier) {
-    EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
-  } else {
-    EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
-  }
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
-  EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
-  if (kUseBrooksReadBarrier) {
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
-    EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
-  }
-
-  EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
-  EXPECT_EQ(0U, JavaLangObject->NumDirectInterfaces());
+  AssertObjectClass(JavaLangObject);
 
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(
@@ -762,6 +788,7 @@
   ASSERT_TRUE(MyClass->GetClass() != nullptr);
   ASSERT_EQ(MyClass->GetClass(), MyClass->GetClass()->GetClass());
   EXPECT_EQ(JavaLangObject, MyClass->GetClass()->GetSuperClass());
+  std::string temp;
   ASSERT_STREQ(MyClass->GetDescriptor(&temp), "LMyClass;");
   EXPECT_TRUE(MyClass->GetSuperClass() == JavaLangObject);
   EXPECT_TRUE(MyClass->HasSuperClass());
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index d61d0aa..ab14655 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -600,9 +600,10 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
-      ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(
-          imt_index, class_linker->GetImagePointerSize());
+      uint32_t imt_index = resolved_method->GetDexMethodIndex() % ImTable::kSize;
+      size_t pointer_size = class_linker->GetImagePointerSize();
+      ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
+          Get(imt_index, pointer_size);
       if (!imt_method->IsRuntimeMethod()) {
         if (kIsDebugBuild) {
           mirror::Class* klass = (*this_object)->GetClass();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 923ea1a..1152b94 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2165,13 +2165,13 @@
       dex_method_idx, sizeof(void*));
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
   ArtMethod* method = nullptr;
+  ImTable* imt = cls->GetImt(sizeof(void*));
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
     uint32_t imt_index = interface_method->GetDexMethodIndex();
-    ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
-        imt_index % mirror::Class::kImtSize, sizeof(void*));
+    ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
       ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
       DCHECK(current_table != nullptr);
@@ -2223,8 +2223,7 @@
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
   uint32_t imt_index = interface_method->GetDexMethodIndex();
-  ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
-      imt_index % mirror::Class::kImtSize, sizeof(void*));
+  ArtMethod* conflict_method = imt->Get(imt_index % ImTable::kSize, sizeof(void*));
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2235,9 +2234,9 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
-                                  new_conflict_method,
-                                  sizeof(void*));
+      imt->Set(imt_index % ImTable::kSize,
+               new_conflict_method,
+               sizeof(void*));
     }
   }
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e896c7a..8cadc2e 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1130,6 +1130,10 @@
       image_header.VisitPackedArtFields(&field_visitor, target_base);
     }
     {
+      TimingLogger::ScopedTiming timing("Fixup imt", &logger);
+      image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
+    }
+    {
       TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
       image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
     }
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index ea75a62..cd0557a 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -20,6 +20,7 @@
 #include "image.h"
 
 #include "art_method.h"
+#include "imtable.h"
 
 namespace art {
 
@@ -45,6 +46,24 @@
 }
 
 template <typename Visitor>
+inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
+                                             uint8_t* base,
+                                             size_t pointer_size) const {
+  const ImageSection& section = GetImageSection(kSectionImTables);
+  for (size_t pos = 0; pos < section.Size();) {
+    ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
+    for (size_t i = 0; i < ImTable::kSize; ++i) {
+      ArtMethod* orig = imt->Get(i, pointer_size);
+      ArtMethod* updated = visitor(orig);
+      if (updated != orig) {
+        imt->Set(i, updated, pointer_size);
+      }
+    }
+    pos += ImTable::SizeInBytes(pointer_size);
+  }
+}
+
+template <typename Visitor>
 inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
                                                       uint8_t* base,
                                                       size_t pointer_size) const {
diff --git a/runtime/image.cc b/runtime/image.cc
index a9552c2..2362a92 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '9', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '0', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 2ea9af7..06f06ee 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -195,6 +195,7 @@
     kSectionArtFields,
     kSectionArtMethods,
     kSectionRuntimeMethods,
+    kSectionImTables,
     kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
@@ -279,6 +280,11 @@
   void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
 
   template <typename Visitor>
+  void VisitPackedImTables(const Visitor& visitor,
+                           uint8_t* base,
+                           size_t pointer_size) const;
+
+  template <typename Visitor>
   void VisitPackedImtConflictTables(const Visitor& visitor,
                                     uint8_t* base,
                                     size_t pointer_size) const;
diff --git a/runtime/imtable.h b/runtime/imtable.h
new file mode 100644
index 0000000..51faf70
--- /dev/null
+++ b/runtime/imtable.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_IMTABLE_H_
+#define ART_RUNTIME_IMTABLE_H_
+
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
+namespace art {
+
+class ArtMethod;
+
+class ImTable {
+ public:
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of every ImTable, which holds
+  // kSize entries of pointer_size bytes each.
+  static constexpr size_t kSize = IMT_SIZE;
+
+  ArtMethod* Get(size_t index, size_t pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == 4) {
+      uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    } else {
+      uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
+      return reinterpret_cast<ArtMethod*>(value);
+    }
+  }
+
+  void Set(size_t index, ArtMethod* method, size_t pointer_size) {
+    DCHECK_LT(index, kSize);
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    if (pointer_size == 4) {
+      uintptr_t value = reinterpret_cast<uintptr_t>(method);
+      DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we don't lose any non-zero bits.
+      *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
+    } else {
+      *reinterpret_cast<uint64_t*>(ptr) = reinterpret_cast<uint64_t>(method);
+    }
+  }
+
+  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
+    return index * pointer_size;
+  }
+
+  void Populate(ArtMethod** data, size_t pointer_size) {
+    for (size_t i = 0; i < kSize; ++i) {
+      Set(i, data[i], pointer_size);
+    }
+  }
+
+  constexpr static size_t SizeInBytes(size_t pointer_size) {
+    return kSize * pointer_size;
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_IMTABLE_H_
+
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 03d03d5..7dfa6e2 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -679,7 +679,7 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
       vtable_idx, sizeof(void*));
   if (UNLIKELY(called_method == nullptr)) {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index cefd9f0..b783a01 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -247,38 +247,19 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
-inline MemberOffset Class::EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size) {
-  DCHECK_LT(i, kImtSize);
-  return MemberOffset(
-      EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
-}
-
-template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
-  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
-  return GetFieldPtrWithSize<ArtMethod*>(
-      EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
-}
-
-template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
-  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
-  SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
-}
-
 inline bool Class::HasVTable() {
-  return GetVTable() != nullptr || ShouldHaveEmbeddedImtAndVTable();
+  return GetVTable() != nullptr || ShouldHaveEmbeddedVTable();
 }
 
 inline int32_t Class::GetVTableLength() {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableLength();
   }
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
 inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
   auto* vtable = GetVTable();
@@ -294,6 +275,14 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
+inline ImTable* Class::GetImt(size_t pointer_size) {
+  return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
+}
+
+inline void Class::SetImt(ImTable* imt, size_t pointer_size) {
+  return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
+}
+
 inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
@@ -541,7 +530,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
+  if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -552,7 +541,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedImtAndVTable()) {
+  if (ShouldHaveEmbeddedVTable()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
@@ -711,7 +700,7 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
-inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
+inline uint32_t Class::ComputeClassSize(bool has_embedded_vtable,
                                         uint32_t num_vtable_entries,
                                         uint32_t num_8bit_static_fields,
                                         uint32_t num_16bit_static_fields,
@@ -722,11 +711,10 @@
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
-  if (has_embedded_tables) {
-    const uint32_t embedded_imt_size = kImtSize * ImTableEntrySize(pointer_size);
-    const uint32_t embedded_vtable_size = num_vtable_entries * VTableEntrySize(pointer_size);
-    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */, pointer_size) +
-        embedded_imt_size + embedded_vtable_size;
+  if (has_embedded_vtable) {
+    size = RoundUp(size + sizeof(uint32_t), pointer_size);
+    size += pointer_size;  // size of pointer to IMT
+    size += num_vtable_entries * VTableEntrySize(pointer_size);
   }
 
   // Space used by reference statics.
@@ -990,18 +978,9 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
-inline MemberOffset Class::EmbeddedImTableOffset(size_t pointer_size) {
-  CheckPointerSize(pointer_size);
-  // Round up since we want the embedded imt and vtable to be pointer size aligned in case 64 bits.
-  // Add 32 bits for embedded vtable length.
-  return MemberOffset(
-      RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
-}
-
 inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MemberOffset(EmbeddedImTableOffset(pointer_size).Uint32Value() +
-                      kImtSize * ImTableEntrySize(pointer_size));
+  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + pointer_size);
 }
 
 inline void Class::CheckPointerSize(size_t pointer_size) {
@@ -1086,7 +1065,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
+  if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1094,16 +1073,9 @@
         dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
       }
     }
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
-                                                                                    pointer_size);
-      ArtMethod* new_method = visitor(method);
-      if (method != new_method) {
-        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
-                                                                        new_method,
-                                                                        pointer_size);
-      }
-    }
+  }
+  if (!IsTemp() && ShouldHaveImt<kVerifyNone, kReadBarrierOption>()) {
+    dest->SetImt(visitor(GetImt(pointer_size)), pointer_size);
   }
 }
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index b4a23ba..9c77d38 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -914,13 +914,7 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize],
-                                         size_t pointer_size) {
-  for (size_t i = 0; i < kImtSize; i++) {
-    auto method = methods[i];
-    DCHECK(method != nullptr);
-    SetEmbeddedImTableEntry(i, method, pointer_size);
-  }
+void Class::PopulateEmbeddedVTable(size_t pointer_size) {
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -967,7 +961,7 @@
 class CopyClassVisitor {
  public:
   CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ArtMethod* const (&imt)[mirror::Class::kImtSize],
+                   size_t copy_bytes, ImTable* imt,
                    size_t pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
@@ -979,7 +973,8 @@
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
     mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
-    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
+    h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
+    h_new_class_obj->SetImt(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
@@ -992,13 +987,13 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
-  ArtMethod* const (&imt_)[mirror::Class::kImtSize];
+  ImTable* imt_;
   const size_t pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
 Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ArtMethod* const (&imt)[mirror::Class::kImtSize], size_t pointer_size) {
+                     ImTable* imt, size_t pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 5235a3e..f044b59 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -22,6 +22,7 @@
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
+#include "imtable.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
@@ -33,10 +34,6 @@
 #include "thread.h"
 #include "utils.h"
 
-#ifndef IMT_SIZE
-#error IMT_SIZE not defined
-#endif
-
 namespace art {
 
 class ArtField;
@@ -66,11 +63,6 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kImtSize = IMT_SIZE;
-
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -351,7 +343,7 @@
   // be replaced with a class with the right size for embedded imt/vtable.
   bool IsTemp() SHARED_REQUIRES(Locks::mutator_lock_) {
     Status s = GetStatus();
-    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedVTable();
   }
 
   String* GetName() SHARED_REQUIRES(Locks::mutator_lock_);  // Returns the cached name.
@@ -557,7 +549,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Compute how many bytes would be used a class with the given elements.
-  static uint32_t ComputeClassSize(bool has_embedded_tables,
+  static uint32_t ComputeClassSize(bool has_embedded_vtable,
                                    uint32_t num_vtable_entries,
                                    uint32_t num_8bit_static_fields,
                                    uint32_t num_16bit_static_fields,
@@ -830,28 +822,27 @@
     return MemberOffset(sizeof(Class));
   }
 
+  static MemberOffset ImtPtrOffset(size_t pointer_size) {
+    return MemberOffset(
+        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+  }
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool ShouldHaveImt() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>();
+  }
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  bool ShouldHaveEmbeddedVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static MemberOffset EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size);
-
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
@@ -861,6 +852,10 @@
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ImTable* GetImt(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetImt(ImTable* imt, size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
+
   ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -870,7 +865,7 @@
   inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize], size_t pointer_size)
+  void PopulateEmbeddedVTable(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
@@ -1195,7 +1190,7 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ArtMethod* const (&imt)[mirror::Class::kImtSize],
+  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
                 size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -1322,10 +1317,7 @@
 
   // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
-
-  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
-
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1aa789f..9da44a4 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -46,6 +46,16 @@
   if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1) {
     PLOG(ERROR) << "prctl(PR_SET_DUMPABLE) failed for pid " << getpid();
   }
+
+  // Even if Yama is on, a non-privileged native debugger should
+  // be able to attach to the debuggable app.
+  if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == -1) {
+    // If Yama is off, prctl(PR_SET_PTRACER) returns EINVAL - don't log in this
+    // case since it's expected behaviour.
+    if (errno != EINVAL) {
+      PLOG(ERROR) << "prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed for pid " << getpid();
+    }
+  }
 #endif
   // We don't want core dumps, though, so set the core dump size to 0.
   rlimit rl;
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index 5c71da6..ad91cde 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -65,8 +65,10 @@
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
   endif
 
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index bf561e9..06f193a 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -808,11 +808,21 @@
     Assert.assertEquals(Math.round(-2.9d), -3l);
     Assert.assertEquals(Math.round(-3.0d), -3l);
     Assert.assertEquals(Math.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(Math.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(Math.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(Math.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(Math.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(Math.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(Math.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(Math.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(Math.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(Math.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(Math.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(Math.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(Math.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(Math.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -846,6 +856,10 @@
     Assert.assertEquals(Math.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(Math.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(Math.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(Math.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(Math.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
@@ -1153,11 +1167,21 @@
     Assert.assertEquals(StrictMath.round(-2.9d), -3l);
     Assert.assertEquals(StrictMath.round(-3.0d), -3l);
     Assert.assertEquals(StrictMath.round(0.49999999999999994d), 0l);
+    Assert.assertEquals(StrictMath.round(4503599627370495.0d), 4503599627370495l);  // 2^52 - 1
+    Assert.assertEquals(StrictMath.round(4503599627370495.5d), 4503599627370496l);  // 2^52 - 0.5
+    Assert.assertEquals(StrictMath.round(4503599627370496.0d), 4503599627370496l);  // 2^52
+    Assert.assertEquals(StrictMath.round(-4503599627370495.0d), -4503599627370495l);  // -(2^52 - 1)
+    Assert.assertEquals(StrictMath.round(-4503599627370495.5d), -4503599627370495l);  // -(2^52 - 0.5)
+    Assert.assertEquals(StrictMath.round(-4503599627370496.0d), -4503599627370496l);  // -2^52
     Assert.assertEquals(StrictMath.round(9007199254740991.0d), 9007199254740991l);  // 2^53 - 1
     Assert.assertEquals(StrictMath.round(-9007199254740991.0d), -9007199254740991l);  // -(2^53 - 1)
     Assert.assertEquals(StrictMath.round(Double.NaN), (long)+0.0d);
     Assert.assertEquals(StrictMath.round(Long.MAX_VALUE + 1.0d), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Long.MIN_VALUE - 1.0d), Long.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0x43F0000000000000l)),
+                        Long.MAX_VALUE); // 2^64
+    Assert.assertEquals(StrictMath.round(Double.longBitsToDouble(0xC3F0000000000000l)),
+                        Long.MIN_VALUE); // -2^64
     Assert.assertEquals(StrictMath.round(Double.POSITIVE_INFINITY), Long.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Double.NEGATIVE_INFINITY), Long.MIN_VALUE);
   }
@@ -1191,6 +1215,10 @@
     Assert.assertEquals(StrictMath.round(Float.NaN), (int)+0.0f);
     Assert.assertEquals(StrictMath.round(Integer.MAX_VALUE + 1.0f), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Integer.MIN_VALUE - 1.0f), Integer.MIN_VALUE);
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0x4F800000)),
+                        Integer.MAX_VALUE); // 2^32
+    Assert.assertEquals(StrictMath.round(Float.intBitsToFloat(0xCF800000)),
+                        Integer.MIN_VALUE); // -2^32
     Assert.assertEquals(StrictMath.round(Float.POSITIVE_INFINITY), Integer.MAX_VALUE);
     Assert.assertEquals(StrictMath.round(Float.NEGATIVE_INFINITY), Integer.MIN_VALUE);
   }
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 11de660..2b77b29 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -12,7 +12,6 @@
 JNI_OnUnload called
 null
 loader null false
-loader null false
 JNI_OnLoad called
 JNI_OnUnload called
 null
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 17a6049..9ed8d28 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -37,8 +37,6 @@
         try {
             testUnloadClass(constructor);
             testUnloadLoader(constructor);
-            // Test that we don't unload if we have a Method keeping the class live.
-            testNoUnloadInvoke(constructor);
             // Test that we don't unload if we have an instance.
             testNoUnloadInstance(constructor);
             // Test JNI_OnLoad and JNI_OnUnload.
@@ -79,10 +77,10 @@
     }
 
     private static void testUnloadClass(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
+        WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
         // No strong references to class loader, should get unloaded.
         Runtime.getRuntime().gc();
-        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        WeakReference<Class> klass2 = setUpUnloadClassWeak(constructor);
         Runtime.getRuntime().gc();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(klass.get());
@@ -99,12 +97,14 @@
     }
 
     private static void testStackTrace(Constructor constructor) throws Exception {
-        WeakReference<Class> klass = setUpUnloadClass(constructor);
-        Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace");
-        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get());
+        Class klass = setUpUnloadClass(constructor);
+        WeakReference<Class> weak_klass = new WeakReference(klass);
+        Method stackTraceMethod = klass.getDeclaredMethod("generateStackTrace");
+        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
         stackTraceMethod = null;
+        klass = null;
         Runtime.getRuntime().gc();
-        boolean isNull = klass.get() == null;
+        boolean isNull = weak_klass.get() == null;
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
 
@@ -116,28 +116,37 @@
         System.out.println(loader.get());
     }
 
-    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
-        boolean isNull = loader.get() == null;
-        System.out.println("loader null " + isNull);
+    private static Object testNoUnloadHelper(ClassLoader loader) throws Exception {
+        Class intHolder = loader.loadClass("IntHolder");
+        return intHolder.newInstance();
+    }
+
+    static class Pair {
+      public Pair(Object o, ClassLoader l) {
+        object = o;
+        classLoader = new WeakReference<ClassLoader>(l);
+      }
+
+      public Object object;
+      public WeakReference<ClassLoader> classLoader;
+    }
+
+    private static Pair testNoUnloadInstanceHelper(Constructor constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+        Object o = testNoUnloadHelper(loader);
+        return new Pair(o, loader);
     }
 
     private static void testNoUnloadInstance(Constructor constructor) throws Exception {
-        WeakReference<ClassLoader> loader =
-            new WeakReference((ClassLoader) constructor.newInstance(
-                DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader()));
-        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
-        Object o = intHolder.get().newInstance();
+        Pair p = testNoUnloadInstanceHelper(constructor);
         Runtime.getRuntime().gc();
-        boolean isNull = loader.get() == null;
+        // If the class loader was unloaded too early due to races, just pass the test.
+        boolean isNull = p.classLoader.get() == null;
         System.out.println("loader null " + isNull);
     }
 
-    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+    private static Class setUpUnloadClass(Constructor constructor) throws Exception {
         ClassLoader loader = (ClassLoader) constructor.newInstance(
             DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
         Class intHolder = loader.loadClass("IntHolder");
@@ -149,7 +158,12 @@
         setValue.invoke(intHolder, 2);
         System.out.println((int) getValue.invoke(intHolder));
         waitForCompilation(intHolder);
-        return new WeakReference(intHolder);
+        return intHolder;
+    }
+
+    private static WeakReference<Class> setUpUnloadClassWeak(Constructor constructor)
+            throws Exception {
+        return new WeakReference<Class>(setUpUnloadClass(constructor));
     }
 
     private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 11150c2..8473e06 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -67,4 +67,6 @@
 b/27799205 (5)
 b/27799205 (6)
 b/28187158
+b/29778499 (1)
+b/29778499 (2)
 Done!
diff --git a/test/800-smali/smali/b_29778499_1.smali b/test/800-smali/smali/b_29778499_1.smali
new file mode 100644
index 0000000..6cc0731
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_1.smali
@@ -0,0 +1,19 @@
+.class public LB29778499_1;
+.super Ljava/lang/Object;
+
+# Test returning an object that doesn't implement the declared output interface.
+
+.method public static run()V
+.registers 2
+       invoke-static {}, LB29778499_1;->test()Ljava/lang/Runnable;
+       move-result-object v0
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
+
+.method public static test()Ljava/lang/Runnable;
+.registers 1
+       new-instance v0, LB29778499_1;
+       invoke-direct {v0}, LB29778499_1;-><init>()V
+       return-object v0
+.end method
diff --git a/test/800-smali/smali/b_29778499_2.smali b/test/800-smali/smali/b_29778499_2.smali
new file mode 100644
index 0000000..ad24d2f
--- /dev/null
+++ b/test/800-smali/smali/b_29778499_2.smali
@@ -0,0 +1,13 @@
+.class public LB29778499_2;
+.super Ljava/lang/Object;
+
+# Test invoking an interface method on an object that doesn't implement any interface.
+# This is testing an edge case (not implementing any interface) for b/18116999.
+
+.method public static run()V
+.registers 1
+       new-instance v0, Ljava/lang/Object;
+       invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+       invoke-interface {v0}, Ljava/lang/Runnable;->run()V
+       return-void
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index b2fc005..bf50879 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -176,6 +176,10 @@
         testCases.add(new TestCase("b/27799205 (6)", "B27799205Helper", "run6", null, null, null));
         testCases.add(new TestCase("b/28187158", "B28187158", "run", new Object[] { null },
                 new VerifyError(), null));
+        testCases.add(new TestCase("b/29778499 (1)", "B29778499_1", "run", null,
+                new IncompatibleClassChangeError(), null));
+        testCases.add(new TestCase("b/29778499 (2)", "B29778499_2", "run", null,
+                new IncompatibleClassChangeError(), null));
     }
 
     public void runTests() {
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 01790ae..75e74ec 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -102,12 +102,14 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     ifeq ($$(suffix),d)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS)
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+      LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS)
     endif
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk
index e8cc7e4..992332e 100644
--- a/test/Android.libnativebridgetest.mk
+++ b/test/Android.libnativebridgetest.mk
@@ -60,7 +60,7 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) $(ART_HOST_DEBUG_ASFLAGS)
     LOCAL_SHARED_LIBRARIES := libcutils
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
     ifeq ($(HOST_OS),linux)