Merge "Update Jack options to enable invoke-polymorphic"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 9902628..1691dbb 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -84,6 +84,7 @@
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
+ART_GTEST_atomic_method_ref_map_test_DEX_DEPS := Interfaces
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3b273a2..e297b4f 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -215,24 +215,9 @@
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
     else
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-      ifdef ART_USE_VIXL_ARM_BACKEND
-        ifeq ($(1),optimizing)
-          # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-          # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is
-          # defined.
-          core_compile_options += --compiler-filter=interpret-only
-        endif
-      endif
     endif
   else
     $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-    ifdef ART_USE_VIXL_ARM_BACKEND
-      ifeq ($(1),optimizing)
-      # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-      # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is defined.
-      core_compile_options += --compiler-filter=interpret-only
-      endif
-    endif
   endif
   $(4)TARGET_CORE_IMG_OUTS += $$(core_image_name)
   $(4)TARGET_CORE_OAT_OUTS += $$(core_oat_name)
diff --git a/compiler/Android.bp b/compiler/Android.bp
index b883e08..db55ea0 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -349,6 +349,7 @@
         "optimizing/ssa_test.cc",
         "optimizing/stack_map_test.cc",
         "optimizing/suspend_check_test.cc",
+        "utils/atomic_method_ref_map_test.cc",
         "utils/dedupe_set_test.cc",
         "utils/intrusive_forward_list_test.cc",
         "utils/string_reference_test.cc",
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 174e85e..bbf9eee 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -315,11 +315,11 @@
     return target_dex_file_;
   }
 
-  uint32_t TargetStringIndex() const {
+  dex::StringIndex TargetStringIndex() const {
     DCHECK(patch_type_ == Type::kString ||
            patch_type_ == Type::kStringRelative ||
            patch_type_ == Type::kStringBssEntry);
-    return string_idx_;
+    return dex::StringIndex(string_idx_);
   }
 
   const DexFile* TargetDexCacheDexFile() const {
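
Throughout this change, raw uint32_t string indexes are replaced with the
typed dex::StringIndex, which turns mixing up string, type, and method
indexes into a compile-time error. Judging only from the call sites visible
in this diff (explicit construction from uint32_t, a public index_ field),
the wrapper is presumably a thin strong typedef along these lines (a sketch,
not ART's actual definition):

    #include <cstdint>

    namespace dex {
    // Sketch of the wrapper as inferred from usage such as
    // dex::StringIndex(string_idx_) and target_string.string_index.index_;
    // the real ART definition may differ in detail.
    struct StringIndex {
      uint32_t index_;

      constexpr StringIndex() : index_(0u) {}
      explicit constexpr StringIndex(uint32_t idx) : index_(idx) {}

      bool operator==(const StringIndex& other) const { return index_ == other.index_; }
      bool operator<(const StringIndex& other) const { return index_ < other.index_; }
    };
    }  // namespace dex
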
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 0a4f094..30d4b47 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -53,7 +53,7 @@
       uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
       for (uint32_t i = 0; i < parameters_size; ++i) {
         uint32_t id = DecodeUnsignedLeb128P1(&stream);
-        names.push_back(mi->dex_file->StringDataByIdx(id));
+        names.push_back(mi->dex_file->StringDataByIdx(dex::StringIndex(id)));
       }
     }
   }
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 3fb10d8..9d39bf2 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -23,6 +23,7 @@
 #include "driver/compiler_options.h"
 #include "thread.h"
 #include "thread-inl.h"
+#include "utils/atomic_method_ref_map-inl.h"
 #include "verified_method.h"
 #include "verifier/method_verifier-inl.h"
 
@@ -35,8 +36,11 @@
 
 VerificationResults::~VerificationResults() {
   WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
-  DeleteResults(preregistered_dex_files_);
   STLDeleteValues(&verified_methods_);
+  atomic_verified_methods_.Visit([](const MethodReference& ref ATTRIBUTE_UNUSED,
+                                    const VerifiedMethod* method) {
+    delete method;
+  });
 }
 
 void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
@@ -49,16 +53,17 @@
     // We'll punt this later.
     return;
   }
-  bool inserted;
-  DexFileMethodArray* const array = GetMethodArray(ref.dex_file);
+  AtomicMap::InsertResult result = atomic_verified_methods_.Insert(ref,
+                                                                   /*expected*/ nullptr,
+                                                                   verified_method.get());
   const VerifiedMethod* existing = nullptr;
-  if (array != nullptr) {
-    DCHECK(array != nullptr);
-    Atomic<const VerifiedMethod*>* slot = &(*array)[ref.dex_method_index];
-    inserted = slot->CompareExchangeStrongSequentiallyConsistent(nullptr, verified_method.get());
+  bool inserted;
+  if (result != AtomicMap::kInsertResultInvalidDexFile) {
+    inserted = (result == AtomicMap::kInsertResultSuccess);
     if (!inserted) {
-      existing = slot->LoadSequentiallyConsistent();
-      DCHECK_NE(verified_method.get(), existing);
+      // Rare case.
+      CHECK(atomic_verified_methods_.Get(ref, &existing));
+      CHECK_NE(verified_method.get(), existing);
     }
   } else {
     WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
@@ -89,15 +94,26 @@
 }
 
 const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
-  DexFileMethodArray* array = GetMethodArray(ref.dex_file);
-  if (array != nullptr) {
-    return (*array)[ref.dex_method_index].LoadRelaxed();
+  const VerifiedMethod* ret = nullptr;
+  if (atomic_verified_methods_.Get(ref, &ret)) {
+    return ret;
   }
   ReaderMutexLock mu(Thread::Current(), verified_methods_lock_);
   auto it = verified_methods_.find(ref);
   return (it != verified_methods_.end()) ? it->second : nullptr;
 }
 
+void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) {
+  // This method should only be called for classes verified at compile time,
+  // which have no verifier error and no methods that we know will throw
+  // at runtime.
+  AtomicMap::InsertResult result = atomic_verified_methods_.Insert(
+      ref,
+      /*expected*/ nullptr,
+      new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false));
+  DCHECK_EQ(result, AtomicMap::kInsertResultSuccess);
+}
+
 void VerificationResults::AddRejectedClass(ClassReference ref) {
   {
     WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
@@ -124,10 +140,8 @@
   return true;
 }
 
-void VerificationResults::PreRegisterDexFile(const DexFile* dex_file) {
-  CHECK(preregistered_dex_files_.find(dex_file) == preregistered_dex_files_.end())
-      << dex_file->GetLocation();
-  DexFileMethodArray array(dex_file->NumMethodIds());
+void VerificationResults::AddDexFile(const DexFile* dex_file) {
+  atomic_verified_methods_.AddDexFile(dex_file);
   WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
   // There can be some verified methods that are already registered for the dex_file since we set
   // up well known classes earlier. Remove these and put them in the array so that we don't
@@ -135,31 +149,13 @@
   for (auto it = verified_methods_.begin(); it != verified_methods_.end(); ) {
     MethodReference ref = it->first;
     if (ref.dex_file == dex_file) {
-      array[ref.dex_method_index].StoreSequentiallyConsistent(it->second);
+      CHECK(atomic_verified_methods_.Insert(ref, nullptr, it->second) ==
+          AtomicMap::kInsertResultSuccess);
       it = verified_methods_.erase(it);
     } else {
       ++it;
     }
   }
-  preregistered_dex_files_.emplace(dex_file, std::move(array));
-}
-
-void VerificationResults::DeleteResults(DexFileResults& array) {
-  for (auto& pair : array) {
-    for (Atomic<const VerifiedMethod*>& method : pair.second) {
-      delete method.LoadSequentiallyConsistent();
-    }
-  }
-  array.clear();
-}
-
-VerificationResults::DexFileMethodArray* VerificationResults::GetMethodArray(
-    const DexFile* dex_file) {
-  auto it = preregistered_dex_files_.find(dex_file);
-  if (it != preregistered_dex_files_.end()) {
-    return &it->second;
-  }
-  return nullptr;
 }
 
 }  // namespace art
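
The AtomicMethodRefMap introduced above is exercised through AddDexFile,
HaveDexFile, Insert, Get, and Visit. From those call sites its interface is
roughly the following sketch, assuming (like the DexFileMethodArray it
replaces) one pre-sized array of atomic slots per dex file, indexed by
dex_method_index:

    // Rough shape inferred from the call sites in this change; the real
    // utils/atomic_method_ref_map.h may differ.
    class DexFile;            // forward declarations for the sketch
    struct MethodReference;

    template <typename T>
    class AtomicMethodRefMap {
     public:
      enum InsertResult {
        kInsertResultInvalidDexFile,  // AddDexFile() was never called for ref.dex_file.
        kInsertResultCASFailure,      // Slot already held something other than `expected`.
        kInsertResultSuccess,
      };

      // Allocates one atomic slot per method id of `dex_file`. No lock is
      // needed afterwards because the storage never grows.
      void AddDexFile(const DexFile* dex_file);
      bool HaveDexFile(const DexFile* dex_file) const;

      // Atomically compare-and-swaps `expected` -> `value` in the slot for `ref`.
      InsertResult Insert(MethodReference ref, T expected, T value);

      // Reads the slot for `ref`; returns false if its dex file was never added.
      bool Get(MethodReference ref, T* out) const;

      // Applies `visitor(ref, value)` to every slot, including empty ones.
      template <typename Visitor>
      void Visit(const Visitor& visitor);
    };
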
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index b3356e0..ab735c1 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -26,6 +26,7 @@
 #include "class_reference.h"
 #include "method_reference.h"
 #include "safe_map.h"
+#include "utils/atomic_method_ref_map.h"
 
 namespace art {
 
@@ -46,6 +47,9 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!verified_methods_lock_);
 
+  void CreateVerifiedMethodFor(MethodReference ref)
+      REQUIRES(!verified_methods_lock_);
+
   const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
       REQUIRES(!verified_methods_lock_);
 
@@ -54,26 +58,22 @@
 
   bool IsCandidateForCompilation(MethodReference& method_ref, const uint32_t access_flags);
 
-  // Add a dex file array to the preregistered_dex_files_ array. These dex files require no locks to
-  // access. It is not safe to call if other callers are calling GetVerifiedMethod concurrently.
-  void PreRegisterDexFile(const DexFile* dex_file) REQUIRES(!verified_methods_lock_);
+  // Add a dex file to enable using the atomic map.
+  void AddDexFile(const DexFile* dex_file) REQUIRES(!verified_methods_lock_);
 
  private:
   // Verified methods. The method array is fixed to avoid needing a lock to extend it.
-  using DexFileMethodArray = dchecked_vector<Atomic<const VerifiedMethod*>>;
-  using DexFileResults = std::map<const DexFile*, DexFileMethodArray>;
+  using AtomicMap = AtomicMethodRefMap<const VerifiedMethod*>;
   using VerifiedMethodMap = SafeMap<MethodReference,
                                     const VerifiedMethod*,
                                     MethodReferenceComparator>;
 
-  static void DeleteResults(DexFileResults& array);
-
-  DexFileMethodArray* GetMethodArray(const DexFile* dex_file) REQUIRES(!verified_methods_lock_);
   VerifiedMethodMap verified_methods_ GUARDED_BY(verified_methods_lock_);
   const CompilerOptions* const compiler_options_;
 
-  // Dex2oat can preregister dex files to avoid locking when calling GetVerifiedMethod.
-  DexFileResults preregistered_dex_files_;
+  // Dex2oat can add dex files to atomic_verified_methods_ to avoid locking when calling
+  // GetVerifiedMethod.
+  AtomicMap atomic_verified_methods_;
 
   ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 04331e5..ce53417 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -32,6 +32,8 @@
 
 class VerifiedMethod {
  public:
+  VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw);
+
   // Cast elision set type.
   // Since we're adding the dex PCs to the set in increasing order, a sorted vector
   // is better for performance (not just memory usage), especially for large sets.
@@ -80,8 +82,6 @@
   }
 
  private:
-  VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw);
-
   /*
    * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
    * verification). For type-precise determination we have all the data we need, so we just need to
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ad75ec4..a2bab80 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -71,6 +71,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/atomic_method_ref_map-inl.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "utils/swap_space.h"
 #include "vdex_file.h"
@@ -287,8 +288,6 @@
       instruction_set_features_(instruction_set_features),
       requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
-      compiled_methods_lock_("compiled method lock"),
-      compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
@@ -326,12 +325,12 @@
     MutexLock mu(self, compiled_classes_lock_);
     STLDeleteValues(&compiled_classes_);
   }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
-    for (auto& pair : compiled_methods_) {
-      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, pair.second);
+  compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
+                                 CompiledMethod* method) {
+    if (method != nullptr) {
+      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, method);
     }
-  }
+  });
   compiler_->UnInit();
 }
 
@@ -575,8 +574,7 @@
                           const DexFile& dex_file,
                           optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
                           bool compilation_enabled,
-                          Handle<mirror::DexCache> dex_cache)
-    REQUIRES(!driver->compiled_methods_lock_) {
+                          Handle<mirror::DexCache> dex_cache) {
   DCHECK(driver != nullptr);
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
@@ -842,9 +840,9 @@
     switch (inst->Opcode()) {
       case Instruction::CONST_STRING:
       case Instruction::CONST_STRING_JUMBO: {
-        uint32_t string_index = (inst->Opcode() == Instruction::CONST_STRING)
+        dex::StringIndex string_index((inst->Opcode() == Instruction::CONST_STRING)
             ? inst->VRegB_21c()
-            : inst->VRegB_31c();
+            : inst->VRegB_31c());
         mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
         CHECK(string != nullptr) << "Could not allocate a string when forcing determinism";
         break;
@@ -940,6 +938,13 @@
                                 TimingLogger* timings) {
   CheckThreadPools();
 
+  for (const DexFile* dex_file : dex_files) {
+    // The dex file may already be inserted if the caller is CompileOne. This happens for gtests.
+    if (!compiled_methods_.HaveDexFile(dex_file)) {
+      compiled_methods_.AddDexFile(dex_file);
+    }
+  }
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -2000,6 +2005,35 @@
   }
 }
 
+static void PopulateVerifiedMethods(const DexFile& dex_file,
+                                    uint32_t class_def_index,
+                                    VerificationResults* verification_results) {
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
+  const uint8_t* class_data = dex_file.GetClassData(class_def);
+  if (class_data == nullptr) {
+    return;
+  }
+  ClassDataItemIterator it(dex_file, class_data);
+  // Skip fields
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  while (it.HasNextInstanceField()) {
+    it.Next();
+  }
+
+  while (it.HasNextDirectMethod()) {
+    verification_results->CreateVerifiedMethodFor(MethodReference(&dex_file, it.GetMemberIndex()));
+    it.Next();
+  }
+
+  while (it.HasNextVirtualMethod()) {
+    verification_results->CreateVerifiedMethodFor(MethodReference(&dex_file, it.GetMemberIndex()));
+    it.Next();
+  }
+  DCHECK(!it.HasNext());
+}
+
 void CompilerDriver::Verify(jobject jclass_loader,
                             const std::vector<const DexFile*>& dex_files,
                             TimingLogger* timings) {
@@ -2036,6 +2070,13 @@
           } else if (set.find(class_def.class_idx_) == set.end()) {
             ObjectLock<mirror::Class> lock(soa.Self(), cls);
             mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
+            // Create a `VerifiedMethod` for each method, as the compiler expects one
+            // for quickening or compiling.
+            // Note that this means:
+            // - We're only going to compile methods that did verify.
+            // - Quickening will not do checkcast elision.
+            // TODO(ngeoffray): Reconsider this once we refactor compiler filters.
+            PopulateVerifiedMethods(*dex_file, i, verification_results_);
           }
         }
       }
@@ -2616,30 +2657,15 @@
                                        size_t non_relative_linker_patch_count) {
   DCHECK(GetCompiledMethod(method_ref) == nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    compiled_methods_.Put(method_ref, compiled_method);
-    non_relative_linker_patch_count_ += non_relative_linker_patch_count;
-  }
+  MethodTable::InsertResult result = compiled_methods_.Insert(method_ref,
+                                                              /*expected*/ nullptr,
+                                                              compiled_method);
+  CHECK(result == MethodTable::kInsertResultSuccess);
+  non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count);
   DCHECK(GetCompiledMethod(method_ref) != nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
 }
 
-void CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) {
-  CompiledMethod* compiled_method = nullptr;
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    auto it = compiled_methods_.find(method_ref);
-    if (it != compiled_methods_.end()) {
-      compiled_method = it->second;
-      compiled_methods_.erase(it);
-    }
-  }
-  if (compiled_method != nullptr) {
-    CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, compiled_method);
-  }
-}
-
 CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
   ClassTable::const_iterator it = compiled_classes_.find(ref);
@@ -2678,13 +2704,9 @@
 }
 
 CompiledMethod* CompilerDriver::GetCompiledMethod(MethodReference ref) const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  MethodTable::const_iterator it = compiled_methods_.find(ref);
-  if (it == compiled_methods_.end()) {
-    return nullptr;
-  }
-  CHECK(it->second != nullptr);
-  return it->second;
+  CompiledMethod* compiled_method = nullptr;
+  compiled_methods_.Get(ref, &compiled_method);
+  return compiled_method;
 }
 
 bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx,
@@ -2713,8 +2735,7 @@
 }
 
 size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  return non_relative_linker_patch_count_;
+  return non_relative_linker_patch_count_.LoadRelaxed();
 }
 
 void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
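
With compiled_methods_lock_ removed, non_relative_linker_patch_count_ becomes
an Atomic<size_t> updated with FetchAndAddRelaxed and read with LoadRelaxed.
Relaxed ordering is enough for a pure statistics counter whose total is only
consumed after the compiler threads have finished. In isolation the pattern
looks like this minimal sketch, using std::atomic in place of ART's Atomic<>
wrapper (helper names here are hypothetical):

    #include <atomic>
    #include <cstddef>

    // Statistics counter with relaxed ordering: increments from many threads
    // never race, and the final value is read only after all writers join.
    std::atomic<size_t> non_relative_linker_patch_count{0};

    void AddPatches(size_t count) {  // hypothetical helper
      non_relative_linker_patch_count.fetch_add(count, std::memory_order_relaxed);
    }

    size_t GetPatchCount() {  // hypothetical helper
      return non_relative_linker_patch_count.load(std::memory_order_relaxed);
    }
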
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 7418b00..cc50197 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -41,6 +41,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
+#include "utils/atomic_method_ref_map.h"
 #include "utils/dex_cache_arrays_layout.h"
 
 namespace art {
@@ -131,7 +132,7 @@
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
+      REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   VerificationResults* GetVerificationResults() const {
     DCHECK(Runtime::Current()->IsAotCompiler());
@@ -168,18 +169,12 @@
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
 
-  CompiledMethod* GetCompiledMethod(MethodReference ref) const
-      REQUIRES(!compiled_methods_lock_);
-  size_t GetNonRelativeLinkerPatchCount() const
-      REQUIRES(!compiled_methods_lock_);
-
+  CompiledMethod* GetCompiledMethod(MethodReference ref) const;
+  size_t GetNonRelativeLinkerPatchCount() const;
   // Add a compiled method.
   void AddCompiledMethod(const MethodReference& method_ref,
                          CompiledMethod* const compiled_method,
-                         size_t non_relative_linker_patch_count)
-      REQUIRES(!compiled_methods_lock_);
-  // Remove and delete a compiled method.
-  void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
+                         size_t non_relative_linker_patch_count);
 
   void SetRequiresConstructorBarrier(Thread* self,
                                      const DexFile* dex_file,
@@ -519,18 +514,15 @@
   mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
 
-  typedef SafeMap<const MethodReference, CompiledMethod*, MethodReferenceComparator> MethodTable;
-
- public:
-  // Lock is public so that non-members can have lock annotations.
-  mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
 
  private:
   // All method references that this compiler has compiled.
-  MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_);
+  MethodTable compiled_methods_;
+
   // Number of non-relative patches in all compiled methods. These patches need space
   // in the .oat_patches ELF section if requested in the compiler options.
-  size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
+  Atomic<size_t> non_relative_linker_patch_count_;
 
   // If image_ is true, specifies the classes that will be included in the image.
   // Note if image_classes_ is null, all classes are included in the image.
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index f40c712..12684c0 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -111,7 +111,7 @@
   ObjPtr<mirror::DexCache> dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
   EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
-    const mirror::String* string = dex_cache->GetResolvedString(i);
+    const mirror::String* string = dex_cache->GetResolvedString(dex::StringIndex(i));
     EXPECT_TRUE(string != nullptr) << "string_idx=" << i;
   }
   EXPECT_EQ(dex.NumTypeIds(), dex_cache->NumResolvedTypes());
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 5629dff..9bbe595 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -29,6 +29,8 @@
 #include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "linker/multi_oat_relative_patcher.h"
 #include "lock_word.h"
 #include "mirror/object-inl.h"
@@ -256,6 +258,16 @@
       bool image_space_ok = writer->PrepareImageAddressSpace();
       ASSERT_TRUE(image_space_ok);
 
+      if (kIsVdexEnabled) {
+        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
+          std::unique_ptr<BufferedOutputStream> vdex_out(
+              MakeUnique<BufferedOutputStream>(
+                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
+          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
+        }
+      }
+
       for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
         linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
                                                 driver->GetInstructionSetFeatures());
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index a706697..7bb2bb7 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -433,7 +433,7 @@
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
-  ReaderMutexLock mu(self, *class_linker->DexLock());
+  ReaderMutexLock mu(self, *Locks::dex_lock_);
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
     ObjPtr<mirror::DexCache> dex_cache =
         ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
@@ -884,7 +884,7 @@
 
   ScopedAssertNoThreadSuspension sa(__FUNCTION__);
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
-  ReaderMutexLock mu2(self, *class_linker->DexLock());
+  ReaderMutexLock mu2(self, *Locks::dex_lock_);
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
     if (self->IsJWeakCleared(data.weak_root)) {
       continue;
@@ -1013,7 +1013,7 @@
   // caches. We check that the number of dex caches does not change.
   size_t dex_cache_count = 0;
   {
-    ReaderMutexLock mu(self, *class_linker->DexLock());
+    ReaderMutexLock mu(self, *Locks::dex_lock_);
     // Count number of dex caches not in the boot image.
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       ObjPtr<mirror::DexCache> dex_cache =
@@ -1031,7 +1031,7 @@
       hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count)));
   CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array.";
   {
-    ReaderMutexLock mu(self, *class_linker->DexLock());
+    ReaderMutexLock mu(self, *Locks::dex_lock_);
     size_t non_image_dex_caches = 0;
     // Re-count number of non image dex caches.
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
@@ -1451,7 +1451,8 @@
     InternTable* const intern_table = runtime->GetInternTable();
     for (size_t i = 0, count = dex_file->NumStringIds(); i < count; ++i) {
       uint32_t utf16_length;
-      const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(i, &utf16_length);
+      const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i),
+                                                                      &utf16_length);
       mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data).Ptr();
       TryAssignBinSlot(work_stack, string, oat_index);
     }
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 555baf6..9bd25d8 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -117,6 +117,12 @@
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromBytes", "([BIII)Ljava/lang/String;") \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromChars", "(II[C)Ljava/lang/String;") \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromString", "(Ljava/lang/String;)Ljava/lang/String;") \
+  V(StringBufferAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuffer;") \
+  V(StringBufferLength, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/StringBuffer;", "length", "()I") \
+  V(StringBufferToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "toString", "()Ljava/lang/String;") \
+  V(StringBuilderAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;") \
+  V(StringBuilderLength, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/StringBuilder;", "length", "()I") \
+  V(StringBuilderToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "toString", "()Ljava/lang/String;") \
   V(UnsafeCASInt, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapInt", "(Ljava/lang/Object;JII)Z") \
   V(UnsafeCASLong, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapLong", "(Ljava/lang/Object;JJJ)Z") \
   V(UnsafeCASObject, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z") \
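
Each V(...) row in this list carries (name, invoke kind, environment/dex-cache
needs, side effects, exception behavior, declaring class descriptor, method
name, signature). Consumers instantiate the list with their own definition of
V; a typical expansion into an enum looks like the sketch below, where the
macro parameter names are merely descriptive and INTRINSICS_LIST is assumed to
be the list macro this header defines:

    #include "intrinsics_list.h"  // assumed to define INTRINSICS_LIST(V)

    // Sketch of a typical X-macro consumer: one enumerator per table row, so
    // the six StringBuffer/StringBuilder rows added above each become a kind.
    enum class Intrinsics {
      kNone,
    #define MAKE_ENUMERATOR(Name, InvokeKind, NeedsEnvOrCache, SideEffects, \
                            Exceptions, ClassDesc, MethodName, Signature)   \
      k##Name,
      INTRINSICS_LIST(MAKE_ENUMERATOR)
    #undef MAKE_ENUMERATOR
    };
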
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 0151789..233daf4 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -163,7 +163,8 @@
                                                offset + patch.LiteralOffset(),
                                                target_offset);
           } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) {
-            uint32_t target_offset = string_index_to_offset_map_.Get(patch.TargetStringIndex());
+            uint32_t target_offset =
+                string_index_to_offset_map_.Get(patch.TargetStringIndex().index_);
             patcher_->PatchPcRelativeReference(&patched_code_,
                                                patch,
                                                offset + patch.LiteralOffset(),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9458576..0a778b0 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -30,6 +30,8 @@
 #include "elf_writer.h"
 #include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "linker/multi_oat_relative_patcher.h"
 #include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
@@ -218,6 +220,17 @@
                                       oat_writer.GetBssSize(),
                                       oat_writer.GetBssRootsOffset());
 
+    if (kIsVdexEnabled) {
+      std::unique_ptr<BufferedOutputStream> vdex_out(
+            MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+      if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) {
+        return false;
+      }
+      if (!oat_writer.WriteChecksumsAndVdexHeader(vdex_out.get())) {
+        return false;
+      }
+    }
+
     if (!oat_writer.WriteRodata(oat_rodata)) {
       return false;
     }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 153aff4..bebd5f5 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -300,6 +300,7 @@
     oat_data_offset_(0u),
     oat_header_(nullptr),
     size_vdex_header_(0),
+    size_vdex_checksums_(0),
     size_dex_file_alignment_(0),
     size_executable_offset_alignment_(0),
     size_oat_header_(0),
@@ -409,10 +410,11 @@
                                       CreateTypeLookupTable create_type_lookup_table) {
   DCHECK(write_state_ == WriteState::kAddingDexFileSources);
   const uint8_t* current_dex_data = nullptr;
-  for (size_t i = 0; ; ++i) {
+  for (size_t i = 0; i < vdex_file.GetHeader().GetNumberOfDexFiles(); ++i) {
     current_dex_data = vdex_file.GetNextDexFileData(current_dex_data);
     if (current_dex_data == nullptr) {
-      break;
+      LOG(ERROR) << "Unexpected number of dex files in vdex " << location;
+      return false;
     }
     if (!DexFile::IsMagicValid(current_dex_data)) {
       LOG(ERROR) << "Invalid magic in vdex file created from " << location;
@@ -424,7 +426,14 @@
     oat_dex_files_.emplace_back(full_location,
                                 DexFileSource(current_dex_data),
                                 create_type_lookup_table);
+    oat_dex_files_.back().dex_file_location_checksum_ = vdex_file.GetLocationChecksum(i);
   }
+
+  if (vdex_file.GetNextDexFileData(current_dex_data) != nullptr) {
+    LOG(ERROR) << "Unexpected number of dex files in vdex " << location;
+    return false;
+  }
+
   if (oat_dex_files_.empty()) {
     LOG(ERROR) << "No dex files in vdex file created from " << location;
     return false;
@@ -488,8 +497,8 @@
 
   // Initialize VDEX and OAT headers.
   if (kIsVdexEnabled) {
-    size_vdex_header_ = sizeof(VdexFile::Header);
-    vdex_size_ = size_vdex_header_;
+    // Reserve space for Vdex header and checksums.
+    vdex_size_ = sizeof(VdexFile::Header) + oat_dex_files_.size() * sizeof(VdexFile::VdexChecksum);
   }
   size_t oat_data_offset = InitOatHeader(instruction_set,
                                         instruction_set_features,
@@ -793,7 +802,7 @@
       // Update quick method header.
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
-      uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+      uint32_t vmap_table_offset = method_header->GetVmapTableOffset();
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -935,7 +944,7 @@
       // If vdex is enabled, we only emit the stack map of compiled code. The quickening info will
       // be in the vdex file.
       if (!compiled_method->GetQuickCode().empty() || !kIsVdexEnabled) {
-        DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
+        DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(), 0u);
 
         ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
         uint32_t map_size = map.size() * sizeof(map[0]);
@@ -949,7 +958,7 @@
               });
           // Code offset is not initialized yet, so set the map offset to 0u-offset.
           DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
-          oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
+          oat_class->method_headers_[method_offsets_index_].SetVmapTableOffset(0u - offset);
         }
       }
       ++method_offsets_index_;
@@ -1406,7 +1415,7 @@
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
-      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_;
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset();
       uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
       ++method_offsets_index_;
 
@@ -1837,6 +1846,7 @@
       size_total += (x);
 
     DO_STAT(size_vdex_header_);
+    DO_STAT(size_vdex_checksums_);
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
     DO_STAT(size_oat_header_);
@@ -2383,6 +2393,7 @@
 
   // Update dex file size and resize class offsets in the OatDexFile.
   // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+  // Note: For vdex, the checksum is copied from the existing vdex file.
   oat_dex_file->dex_file_size_ = header->file_size_;
   oat_dex_file->class_offsets_.resize(header->class_defs_size_);
   return true;
@@ -2592,11 +2603,31 @@
   return true;
 }
 
-bool OatWriter::WriteVdexHeader(OutputStream* vdex_out) {
+bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) {
   if (!kIsVdexEnabled) {
     return true;
   }
-  off_t actual_offset = vdex_out->Seek(0, kSeekSet);
+  // Write checksums.
+  off_t actual_offset = vdex_out->Seek(sizeof(VdexFile::Header), kSeekSet);
+  if (actual_offset != sizeof(VdexFile::Header)) {
+    PLOG(ERROR) << "Failed to seek to the checksum location of vdex file. Actual: " << actual_offset
+                << " File: " << vdex_out->GetLocation();
+    return false;
+  }
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    if (!vdex_out->WriteFully(
+            &oat_dex_file->dex_file_location_checksum_, sizeof(VdexFile::VdexChecksum))) {
+      PLOG(ERROR) << "Failed to write dex file location checksum. File: "
+                  << vdex_out->GetLocation();
+      return false;
+    }
+    size_vdex_checksums_ += sizeof(VdexFile::VdexChecksum);
+  }
+
+  // Write header.
+  actual_offset = vdex_out->Seek(0, kSeekSet);
   if (actual_offset != 0) {
     PLOG(ERROR) << "Failed to seek to the beginning of vdex file. Actual: " << actual_offset
                 << " File: " << vdex_out->GetLocation();
@@ -2610,12 +2641,15 @@
   size_t verifier_deps_section_size = vdex_quickening_info_offset_ - vdex_verifier_deps_offset_;
   size_t quickening_info_section_size = vdex_size_ - vdex_quickening_info_offset_;
 
-  VdexFile::Header vdex_header(
-      dex_section_size, verifier_deps_section_size, quickening_info_section_size);
+  VdexFile::Header vdex_header(oat_dex_files_.size(),
+                               dex_section_size,
+                               verifier_deps_section_size,
+                               quickening_info_section_size);
   if (!vdex_out->WriteFully(&vdex_header, sizeof(VdexFile::Header))) {
     PLOG(ERROR) << "Failed to write vdex header. File: " << vdex_out->GetLocation();
     return false;
   }
+  size_vdex_header_ = sizeof(VdexFile::Header);
 
   if (!vdex_out->Flush()) {
     PLOG(ERROR) << "Failed to flush stream after writing to vdex file."
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 0dcf79e..da221d6 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -124,7 +124,7 @@
   //   - Initialize()
   //   - WriteVerifierDeps()
   //   - WriteQuickeningInfo()
-  //   - WriteVdexHeader()
+  //   - WriteChecksumsAndVdexHeader()
   //   - PrepareLayout(),
   //   - WriteRodata(),
   //   - WriteCode(),
@@ -168,7 +168,7 @@
                             /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
   bool WriteQuickeningInfo(OutputStream* vdex_out);
   bool WriteVerifierDeps(OutputStream* vdex_out, verifier::VerifierDeps* verifier_deps);
-  bool WriteVdexHeader(OutputStream* vdex_out);
+  bool WriteChecksumsAndVdexHeader(OutputStream* vdex_out);
   // Initialize the writer with the given parameters.
   void Initialize(const CompilerDriver* compiler,
                   ImageWriter* image_writer,
@@ -387,6 +387,7 @@
 
   // output stats
   uint32_t size_vdex_header_;
+  uint32_t size_vdex_checksums_;
   uint32_t size_dex_file_alignment_;
   uint32_t size_executable_offset_alignment_;
   uint32_t size_oat_header_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9f6b78a..fa6a522 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -304,6 +304,7 @@
     SetFrameSize(RoundUp(
         first_register_slot_in_slow_path_
         + maximum_safepoint_spill_size
+        + (GetGraph()->HasShouldDeoptimizeFlag() ? kShouldDeoptimizeFlagSize : 0)
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
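
The flag slot is carved out between the safepoint spill area and the
frame-entry spills, so its offset from SP is
frame_size - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize, exactly what
GetStackOffsetOfShouldDeoptimizeFlag() in code_generator.h below returns. A
worked example with illustrative numbers, assuming a 4-byte flag:

    // Illustrative numbers only; real values depend on the method and ISA.
    //   first_register_slot_in_slow_path_ = 32
    //   maximum_safepoint_spill_size      = 16
    //   kShouldDeoptimizeFlagSize         =  4   (assumed: one word)
    //   FrameEntrySpillSize()             = 24
    //   kStackAlignment                   = 16
    //
    //   frame size  = RoundUp(32 + 16 + 4 + 24, 16) = RoundUp(76, 16) = 80
    //   flag offset = 80 - 24 - 4 = 52 bytes above SP
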
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a5d19ab..4b11e7c 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -307,6 +307,12 @@
     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
   }
 
+  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
+    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
+    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
+    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
+  }
+
   // Record native to dex mapping for a suspend point.  Required by runtime.
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
   // Check whether we have already recorded mapping at this PC.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 8a6b94e..ed6eef1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -430,7 +430,7 @@
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    const uint32_t string_index = load->GetStringIndex().index_;
     Register out = locations->Out().AsRegister<Register>();
     Register temp = locations->GetTemp(0).AsRegister<Register>();
     constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
@@ -1329,6 +1329,13 @@
     __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
     __ cfi().RelOffsetForMany(DWARFReg(S0), 0, fpu_spill_mask_, kArmWordSize);
   }
+
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ mov(IP, ShifterOperand(0));
+    __ StoreToOffset(kStoreWord, IP, SP, -kShouldDeoptimizeFlagSize);
+  }
+
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ AddConstant(SP, -adjust);
   __ cfi().AdjustCFAOffset(adjust);
@@ -1944,6 +1951,19 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ LoadFromOffset(kLoadWord,
+                    flag->GetLocations()->Out().AsRegister<Register>(),
+                    SP,
+                    codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
 void LocationsBuilderARM::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -5946,7 +5966,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(out, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
@@ -5965,7 +5985,7 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       Register temp = locations->GetTemp(0).AsRegister<Register>();
       CodeGeneratorARM::PcRelativePatchInfo* labels =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       __ BindTrackedLabel(&labels->movw_label);
       __ movw(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
@@ -5994,7 +6014,7 @@
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -7340,7 +7360,7 @@
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                             uint32_t string_index) {
+                                                             dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
@@ -7364,7 +7384,7 @@
 }
 
 Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
-                                                       uint32_t string_index) {
+                                                       dex::StringIndex string_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
   return jit_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
@@ -7436,7 +7456,7 @@
     uint32_t literal_offset = literal->GetLabel()->Position();
     linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index a4ccb57..8230512 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -485,11 +485,12 @@
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                             dex::StringIndex string_index);
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
-  Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index);
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a78b3da..6eebd69 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -349,7 +349,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index);
     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -1273,6 +1273,12 @@
         frame_size - GetCoreSpillSize());
     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
         frame_size - FrameEntrySpillSize());
+
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      // Initialize should_deoptimize flag to 0.
+      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
+      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
+    }
   }
 }
 
@@ -3235,6 +3241,17 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ Ldr(OutputRegister(flag),
+         MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
   return condition->IsCondition() &&
          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
@@ -4132,7 +4149,7 @@
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
-    const DexFile& dex_file, uint32_t string_index) {
+    const DexFile& dex_file, dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
@@ -4158,7 +4175,7 @@
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
-    const DexFile& dex_file, uint32_t string_index) {
+    const DexFile& dex_file, dex::StringIndex string_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
   return jit_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
@@ -4246,7 +4263,7 @@
     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
     linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   if (!GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
@@ -4594,7 +4611,7 @@
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex();
+      uint32_t string_index = load->GetStringIndex().index_;
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
@@ -4612,7 +4629,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       // Add ADRP with its PC-relative String .bss entry patch.
       const DexFile& dex_file = load->GetDexFile();
-      uint32_t string_index = load->GetStringIndex();
+      uint32_t string_index = load->GetStringIndex().index_;
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
       Register temp = temps.AcquireX();
@@ -4653,7 +4670,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
-  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
+  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1545fd3..868c8b0 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -560,14 +560,15 @@
       uint32_t element_offset,
       vixl::aarch64::Label* adrp_label = nullptr);
 
-  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                                      uint32_t string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
+      const DexFile& dex_file,
+      dex::StringIndex string_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
                                                                     dex::TypeIndex type_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
-                                                                uint32_t string_index);
+                                                                dex::StringIndex string_index);
 
   void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
   void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1b5138f..4b24ac3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -63,9 +63,10 @@
   // We expected this for both core and fpu register pairs.
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
-
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
+static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
 static constexpr int kCurrentMethodStackOffset = 0;
-static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #ifdef __
@@ -438,6 +439,62 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
 };
 
+class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    HLoadString* load = instruction_->AsLoadString();
+    const uint32_t string_index = load->GetStringIndex().index_;
+    vixl32::Register out = OutputRegister(load);
+    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+    constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+    // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
+    bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0)));
+    vixl32::Register entry_address = temp_is_r0 ? out : temp;
+    DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0)));
+    if (call_saves_everything_except_r0 && temp_is_r0) {
+      __ Mov(entry_address, temp);
+    }
+
+    __ Mov(calling_convention.GetRegisterAt(0), string_index);
+    arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+    // Store the resolved String to the .bss entry.
+    if (call_saves_everything_except_r0) {
+      // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+      __ Str(r0, MemOperand(entry_address));
+    } else {
+      // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+      arm_codegen->EmitMovwMovtPlaceholder(labels, out);
+      __ Str(r0, MemOperand(entry_address));
+    }
+
+    arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+    RestoreLiveRegisters(codegen, locations);
+
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
+};
+
 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
   TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
@@ -630,9 +687,30 @@
   return mask;
 }
 
-size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index);
-  return kArmWordSize;
+// Saves the register in the stack. Returns the size taken on stack.
+size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+                                              uint32_t reg_id ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+  return 0;
+}
+
+// Restores the register from the stack. Returns the size taken on stack.
+size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+                                                 uint32_t reg_id ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+  return 0;
+}
+
+size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+                                                       uint32_t reg_id ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+  return 0;
+}
+
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+                                                          uint32_t reg_id ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+  return 0;
 }
 
 #undef __
@@ -655,7 +733,11 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena()),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
   // Give d14 and d15 as scratch registers to VIXL.
@@ -793,7 +875,7 @@
     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
     // The load must immediately precede RecordPcInfo.
     AssemblerAccurateScope aas(GetVIXLAssembler(),
-                               kArmInstrMaxSizeInBytes,
+                               vixl32::kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
     __ ldr(temp, MemOperand(temp));
     RecordPcInfo(nullptr, 0);
@@ -853,6 +935,116 @@
   __ Bind(GetLabelOf(block));
 }
 
+Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      uint32_t index = gp_index_++;
+      uint32_t stack_index = stack_index_++;
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        return LocationFrom(calling_convention.GetRegisterAt(index));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
+      }
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t index = gp_index_;
+      uint32_t stack_index = stack_index_;
+      gp_index_ += 2;
+      stack_index_ += 2;
+      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+        if (calling_convention.GetRegisterAt(index).Is(r1)) {
+          // Skip R1, and use R2_R3 instead.
+          gp_index_++;
+          index++;
+        }
+      }
+      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+        DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
+                  calling_convention.GetRegisterAt(index + 1).GetCode());
+
+        return LocationFrom(calling_convention.GetRegisterAt(index),
+                            calling_convention.GetRegisterAt(index + 1));
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
+      }
+    }
+
+    case Primitive::kPrimFloat: {
+      uint32_t stack_index = stack_index_++;
+      if (float_index_ % 2 == 0) {
+        float_index_ = std::max(double_index_, float_index_);
+      }
+      if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
+        return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
+      }
+    }
+
+    case Primitive::kPrimDouble: {
+      double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
+      uint32_t stack_index = stack_index_;
+      stack_index_ += 2;
+      if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
+        uint32_t index = double_index_;
+        double_index_ += 2;
+        Location result = LocationFrom(calling_convention.GetFpuRegisterAt(index),
+                                       calling_convention.GetFpuRegisterAt(index + 1));
+        DCHECK(ExpectedPairLayout(result));
+        return result;
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
+      }
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+  return Location::NoLocation();
+}
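A worked example of the rules above, with a hypothetical signature (core argument registers are r1-r3 and FP argument registers s0-s15 in this convention; r0 carries the ArtMethod*):

// void f(float a, long b, int c)
//   a (float) -> s0       float_index_: 0 -> 1
//   b (long)  -> r2_r3    gp_index_ 0 points at r1, which is skipped so the
//                         pair starts on an even register; gp_index_ -> 3
//   c (int)   -> stack    gp_index_ 3 is past r3, so a stack slot is used
// stack_index_ advances for every argument (by 2 for wide types), so arguments
// that spill land at the stack offsets the caller expects.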
+
+Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(Primitive::Type type) const {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      return LocationFrom(r0);
+    }
+
+    case Primitive::kPrimFloat: {
+      return LocationFrom(s0);
+    }
+
+    case Primitive::kPrimLong: {
+      return LocationFrom(r0, r1);
+    }
+
+    case Primitive::kPrimDouble: {
+      return LocationFrom(s0, s1);
+    }
+
+    case Primitive::kPrimVoid:
+      return Location::NoLocation();
+  }
+
+  UNREACHABLE();
+}
+
+Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
+  return LocationFrom(kMethodRegister);
+}
+
 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
   if (source.Equals(destination)) {
     return;
@@ -924,10 +1116,14 @@
                                          uint32_t dex_pc,
                                          SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
-  GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value());
+  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()));
+  // Ensure the pc position is recorded immediately after the `blx` instruction.
+  // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
+  AssemblerAccurateScope aas(GetVIXLAssembler(),
+                             vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+  __ blx(lr);
   if (EntrypointRequiresStackMap(entrypoint)) {
-    // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
-    // previous instruction.
     RecordPcInfo(instruction, dex_pc, slow_path);
   }
 }
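The exact-size scope matters because the macro-assembler may otherwise emit literal pools or veneers between the call and the point where RecordPcInfo samples the pc. As a standalone sketch (EmitCheckedCall is a hypothetical name; the scope type and constants are the VIXL APIs used above):

// Sketch: a runtime call whose return pc must match the recorded stack map pc.
void EmitCheckedCall(vixl32::MacroAssembler* masm, vixl32::Register target) {
  AssemblerAccurateScope scope(masm,
                               vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);
  masm->blx(target);  // Exactly one 16-bit T32 instruction; the scope rejects
                      // anything else being emitted here.
}
// The caller records pc info immediately after the scope closes.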
@@ -936,11 +1132,7 @@
                                                                HInstruction* instruction,
                                                                SlowPathCode* slow_path) {
   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
-  GenerateInvokeRuntime(entry_point_offset);
-}
-
-void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) {
-  GetAssembler()->LoadFromOffset(kLoadWord, lr, tr, entry_point_offset);
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
   __ Blx(lr);
 }
 
@@ -1270,6 +1462,19 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  GetAssembler()->LoadFromOffset(kLoadWord,
+                                 OutputRegister(flag),
+                                 sp,
+                                 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -1322,11 +1527,10 @@
       }
       break;
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
@@ -1346,15 +1550,22 @@
     return;
   }
 
+  Location right = cond->GetLocations()->InAt(1);
   vixl32::Register out = OutputRegister(cond);
   vixl32::Label true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
       // Integer case.
-      __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      if (right.IsRegister()) {
+        __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      } else {
+        DCHECK(right.IsConstant());
+        __ Cmp(InputRegisterAt(cond, 0),
+               CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
       AssemblerAccurateScope aas(GetVIXLAssembler(),
-                                 kArmInstrMaxSizeInBytes * 3u,
+                                 3 * vixl32::kMaxInstructionSizeInBytes,
                                  CodeBufferCheckScope::kMaximumSize);
       __ ite(ARMCondition(cond->GetCondition()));
       __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
@@ -1569,7 +1780,10 @@
 
   HandleInvoke(invoke);
 
-  // TODO(VIXL): invoke->HasPcRelativeDexCache()
+  // For a PC-relative dex cache, the invoke has an extra input: the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -1591,15 +1805,13 @@
   }
 
   LocationSummary* locations = invoke->GetLocations();
-  DCHECK(locations->HasTemps());
-  codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0));
-  // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
-  // previous instruction.
+  codegen_->GenerateStaticOrDirectCall(
+      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
-  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
 }
 
@@ -1618,10 +1830,8 @@
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-  DCHECK(!codegen_->IsLeafMethod());
-  // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
-  // previous instruction.
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -1640,10 +1850,15 @@
 
   DCHECK(!receiver.IsStackSlot());
 
-  // /* HeapReference<Class> */ temp = receiver->klass_
-  GetAssembler()->LoadFromOffset(kLoadWord, temp, RegisterFrom(receiver), class_offset);
-
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Ensure the pc position is recorded immediately after the `ldr` instruction.
+  {
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               vixl32::kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    // /* HeapReference<Class> */ temp = receiver->klass_
+    __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
+    codegen_->MaybeRecordImplicitNullCheck(invoke);
+  }
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
   // However this is not required in practice, as this is an
@@ -1682,15 +1897,16 @@
     temps.Exclude(hidden_reg);
     __ Mov(hidden_reg, invoke->GetDexMethodIndex());
   }
-
   {
+    // Ensure the pc position is recorded immediately after the `blx` instruction.
+    // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
     AssemblerAccurateScope aas(GetVIXLAssembler(),
-                               kArmInstrMaxSizeInBytes,
-                               CodeBufferCheckScope::kMaximumSize);
+                               vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
     // LR();
     __ blx(lr);
-    DCHECK(!codegen_->IsLeafMethod());
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+    DCHECK(!codegen_->IsLeafMethod());
   }
 }
 
@@ -2776,15 +2992,8 @@
 
 
 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -3068,7 +3277,7 @@
           __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
-                                         3 * kArmInstrMaxSizeInBytes,
+                                         2 * vixl32::kMaxInstructionSizeInBytes,
                                          CodeBufferCheckScope::kMaximumSize);
             __ it(pl);
             __ lsl(pl, o_h, low, temp);
@@ -3087,7 +3296,7 @@
           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
-                                         3 * kArmInstrMaxSizeInBytes,
+                                         2 * vixl32::kMaxInstructionSizeInBytes,
                                          CodeBufferCheckScope::kMaximumSize);
             __ it(pl);
             __ asr(pl, o_l, high, temp);
@@ -3104,7 +3313,7 @@
           __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
           {
             AssemblerAccurateScope guard(GetVIXLAssembler(),
-                                         3 * kArmInstrMaxSizeInBytes,
+                                         2 * vixl32::kMaxInstructionSizeInBytes,
                                          CodeBufferCheckScope::kMaximumSize);
           __ it(pl);
           __ lsr(pl, o_l, high, temp);
@@ -3221,9 +3430,10 @@
     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
     GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString));
     GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value());
+    // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
     AssemblerAccurateScope aas(GetVIXLAssembler(),
-                               kArmInstrMaxSizeInBytes,
-                               CodeBufferCheckScope::kMaximumSize);
+                               vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
     __ blx(lr);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   } else {
@@ -3463,10 +3673,16 @@
     addr = temp;
   }
   __ Bind(&fail);
-  // We need a load followed by store. (The address used in a STREX instruction must
-  // be the same as the address in the most recently executed LDREX instruction.)
-  __ Ldrexd(temp1, temp2, MemOperand(addr));
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  {
+    // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               vixl32::kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    // We need a load followed by store. (The address used in a STREX instruction must
+    // be the same as the address in the most recently executed LDREX instruction.)
+    __ ldrexd(temp1, temp2, MemOperand(addr));
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
   __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
   __ CompareAndBranchIfNonZero(temp1, &fail);
 }
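The exclusive-access pair above reads more naturally as pseudocode (a sketch of the semantics only, not ART code):

// do {
//   old = *addr;                   // ldrexd: 64-bit load that also takes the
//                                  // exclusive monitor on `addr`.
// } while (!strexd(addr, value));  // strexd: succeeds only while the monitor
//                                  // is still held for that same `addr`.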
@@ -3615,6 +3831,11 @@
 
   // Longs and doubles are handled in the switch.
   if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) {
+    // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we
+    // should use a scope and the assembler to emit the store instruction to guarantee that we
+    // record the pc at the correct position. But the `Assembler` does not automatically handle
+    // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time
+    // of writing, do generate the store instruction last.
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -3789,7 +4010,6 @@
         TODO_VIXL32(FATAL);
       } else {
         GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
-        // TODO(VIXL): Scope to guarantee the position immediately after the load.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         if (is_volatile) {
           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
@@ -3826,7 +4046,6 @@
         __ Vmov(out_dreg, lo, hi);
       } else {
         GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
-        // TODO(VIXL): Scope to guarantee the position immediately after the load.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
       break;
@@ -3842,6 +4061,11 @@
     // double fields, are handled in the previous switch statement.
   } else {
     // Address cases other than reference and double that may require an implicit null check.
+    // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we
+    // should use a scope and the assembler to emit the load instruction to guarantee that we
+    // record the pc at the correct position. But the `Assembler` does not automatically handle
+    // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time
+    // of writing, do generate the load instruction last.
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -3956,15 +4180,8 @@
 }
 
 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -3973,8 +4190,9 @@
   }
 
   UseScratchRegisterScope temps(GetVIXLAssembler());
+  // Ensure the pc position is recorded immediately after the `ldr` instruction.
   AssemblerAccurateScope aas(GetVIXLAssembler(),
-                             kArmInstrMaxSizeInBytes,
+                             vixl32::kMaxInstructionSizeInBytes,
                              CodeBufferCheckScope::kMaximumSize);
   __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
   RecordPcInfo(instruction, instruction->GetDexPc());
@@ -4241,6 +4459,11 @@
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
+          // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method,
+          // we should use a scope and the assembler to emit the load instruction to guarantee that
+          // we record the pc at the correct position. But the `Assembler` does not automatically
+          // handle unencodable offsets. Practically, everything is fine because the helper and
+          // VIXL, at the time of writing, do generate the load instruction last.
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           // If read barriers are enabled, emit read barriers other than
           // Baker's using a slow path (and also unpoison the loaded
@@ -4263,7 +4486,9 @@
           }
           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
           temps.Release(temp);
-
+          // TODO(VIXL): Use a scope to ensure that we record the pc position immediately after the
+          // load instruction. Practically, everything is fine because the helper and VIXL, at the
+          // time of writing, do generate the load instruction last.
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           // If read barriers are enabled, emit read barriers other than
           // Baker's using a slow path (and also unpoison the loaded
@@ -4325,6 +4550,8 @@
     // Potential implicit null checks, in the case of reference
     // arrays, are handled in the previous switch statement.
   } else if (!maybe_compressed_char_at) {
+    // TODO(VIXL): Use a scope to ensure we record the pc info immediately after
+    // the preceding load instruction.
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
@@ -4425,6 +4652,8 @@
           codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
           temps.Release(temp);
         }
+        // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
+        // store instruction.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
         DCHECK(!may_need_runtime_call_for_type_check);
@@ -4459,6 +4688,8 @@
             codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
             temps.Release(temp);
           }
+          // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
+          // store instruction.
           codegen_->MaybeRecordImplicitNullCheck(instruction);
           __ B(&done);
           __ Bind(&non_zero);
@@ -4472,9 +4703,15 @@
         // negative, in which case we would take the ArraySet slow
         // path.
 
-        // /* HeapReference<Class> */ temp1 = array->klass_
-        GetAssembler()->LoadFromOffset(kLoadWord, temp1, array, class_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        {
+          // Ensure we record the pc position immediately after the `ldr` instruction.
+          AssemblerAccurateScope aas(GetVIXLAssembler(),
+                                     vixl32::kMaxInstructionSizeInBytes,
+                                     CodeBufferCheckScope::kMaximumSize);
+          // /* HeapReference<Class> */ temp1 = array->klass_
+          __ ldr(temp1, MemOperand(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+        }
         GetAssembler()->MaybeUnpoisonHeapReference(temp1);
 
         // /* HeapReference<Class> */ temp1 = temp1->component_type_
@@ -4531,6 +4768,8 @@
       }
 
       if (!may_need_runtime_call_for_type_check) {
+        // TODO(VIXL): Ensure we record the pc position immediately after the preceding store
+        // instruction.
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4599,6 +4838,8 @@
 
   // Objects are handled in the switch.
   if (value_type != Primitive::kPrimNot) {
+    // TODO(VIXL): Ensure we record the pc position immediately after the preceding store
+    // instruction.
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
@@ -4614,8 +4855,13 @@
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
   vixl32::Register obj = InputRegisterAt(instruction, 0);
   vixl32::Register out = OutputRegister(instruction);
-  GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  {
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               vixl32::kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ ldr(out, MemOperand(obj, offset));
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
   // Mask out compression flag from String's array length.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
     __ Lsr(out, out, 1u);
@@ -4697,8 +4943,9 @@
 }
 
 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
-  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related.
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -4992,12 +5239,37 @@
   TODO_VIXL32(FATAL);
 }
 
-// Check if the desired_class_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
-      HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) {
-  // TODO(VIXL): Implement optimized code paths.
-  return HLoadClass::LoadKind::kDexCacheViaMethod;
+    HLoadClass::LoadKind desired_class_load_kind) {
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      // TODO(VIXL): Re-enable this when literal pools are fixed in VIXL.
+      return HLoadClass::LoadKind::kDexCacheViaMethod;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      // TODO(VIXL): Re-enable this when literal pools are fixed in VIXL.
+      return HLoadClass::LoadKind::kDexCacheViaMethod;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      // TODO(VIXL): Re-enable this when literal pools are fixed in VIXL.
+      return HLoadClass::LoadKind::kDexCacheViaMethod;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
 }
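The fallback behaviour above can be summarized as a table (assuming PIC compilation and, for the pc-relative case, a graph without irreducible loops):

// Requested load kind           -> returned kind
// kReferrersClass               -> unchanged
// kBootImageLinkTimeAddress     -> kDexCacheViaMethod (VIXL literal pools TODO)
// kBootImageLinkTimePcRelative  -> unchanged
// kBootImageAddress             -> kDexCacheViaMethod (VIXL literal pools TODO)
// kDexCacheAddress              -> kDexCacheViaMethod (VIXL literal pools TODO)
// kDexCachePcRelative           -> unchanged, or kDexCacheViaMethod when the
//                                  graph has irreducible loops
// kDexCacheViaMethod            -> unchanged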
 
 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
@@ -5011,11 +5283,15 @@
     return;
   }
 
-  // TODO(VIXL): read barrier code.
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+    TODO_VIXL32(FATAL);
+  }
+
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
       load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -5037,7 +5313,9 @@
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(cls);
 
-  // TODO(VIXL): read barrier code.
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5049,7 +5327,35 @@
                               out_loc,
                               current_method,
                               ArtMethod::DeclaringClassOffset().Int32Value(),
-                              kEmitCompilerReadBarrier);
+                              read_barrier_option);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      codegen_->EmitMovwMovtPlaceholder(labels, out);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      vixl32::Register base_reg = InputRegisterAt(cls, 0);
+      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
+      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
+      generate_null_check = !cls->IsInDexCache();
       break;
     }
     case HLoadClass::LoadKind::kDexCacheViaMethod: {
@@ -5061,7 +5367,7 @@
       GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
       // /* GcRoot<mirror::Class> */ out = out[type_index]
       size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5121,42 +5427,106 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-// Check if the desired_string_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
-      HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
-  // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code.
-  return HLoadString::LoadKind::kDexCacheViaMethod;
+    HLoadString::LoadKind desired_string_load_kind) {
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      // TODO(VIXL): Implement missing optimization.
+      return HLoadString::LoadKind::kDexCacheViaMethod;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      // TODO(VIXL): Implement missing optimization.
+      return HLoadString::LoadKind::kDexCacheViaMethod;
+    case HLoadString::LoadKind::kBssEntry:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadString::LoadKind::kJitTableAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO(VIXL): Implement missing optimization.
+      return HLoadString::LoadKind::kDexCacheViaMethod;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_string_load_kind;
 }
 
 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
-      ? LocationSummary::kCallOnMainOnly
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-
-  // TODO(VIXL): Implement optimized code paths.
-  // See InstructionCodeGeneratorARMVIXL::VisitLoadString.
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead.
     locations->SetOut(LocationFrom(r0));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything, including temps.
+        // Note that IP may theoretically be clobbered by saving/restoring the live register
+        // (only one thanks to the custom calling convention), so we request a different temp.
+        locations->AddTemp(Location::RequiresRegister());
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+        // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+        // that the kPrimNot result register is the same as the first argument register.
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) {
-  // TODO(VIXL): Implement optimized code paths.
-  // We implemented the simplest solution to get first ART tests passing, we deferred the
-  // optimized path until later, we should implement it using ARM64 implementation as a
-  // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString.
+  LocationSummary* locations = load->GetLocations();
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(load);
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+      codegen_->EmitMovwMovtPlaceholder(labels, out);
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    case HLoadString::LoadKind::kBssEntry: {
+      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+      CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+      codegen_->EmitMovwMovtPlaceholder(labels, temp);
+      GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
+      LoadStringSlowPathARMVIXL* slow_path =
+          new (GetGraph()->GetArena()) LoadStringSlowPathARMVIXL(load);
+      codegen_->AddSlowPath(slow_path);
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+    case HLoadString::LoadKind::kJitTableAddress: {
+      TODO_VIXL32(FATAL);
+      break;
+    }
+    default:
+      break;
+  }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
-  __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -5200,14 +5570,27 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
 }
 
+// The interface case has 3 temps: one for holding the number of interfaces, one for the current
+// interface pointer, and one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
 
 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
@@ -5238,11 +5621,7 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5253,9 +5632,9 @@
   vixl32::Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(instruction);
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5276,7 +5655,8 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ Cmp(out, cls);
       // Classes must be equal for the instanceof to succeed.
       __ B(ne, &zero);
@@ -5291,13 +5671,18 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl32::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       __ Cmp(out, cls);
@@ -5315,14 +5700,19 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       vixl32::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -5340,14 +5730,19 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       vixl32::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5360,12 +5755,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
       // /* HeapReference<Class> */ out = obj->klass_
       GenerateReferenceLoadTwoRegisters(instruction,
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
@@ -5449,13 +5846,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
@@ -5466,20 +5857,31 @@
   vixl32::Register cls = InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
   vixl32::Register temp = RegisterFrom(temp_loc);
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 3u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  // Always false for read barriers, since we may need to go to the entrypoint for non-fatal cases
+  // caused by false negatives. The false negatives may come from avoiding the read barriers below,
+  // which is done for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARMVIXL* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
                                                             is_type_check_slow_path_fatal);
@@ -5491,12 +5893,17 @@
     __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -5505,12 +5912,24 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl32::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
@@ -5523,6 +5942,14 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       vixl32::Label loop;
       __ Bind(&loop);
@@ -5530,7 +5957,11 @@
       __ B(eq, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
@@ -5541,13 +5972,25 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck:  {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the component type is null, jump to the slow path to throw the exception.
       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
       // Otherwise, the object is indeed an array; jump to label `check_non_primitive_component_type`
@@ -5559,10 +6002,7 @@
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -5570,8 +6010,45 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+
       __ B(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      vixl32::Label start_loop;
+      __ Bind(&start_loop);
+      __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
+                                type_check_slow_path->GetEntryLabel());
+      __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
+      GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
+      // Go to next interface.
+      __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
+      __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
+      __ B(ne, &start_loop);
+      break;
+    }
   }
   __ Bind(&done);
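The iftable walk generated for kInterfaceCheck corresponds roughly to this pseudocode (a sketch derived from the offsets above; the field names are descriptive, not ART declarations):

// temp  = obj->klass_->iftable_;     // Loaded without read barriers; never null.
// count = temp->length_;             // Two reference slots per interface.
// for (;;) {
//   if (count == 0) goto slow_path;  // Exhausted: throw from the slow path.
//   current = temp->data_[0];        // First slot of the pair: interface class.
//   temp  += 2 * kHeapReferenceSize; // Step over (class, method array).
//   count -= 2;
//   if (current == cls) break;       // Match: the check-cast succeeds.
// }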
 
@@ -5862,7 +6339,8 @@
     HInstruction* instruction ATTRIBUTE_UNUSED,
     Location out,
     uint32_t offset,
-    Location maybe_temp ATTRIBUTE_UNUSED) {
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
   vixl32::Register out_reg = RegisterFrom(out);
   if (kEmitCompilerReadBarrier) {
     TODO_VIXL32(FATAL);
@@ -5879,7 +6357,8 @@
     Location out,
     Location obj,
     uint32_t offset,
-    Location maybe_temp ATTRIBUTE_UNUSED) {
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
   vixl32::Register out_reg = RegisterFrom(out);
   vixl32::Register obj_reg = RegisterFrom(obj);
   if (kEmitCompilerReadBarrier) {
@@ -5897,9 +6376,9 @@
     Location root,
     vixl32::Register obj,
     uint32_t offset,
-    bool requires_read_barrier) {
+    ReadBarrierOption read_barrier_option) {
   vixl32::Register root_reg = RegisterFrom(root);
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     TODO_VIXL32(FATAL);
   } else {
     // Plain GC root load with no read barrier.
@@ -5960,15 +6439,51 @@
 // Check if the desired_dispatch_info is supported. If it is, return it,
 // otherwise return a fall-back info that should be used instead.
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
-      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED,
-      HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+    HInvokeStaticOrDirect* invoke) {
   // TODO(VIXL): Implement optimized code paths.
-  return {
-    HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-    HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-    0u,
-    0u
-  };
+  if (desired_dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup ||
+      desired_dispatch_info.code_ptr_location ==
+          HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup) {
+    return {
+      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+      0u,
+      0u
+    };
+  }
+
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+  // with irreducible loops.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+
+  if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+    const DexFile& outer_dex_file = GetGraph()->GetDexFile();
+    if (&outer_dex_file != invoke->GetTargetMethod().dex_file) {
+      // Calls across dex files are more likely to exceed the available BL range,
+      // so use an absolute patch with fixup if available and kCallArtMethod otherwise.
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          (desired_dispatch_info.method_load_kind ==
+           HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup)
+          ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
+          : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+      return HInvokeStaticOrDirect::DispatchInfo {
+        dispatch_info.method_load_kind,
+        code_ptr_location,
+        dispatch_info.method_load_data,
+        0u
+      };
+    }
+  }
+  return dispatch_info;
 }
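
For reference, the fallback rules implemented above can be summarized in a small standalone sketch (simplified and with hypothetical enums; the real kinds live on HInvokeStaticOrDirect):

enum class MethodLoadKind { kDirectAddressWithFixup, kDexCachePcRelative, kDexCacheViaMethod };
enum class CodePtrLocation { kCallDirectWithFixup, kCallPCRelative, kCallArtMethod };

struct DispatchInfo {
  MethodLoadKind method_load_kind;
  CodePtrLocation code_ptr_location;
};

// Mirrors the logic above: link-time fixups are unimplemented in this backend,
// irreducible loops rule out the PC-relative dex cache base, and cross-dex-file
// PC-relative calls may exceed the BL branch range.
DispatchInfo Fallback(DispatchInfo desired, bool has_irreducible_loops, bool same_dex_file) {
  if (desired.method_load_kind == MethodLoadKind::kDirectAddressWithFixup ||
      desired.code_ptr_location == CodePtrLocation::kCallDirectWithFixup) {
    return {MethodLoadKind::kDexCacheViaMethod, CodePtrLocation::kCallArtMethod};
  }
  if (has_irreducible_loops && desired.method_load_kind == MethodLoadKind::kDexCachePcRelative) {
    desired.method_load_kind = MethodLoadKind::kDexCacheViaMethod;
  }
  if (desired.code_ptr_location == CodePtrLocation::kCallPCRelative && !same_dex_file) {
    desired.code_ptr_location = CodePtrLocation::kCallArtMethod;
  }
  return desired;
}
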
 
 vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
@@ -5999,59 +6514,119 @@
 
 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
     HInvokeStaticOrDirect* invoke, Location temp) {
-  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
-  vixl32::Register temp_reg = RegisterFrom(temp);
+  // For better instruction scheduling, we load the direct code pointer before the method pointer.
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // LR = code address from literal pool with link-time patch.
+      TODO_VIXL32(FATAL);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR = invoke->GetDirectCodePtr();
+      __ Mov(lr, Operand::From(invoke->GetDirectCodePtr()));
+      break;
+    default:
+      break;
+  }
 
+  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
       uint32_t offset =
           GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
       // temp = thread->string_init_entrypoint
-      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset);
+      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
+      break;
+    }
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      TODO_VIXL32(FATAL);
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp));
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset);
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       vixl32::Register method_reg;
+      vixl32::Register reg = RegisterFrom(temp);
       if (current_method.IsRegister()) {
         method_reg = RegisterFrom(current_method);
       } else {
         DCHECK(invoke->GetLocations()->Intrinsified());
         DCHECK(!current_method.IsValid());
-        method_reg = temp_reg;
-        GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset);
+        method_reg = reg;
+        GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset);
       }
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       GetAssembler()->LoadFromOffset(
           kLoadWord,
-          temp_reg,
+          reg,
           method_reg,
           ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
       // temp = temp[index_in_cache];
       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
       uint32_t index_in_cache = invoke->GetDexMethodIndex();
       GetAssembler()->LoadFromOffset(
-          kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+          kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
       break;
     }
-    default:
-      TODO_VIXL32(FATAL);
   }
 
-  // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`.
-  if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) {
-    TODO_VIXL32(FATAL);
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ Bl(GetFrameEntryLabel());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
+      {
+        AssemblerAccurateScope aas(GetVIXLAssembler(),
+                                   vixl32::kMaxInstructionSizeInBytes,
+                                   CodeBufferCheckScope::kMaximumSize);
+        __ bind(&relative_call_patches_.back().label);
+        // Arbitrarily branch to the BL itself; the branch is overridden at link time.
+        __ bl(&relative_call_patches_.back().label);
+      }
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR prepared above for better instruction scheduling.
+      // LR()
+      {
+        // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
+        AssemblerAccurateScope aas(GetVIXLAssembler(),
+                                   vixl32::k16BitT32InstructionSizeInBytes,
+                                   CodeBufferCheckScope::kExactSize);
+        __ blx(lr);
+      }
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // LR = callee_method->entry_point_from_quick_compiled_code_
+      GetAssembler()->LoadFromOffset(
+            kLoadWord,
+            lr,
+            RegisterFrom(callee_method),
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+      {
+        // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
+        AssemblerAccurateScope aas(GetVIXLAssembler(),
+                                   vixl32::k16BitT32InstructionSizeInBytes,
+                                   CodeBufferCheckScope::kExactSize);
+        // LR()
+        __ blx(lr);
+      }
+      break;
   }
 
-  // LR = callee_method->entry_point_from_quick_compiled_code_
-  GetAssembler()->LoadFromOffset(
-      kLoadWord,
-      lr,
-      RegisterFrom(callee_method),
-      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
-  // LR()
-  __ Blx(lr);
-
   DCHECK(!IsLeafMethod());
 }
 
@@ -6067,9 +6642,15 @@
   InvokeDexCallingConventionARMVIXL calling_convention;
   vixl32::Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // /* HeapReference<Class> */ temp = receiver->klass_
-  GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset);
-  MaybeRecordImplicitNullCheck(invoke);
+  {
+    // Make sure the pc is recorded immediately after the `ldr` instruction.
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               vixl32::kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    // /* HeapReference<Class> */ temp = receiver->klass_
+    __ ldr(temp, MemOperand(receiver, class_offset));
+    MaybeRecordImplicitNullCheck(invoke);
+  }
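
The reason the scope matters: implicit null checks rely on mapping the faulting pc of that `ldr` back to dex-level information, so nothing may be emitted between the load and the pc recording. A tiny illustrative model of the lookup a fault handler would perform (an assumed mechanism, not ART's actual data structures):

#include <cstdint>
#include <map>

// Fault pc of an implicit null-check load -> dex pc recorded for it.
std::map<uintptr_t, uint32_t> recorded_null_check_pcs;

// The signal handler can only turn a fault into a NullPointerException if the
// faulting pc is exactly the one recorded; an extra instruction emitted between
// the load and the recording would make this lookup miss.
bool IsImplicitNullCheck(uintptr_t fault_pc) {
  return recorded_null_check_pcs.count(fault_pc) != 0;
}
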
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
   // However this is not required in practice, as this is an
@@ -6086,7 +6667,81 @@
   // LR = temp->GetEntryPoint();
   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
   // LR();
-  __ Blx(lr);
+  // This `blx` *must* be the *last* instruction generated by this stub, so that a call to
+  // `RecordPcInfo()` immediately following it records the correct pc. Use a scope to help
+  // guarantee that.
+  // blx in T32 has only a 16-bit encoding, which is why a stricter scope check is used.
+  AssemblerAccurateScope aas(GetVIXLAssembler(),
+                             vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+  __ blx(lr);
+}
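
The AssemblerAccurateScope used here (and above) pins down exactly how many bytes may be emitted, which also keeps the assembler from interleaving anything such as a literal pool between the `blx` and the subsequent pc recording. A rough standalone model of such an RAII size check (illustrative only, not VIXL's implementation):

#include <cassert>
#include <cstddef>

class SizeCheckScope {
 public:
  // `cursor` tracks bytes emitted so far; `expected` is the exact size allowed.
  SizeCheckScope(const std::size_t* cursor, std::size_t expected)
      : cursor_(cursor), start_(*cursor), expected_(expected) {}
  ~SizeCheckScope() { assert(*cursor_ - start_ == expected_); }

 private:
  const std::size_t* cursor_;
  const std::size_t start_;
  const std::size_t expected_;
};
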
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
+    const DexFile& dex_file, dex::TypeIndex type_index) {
+  return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
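
NewPcRelativePatch can safely return a pointer into the deque because deques, unlike vectors, never relocate existing elements on emplace_back. A quick standard-C++ demonstration of that guarantee:

#include <cassert>
#include <deque>

int main() {
  std::deque<int> patches;
  patches.emplace_back(0);
  const int* first = &patches.front();
  for (int i = 1; i < 100000; ++i) {
    patches.emplace_back(i);  // References to existing elements stay valid.
  }
  assert(first == &patches.front());  // A std::vector would not guarantee this.
  return 0;
}
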
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
+    const ArenaDeque<PcRelativePatchInfo>& infos,
+    ArenaVector<LinkerPatch>* linker_patches) {
+  for (const PcRelativePatchInfo& info : infos) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t offset_or_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
+    linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
+    linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index));
+  }
+}
+
+void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size();
+  linker_patches->reserve(size);
+  for (const PatchInfo<vixl32::Label>& info : relative_call_patches_) {
+    uint32_t literal_offset = info.label.GetLocation();
+    linker_patches->push_back(
+        LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
+  }
+  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+                                                               linker_patches);
+  if (!GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
+  } else {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
+  }
+  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+                                                              linker_patches);
 }
 
 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
@@ -6213,6 +6868,17 @@
     jump_table->EmitTable(codegen_);
   }
 }
+
+void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  vixl32::Register base_reg = OutputRegister(base);
+  CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+  codegen_->EmitMovwMovtPlaceholder(labels, base_reg);
+}
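
The HArmDexCacheArraysBase trick, for context: a single MOVW/MOVT/ADD materializes a base that points `element_offset` bytes into the dex cache arrays, and each user (see the kDexCachePcRelative case above) loads at the signed difference from that base. A one-function sketch of the arithmetic, with illustrative names:

#include <cstdint>

// Displacement used by a load relative to the shared base, as in
// `invoke->GetDexCacheArrayOffset() - base->GetElementOffset()` above.
inline int32_t DexCacheLoadDisplacement(uint32_t target_offset, uint32_t base_element_offset) {
  return static_cast<int32_t>(target_offset - base_element_offset);
}
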
 
 // Copy the result of a call into the given target.
 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
@@ -6223,7 +6889,7 @@
 
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+  Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
   if (return_loc.Equals(trg)) {
     return;
   }
@@ -6271,6 +6937,21 @@
   }
 }
 
+void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
+    CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
+    vixl32::Register out) {
+  AssemblerAccurateScope aas(GetVIXLAssembler(),
+                             3 * vixl32::kMaxInstructionSizeInBytes,
+                             CodeBufferCheckScope::kMaximumSize);
+  // TODO(VIXL): Think about using mov instead of movw.
+  __ bind(&labels->movw_label);
+  __ movw(out, /* placeholder */ 0u);
+  __ bind(&labels->movt_label);
+  __ movt(out, /* placeholder */ 0u);
+  __ bind(&labels->add_pc_label);
+  __ add(out, out, pc);
+}
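
At link time the two placeholders are rewritten so that, after the final `add out, out, pc`, `out` holds the target address; on Thumb2 the pc read by that ADD is the ADD's own address plus 4. A sketch of the computation under that assumption (a hypothetical helper, not the linker's code):

#include <cstdint>

// Immediate the MOVW/MOVT pair must materialize.
inline uint32_t MovwMovtImmediate(uint32_t target_address, uint32_t add_pc_offset) {
  return target_address - (add_pc_offset + 4u);
}

// The linker then splits it: MOVW gets (imm & 0xFFFFu), MOVT gets (imm >> 16).
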
+
 #undef __
 #undef QUICK_ENTRY_POINT
 #undef TODO_VIXL32
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 89fef43..b7ba8dd 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -17,9 +17,15 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
 
-#include "code_generator_arm.h"
+#include "base/enums.h"
+#include "code_generator.h"
 #include "common_arm.h"
+#include "driver/compiler_options.h"
+#include "nodes.h"
+#include "string_reference.h"
+#include "parallel_move_resolver.h"
 #include "utils/arm/assembler_arm_vixl.h"
+#include "utils/type_reference.h"
 
 // TODO(VIXL): make vixl clean wrt -Wshadow.
 #pragma GCC diagnostic push
@@ -44,7 +50,7 @@
     vixl::aarch32::r2,
     vixl::aarch32::r3
 };
-static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegisters);
+static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegistersVIXL);
 static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = {
     vixl::aarch32::s0,
     vixl::aarch32::s1,
@@ -63,7 +69,7 @@
     vixl::aarch32::s14,
     vixl::aarch32::s15
 };
-static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegisters);
+static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegistersVIXL);
 
 static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0;
 
@@ -90,7 +96,7 @@
     vixl::aarch32::r3
 };
 static const size_t kRuntimeParameterCoreRegistersLengthVIXL =
-    arraysize(kRuntimeParameterCoreRegisters);
+    arraysize(kRuntimeParameterCoreRegistersVIXL);
 static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
     vixl::aarch32::s0,
     vixl::aarch32::s1,
@@ -98,98 +104,10 @@
     vixl::aarch32::s3
 };
 static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
-    arraysize(kRuntimeParameterFpuRegisters);
+    arraysize(kRuntimeParameterFpuRegistersVIXL);
 
 class LoadClassSlowPathARMVIXL;
 
-#define FOR_EACH_IMPLEMENTED_INSTRUCTION(M)     \
-  M(Above)                                      \
-  M(AboveOrEqual)                               \
-  M(Add)                                        \
-  M(And)                                        \
-  M(ArrayGet)                                   \
-  M(ArrayLength)                                \
-  M(ArraySet)                                   \
-  M(Below)                                      \
-  M(BelowOrEqual)                               \
-  M(BitwiseNegatedRight)                        \
-  M(BooleanNot)                                 \
-  M(BoundsCheck)                                \
-  M(BoundType)                                  \
-  M(CheckCast)                                  \
-  M(ClassTableGet)                              \
-  M(ClearException)                             \
-  M(ClinitCheck)                                \
-  M(Compare)                                    \
-  M(CurrentMethod)                              \
-  M(Deoptimize)                                 \
-  M(Div)                                        \
-  M(DivZeroCheck)                               \
-  M(DoubleConstant)                             \
-  M(Equal)                                      \
-  M(Exit)                                       \
-  M(FloatConstant)                              \
-  M(Goto)                                       \
-  M(GreaterThan)                                \
-  M(GreaterThanOrEqual)                         \
-  M(If)                                         \
-  M(InstanceFieldGet)                           \
-  M(InstanceFieldSet)                           \
-  M(InstanceOf)                                 \
-  M(IntConstant)                                \
-  M(IntermediateAddress)                        \
-  M(InvokeInterface)                            \
-  M(InvokeStaticOrDirect)                       \
-  M(InvokeUnresolved)                           \
-  M(InvokeVirtual)                              \
-  M(LessThan)                                   \
-  M(LessThanOrEqual)                            \
-  M(LoadClass)                                  \
-  M(LoadException)                              \
-  M(LoadString)                                 \
-  M(LongConstant)                               \
-  M(MemoryBarrier)                              \
-  M(MonitorOperation)                           \
-  M(Mul)                                        \
-  M(MultiplyAccumulate)                         \
-  M(NativeDebugInfo)                            \
-  M(Neg)                                        \
-  M(NewArray)                                   \
-  M(NewInstance)                                \
-  M(Not)                                        \
-  M(NotEqual)                                   \
-  M(NullCheck)                                  \
-  M(NullConstant)                               \
-  M(Or)                                         \
-  M(PackedSwitch)                               \
-  M(ParallelMove)                               \
-  M(ParameterValue)                             \
-  M(Phi)                                        \
-  M(Rem)                                        \
-  M(Return)                                     \
-  M(ReturnVoid)                                 \
-  M(Ror)                                        \
-  M(Select)                                     \
-  M(Shl)                                        \
-  M(Shr)                                        \
-  M(StaticFieldGet)                             \
-  M(StaticFieldSet)                             \
-  M(Sub)                                        \
-  M(SuspendCheck)                               \
-  M(Throw)                                      \
-  M(TryBoundary)                                \
-  M(TypeConversion)                             \
-  M(UnresolvedInstanceFieldGet)                 \
-  M(UnresolvedInstanceFieldSet)                 \
-  M(UnresolvedStaticFieldGet)                   \
-  M(UnresolvedStaticFieldSet)                   \
-  M(UShr)                                       \
-  M(Xor)                                        \
-
-// TODO: Remove once the VIXL32 backend is implemented completely.
-#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)   \
-  M(ArmDexCacheArraysBase)                      \
-
 class CodeGeneratorARMVIXL;
 
 class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
@@ -248,6 +166,22 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL);
 };
 
+class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitorARMVIXL() {}
+  virtual ~InvokeDexCallingConventionVisitorARMVIXL() {}
+
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
+  Location GetMethodLocation() const OVERRIDE;
+
+ private:
+  InvokeDexCallingConventionARMVIXL calling_convention;
+  uint32_t double_index_ = 0;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARMVIXL);
+};
+
 class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention {
  public:
   FieldAccessCallingConventionARMVIXL() {}
@@ -319,27 +253,26 @@
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARMVIXL);
 };
 
-#define DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR(Name)            \
-  void Visit##Name(H##Name*) OVERRIDE;
-
-#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR(Name)          \
-  void Visit##Name(H##Name* instr) OVERRIDE {                   \
-    VisitUnimplemementedInstruction(instr); }
-
 class LocationsBuilderARMVIXL : public HGraphVisitor {
  public:
   LocationsBuilderARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen)
       : HGraphVisitor(graph), codegen_(codegen) {}
 
-  FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR)
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr) OVERRIDE;
 
-  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
+  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
- private:
-  void VisitUnimplemementedInstruction(HInstruction* instruction) {
-    LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void VisitInstruction(HInstruction* instruction) OVERRIDE {
+    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
+               << " (id " << instruction->GetId() << ")";
   }
 
+ private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
   void HandleCondition(HCondition* condition);
@@ -355,7 +288,7 @@
   bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
 
   CodeGeneratorARMVIXL* const codegen_;
-  InvokeDexCallingConventionVisitorARM parameter_visitor_;
+  InvokeDexCallingConventionVisitorARMVIXL parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARMVIXL);
 };
@@ -364,25 +297,30 @@
  public:
   InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen);
 
-  FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR)
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr) OVERRIDE;
 
-  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR)
+  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void VisitInstruction(HInstruction* instruction) OVERRIDE {
+    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
+               << " (id " << instruction->GetId() << ")";
+  }
 
   ArmVIXLAssembler* GetAssembler() const { return assembler_; }
   ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
  private:
-  void VisitUnimplemementedInstruction(HInstruction* instruction) {
-    LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
-  }
-
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
   // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
                                         vixl32::Register class_reg);
-  void HandleGoto(HInstruction* got, HBasicBlock* successor);
   void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
   void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
@@ -422,7 +360,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -437,18 +376,18 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
-
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
   //
-  // while honoring read barriers if `requires_read_barrier` is true.
+  // while honoring read barriers based on `read_barrier_option`.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                vixl::aarch32::Register obj,
                                uint32_t offset,
-                               bool requires_read_barrier);
+                               ReadBarrierOption read_barrier_option);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              vixl::aarch32::Label* true_target,
@@ -468,6 +407,7 @@
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
+  void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   ArmVIXLAssembler* const assembler_;
   CodeGeneratorARMVIXL* const codegen_;
@@ -481,62 +421,50 @@
                        const ArmInstructionSetFeatures& isa_features,
                        const CompilerOptions& compiler_options,
                        OptimizingCompilerStats* stats = nullptr);
-
   virtual ~CodeGeneratorARMVIXL() {}
 
-  void Initialize() OVERRIDE {
-    block_labels_.resize(GetGraph()->GetBlocks().size());
-  }
-
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
-
   void Bind(HBasicBlock* block) OVERRIDE;
-
-  vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) {
-    block = FirstNonEmptyBlock(block);
-    return &(block_labels_[block->GetBlockId()]);
-  }
-
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
 
+  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+
+  size_t GetWordSize() const OVERRIDE {
+    return static_cast<size_t>(kArmPointerSize);
+  }
+
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; }
+
+  HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+
+  HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+
   ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; }
 
   const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
   ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
-  size_t GetWordSize() const OVERRIDE { return kArmWordSize; }
-
-  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; }
-
   uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
     vixl::aarch32::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
     return block_entry_label->GetLocation();
   }
 
-  JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) {
-    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr));
-    return jump_tables_.back().get();
-  }
-
-  HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
-
-  HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
-
   void FixJumpTables();
-  void GenerateMemoryBarrier(MemBarrierKind kind);
-  void Finalize(CodeAllocator* allocator) OVERRIDE;
   void SetupBlockedRegisters() const OVERRIDE;
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; }
-
   // Helper method to move a 32-bit value between two locations.
   void Move32(Location destination, Location source);
 
@@ -551,45 +479,6 @@
                                vixl::aarch32::Register reg_index,
                                vixl::aarch32::Condition cond = vixl::aarch32::al);
 
-  const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
-
-  vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
-
-  // Saves the register in the stack. Returns the size taken on stack.
-  size_t SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                          uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister";
-    return 0;
-  }
-
-  // Restores the register from the stack. Returns the size taken on stack.
-  size_t RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                             uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister";
-    return 0;
-  }
-
-  size_t SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                                   uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: SaveFloatingPointRegister";
-    return 0;
-  }
-
-  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
-
-  bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
-    return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
-  }
-
-  void ComputeSpillMask() OVERRIDE;
-
-  void GenerateImplicitNullCheck(HNullCheck* null_check) OVERRIDE;
-  void GenerateExplicitNullCheck(HNullCheck* null_check) OVERRIDE;
-
-  ParallelMoveResolver* GetMoveResolver() OVERRIDE {
-    return &move_resolver_;
-  }
-
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(QuickEntrypointEnum entrypoint,
                      HInstruction* instruction,
@@ -602,8 +491,6 @@
                                            HInstruction* instruction,
                                            SlowPathCode* slow_path);
 
-  void GenerateInvokeRuntime(int32_t entry_point_offset);
-
   // Emit a write barrier.
   void MarkGCCard(vixl::aarch32::Register temp,
                   vixl::aarch32::Register card,
@@ -611,6 +498,76 @@
                   vixl::aarch32::Register value,
                   bool can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
+  vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) {
+    block = FirstNonEmptyBlock(block);
+    return &(block_labels_[block->GetBlockId()]);
+  }
+
+  void Initialize() OVERRIDE {
+    block_labels_.resize(GetGraph()->GetBlocks().size());
+  }
+
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
+
+  const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
+
+  bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+    return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
+  }
+
+  void ComputeSpillMask() OVERRIDE;
+
+  vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+
+  // Check if the desired_string_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadString::LoadKind GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
+  // Check if the desired_dispatch_info is supported. If it is, return it,
+  // otherwise return a fall-back info that should be used instead.
+  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+      HInvokeStaticOrDirect* invoke) OVERRIDE;
+
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+
+  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
+
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index. The PC-relative address is loaded with three instructions,
+  // MOVW+MOVT to load the offset into base_reg, and then ADD base_reg, PC. The offset
+  // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string/type index.
+    uint32_t offset_or_index;
+    vixl::aarch32::Label movw_label;
+    vixl::aarch32::Label movt_label;
+    vixl::aarch32::Label add_pc_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -677,33 +634,35 @@
                                     uint32_t offset,
                                     Location index = Location::NoLocation());
 
-  // Check if the desired_string_load_kind is supported. If it is, return it,
-  // otherwise return a fall-back kind that should be used instead.
-  HLoadString::LoadKind GetSupportedLoadStringKind(
-      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
-
-  // Check if the desired_class_load_kind is supported. If it is, return it,
-  // otherwise return a fall-back kind that should be used instead.
-  HLoadClass::LoadKind GetSupportedLoadClassKind(
-      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
-
-  // Check if the desired_dispatch_info is supported. If it is, return it,
-  // otherwise return a fall-back info that should be used instead.
-  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
-      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
-      HInvokeStaticOrDirect* invoke) OVERRIDE;
-
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
-
-  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
-
   void GenerateNop() OVERRIDE;
 
+  void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+  void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+
+  JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) {
+    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr));
+    return jump_tables_.back().get();
+  }
+  void EmitJumpTables();
+
+  void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
+                               vixl::aarch32::Register out);
+
  private:
   vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 vixl::aarch32::Register temp);
 
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch32::Literal<uint32_t>*>;
+  using MethodToLiteralMap =
+      ArenaSafeMap<MethodReference, vixl::aarch32::Literal<uint32_t>*, MethodReferenceComparator>;
+
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
+  template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+                                          ArenaVector<LinkerPatch>* linker_patches);
+
   // Labels for each block that will be compiled.
   // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory.
   ArenaDeque<vixl::aarch32::Label> block_labels_;  // Indexed by block id.
@@ -717,15 +676,19 @@
   ArmVIXLAssembler assembler_;
   const ArmInstructionSetFeatures& isa_features_;
 
+  // Relative call patch info.
+  // Using ArenaDeque<>, which retains element addresses on push/emplace_back().
+  ArenaDeque<PatchInfo<vixl::aarch32::Label>> relative_call_patches_;
+  // PC-relative patch info for each HArmDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
 };
 
-#undef FOR_EACH_IMPLEMENTED_INSTRUCTION
-#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
-#undef DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR
-#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR
-
-
 }  // namespace arm
 }  // namespace art
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8f94834..61dabfa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -280,7 +280,7 @@
 
     InvokeRuntimeCallingConvention calling_convention;
     HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    const uint32_t string_index = load->GetStringIndex().index_;
     __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -1047,7 +1047,7 @@
     uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
     linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
                                                        target_string.dex_file,
-                                                       target_string.string_index));
+                                                       target_string.string_index.index_));
   }
   for (const auto& entry : boot_image_type_patches_) {
     const TypeReference& target_type = entry.first;
@@ -1110,7 +1110,7 @@
 }
 
 Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
-                                                              uint32_t string_index) {
+                                                              dex::StringIndex string_index) {
   return boot_image_string_patches_.GetOrCreate(
       StringReference(&dex_file, string_index),
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
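
Most of the churn in this file (and in the x86/x86-64 files below) comes from dex::StringIndex becoming a strong index type rather than a raw uint32_t, so call sites must wrap or unwrap via .index_ explicitly. A minimal sketch of what such a wrapper buys (illustrative; the real type lives in dex_file_types.h):

#include <cstdint>

struct StringIndex {
  uint32_t index_;

  explicit StringIndex(uint32_t index) : index_(index) {}

  bool operator==(const StringIndex& other) const { return index_ == other.index_; }
};

// With the explicit constructor and member-only comparison, an expression like
// StringIndex(5) == 5u no longer compiles, so string indices cannot be silently
// mixed with type indices, method indices, or plain integers.
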
@@ -4688,6 +4688,16 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented.
+}
+
+void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented.
+}
+
 void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations);
@@ -5743,7 +5753,7 @@
       DCHECK(!kEmitCompilerReadBarrier);
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       return;  // No dex cache slow path.
     }
@@ -5759,7 +5769,7 @@
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
-          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
       codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
       __ LoadFromOffset(kLoadWord, out, out, 0);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
@@ -5775,7 +5785,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
-  __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index e225d20..2273e52 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -452,7 +453,8 @@
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
-  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                             dex::StringIndex string_index);
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 02b01c8..b1f9b1d 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -234,7 +234,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ LoadConst32(calling_convention.GetRegisterAt(0), string_index);
     mips64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
@@ -2636,6 +2636,16 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(
+    HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+  // TODO: to be implemented.
+}
+
 void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 51e902a..d6e92cc 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -225,7 +225,7 @@
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -1059,6 +1059,11 @@
     }
   }
 
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0));
+  }
+
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ subl(ESP, Immediate(adjust));
   __ cfi().AdjustCFAOffset(adjust);
@@ -1676,6 +1681,17 @@
                                /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
+          Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
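
Taken together with the prologue change above (zero-initializing the slot), the flag protocol is simple: the runtime can later set a dedicated stack slot in a compiled frame, and this visitor merely reads the slot back for a subsequent deoptimization check. A toy model of the contract, with hypothetical names:

#include <cstdint>

// One word in the compiled frame, written as 0 in the prologue.
struct CompiledFrame {
  uint32_t should_deoptimize = 0;
};

// What VisitShouldDeoptimizeFlag computes: just a load of that slot; a later
// conditional deoptimization consumes the value.
inline uint32_t ReadShouldDeoptimizeFlag(const CompiledFrame& frame) {
  return frame.should_deoptimize;
}
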
+
 static bool SelectCanUseCMOV(HSelect* select) {
   // There are no conditional move instructions for XMMs.
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -4607,7 +4623,7 @@
 
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   __ Bind(&string_patches_.back().label);
 }
 
@@ -4618,7 +4634,7 @@
 
 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }
 
@@ -6253,10 +6269,11 @@
   }
 }
 
-Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index) {
+Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
+                                               dex::StringIndex dex_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u);
   // Add a patch entry and return the label.
-  jit_string_patches_.emplace_back(dex_file, dex_index);
+  jit_string_patches_.emplace_back(dex_file, dex_index.index_);
   PatchInfo<Label>* info = &jit_string_patches_.back();
   return &info->label;
 }
@@ -6313,7 +6330,7 @@
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
+  __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
@@ -7755,7 +7772,8 @@
 
 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file, info.index));
+    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
+                                                            dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
     size_t index_in_table = it->second;
     uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 16ea6b5..2ae3670 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -20,6 +20,7 @@
 #include "arch/x86/instruction_set_features_x86.h"
 #include "base/enums.h"
 #include "code_generator.h"
+#include "dex_file_types.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
@@ -414,7 +415,7 @@
   void RecordTypePatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
-  Label* NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index);
+  Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3467313..4474dec 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -300,7 +300,7 @@
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_;
     // Custom calling convention: RAX serves as both input and output.
     __ movl(CpuRegister(RAX), Immediate(string_index));
     x86_64_codegen->InvokeRuntime(kQuickResolveString,
@@ -1106,7 +1106,7 @@
 
 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   __ Bind(&string_patches_.back().label);
 }
 
@@ -1117,7 +1117,7 @@
 
 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }
 
@@ -1326,6 +1326,12 @@
     }
   }
 
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize),
+            Immediate(0));
+  }
+
   // Save the current method if we need it. Note that we do not
   // do this in HCurrentMethod, as the instruction might have been removed
   // in the SSA graph.
@@ -1747,6 +1753,17 @@
                                /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(flag, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
+          Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
 static bool SelectCanUseCMOV(HSelect* select) {
   // There are no conditional move instructions for XMMs.
   if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -5660,10 +5677,11 @@
   }
 }
 
-Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index) {
+Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
+                                                  dex::StringIndex dex_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u);
   // Add a patch entry and return the label.
-  jit_string_patches_.emplace_back(dex_file, dex_index);
+  jit_string_patches_.emplace_back(dex_file, dex_index.index_);
   PatchInfo<Label>* info = &jit_string_patches_.back();
   return &info->label;
 }
@@ -5714,7 +5732,7 @@
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   // Custom calling convention: RAX serves as both input and output.
-  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
+  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
   codegen_->InvokeRuntime(kQuickResolveString,
                           load,
                           load->GetDexPc());
@@ -7111,7 +7129,8 @@
 
 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file, info.index));
+    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
+                                                            dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
     size_t index_in_table = it->second;
     uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 0f70b15..2f41f73 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -412,7 +412,7 @@
   void RecordTypePatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
-  Label* NewJitRootStringPatch(const DexFile& dex_file, uint32_t dex_index);
+  Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index d3623f1..eabdbad 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,6 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
 
+#include "debug/dwarf/register.h"
+#include "locations.h"
+#include "nodes.h"
+#include "utils/arm/constants_arm.h"
+
 // TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 82b8123..10a36c6 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -17,12 +17,24 @@
 #include "dex_cache_array_fixups_arm.h"
 
 #include "base/arena_containers.h"
+#ifdef ART_USE_VIXL_ARM_BACKEND
+#include "code_generator_arm_vixl.h"
+#include "intrinsics_arm_vixl.h"
+#else
 #include "code_generator_arm.h"
 #include "intrinsics_arm.h"
+#endif
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 namespace arm {
+#ifdef ART_USE_VIXL_ARM_BACKEND
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType;
+#else
+typedef CodeGeneratorARM CodeGeneratorARMType;
+typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType;
+#endif
 
 /**
  * Finds instructions that need the dex cache arrays base as an input.
@@ -31,7 +43,7 @@
  public:
   DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
       : HGraphVisitor(graph),
-        codegen_(down_cast<CodeGeneratorARM*>(codegen)),
+        codegen_(down_cast<CodeGeneratorARMType*>(codegen)),
         dex_cache_array_bases_(std::less<const DexFile*>(),
                                // Attribute memory use to code generator.
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
@@ -66,7 +78,7 @@
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
     if (invoke->HasPcRelativeDexCache() &&
-        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARM>(invoke, codegen_)) {
+        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) {
       HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile());
       // Update the element offset in base.
       DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFile());
@@ -94,7 +106,7 @@
     return base;
   }
 
-  CodeGeneratorARM* codegen_;
+  CodeGeneratorARMType* codegen_;
 
   using DexCacheArraysBaseMap =
       ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc
index c80e19e..9df5bf1 100644
--- a/compiler/optimizing/escape.cc
+++ b/compiler/optimizing/escape.cc
@@ -23,16 +23,19 @@
 void CalculateEscape(HInstruction* reference,
                      bool (*no_escape)(HInstruction*, HInstruction*),
                      /*out*/ bool* is_singleton,
-                     /*out*/ bool* is_singleton_and_non_escaping) {
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible) {
   // For references not allocated in the method, don't assume anything.
   if (!reference->IsNewInstance() && !reference->IsNewArray()) {
     *is_singleton = false;
-    *is_singleton_and_non_escaping = false;
+    *is_singleton_and_not_returned = false;
+    *is_singleton_and_not_deopt_visible = false;
     return;
   }
   // Assume the best until proven otherwise.
   *is_singleton = true;
-  *is_singleton_and_non_escaping = true;
+  *is_singleton_and_not_returned = true;
+  *is_singleton_and_not_deopt_visible = true;
   // Visit all uses to determine if this reference can escape into the heap,
   // a method call, an alias, etc.
   for (const HUseListNode<HInstruction*>& use : reference->GetUses()) {
@@ -45,7 +48,8 @@
       // for the uncommon cases. Similarly, null checks are eventually eliminated for explicit
       // allocations, but if we see one before it is simplified, assume an alias.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
                (user->IsInstanceFieldSet() && (reference == user->InputAt(1))) ||
@@ -56,7 +60,8 @@
       // The reference is merged to HPhi/HSelect, passed to a callee, or stored to heap.
       // Hence, the reference is no longer the only name that can refer to its value.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if ((user->IsUnresolvedInstanceFieldGet() && (reference == user->InputAt(0))) ||
                (user->IsUnresolvedInstanceFieldSet() && (reference == user->InputAt(0)))) {
@@ -64,37 +69,35 @@
       // Note that we could optimize this case and still perform some optimizations until
       // we hit the unresolved access, but the conservative assumption is the simplest.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if (user->IsReturn()) {
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
     }
   }
 
-  // Need for further analysis?
-  if (!*is_singleton_and_non_escaping) {
-    return;
-  }
-
-  // Look at the environment uses and if it's for HDeoptimize, it's treated the
-  // same as a return which escapes at the end of executing the compiled code.
-  // Other environment uses are fine, as long as all client optimizations that
-  // rely on this informations are disabled for debuggable.
+  // Look at the environment uses to see if any is for HDeoptimize. Other environment uses are
+  // fine, as long as client optimizations that rely on this information are disabled for debuggable.
   for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
     HEnvironment* user = use.GetUser();
     if (user->GetHolder()->IsDeoptimize()) {
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_deopt_visible = false;
       break;
     }
   }
 }
 
-bool IsNonEscapingSingleton(HInstruction* reference,
-                            bool (*no_escape)(HInstruction*, HInstruction*)) {
-  bool is_singleton = true;
-  bool is_singleton_and_non_escaping = true;
-  CalculateEscape(reference, no_escape, &is_singleton, &is_singleton_and_non_escaping);
-  return is_singleton_and_non_escaping;
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*)) {
+  bool is_singleton = false;
+  bool is_singleton_and_not_returned = false;
+  bool is_singleton_and_not_deopt_visible = false;  // not relevant for escape
+  CalculateEscape(reference,
+                  no_escape,
+                  &is_singleton,
+                  &is_singleton_and_not_returned,
+                  &is_singleton_and_not_deopt_visible);
+  return is_singleton_and_not_returned;
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h
index 6514843..75e37b0 100644
--- a/compiler/optimizing/escape.h
+++ b/compiler/optimizing/escape.h
@@ -31,9 +31,18 @@
  * allocation. The method assigns true to parameter 'is_singleton' if the reference
  * is the only name that can refer to its value during the lifetime of the method,
  * meaning that the reference is not aliased with something else, is not stored to
- * heap memory, and not passed to another method. The method assigns true to parameter
- * 'is_singleton_and_non_escaping' if the reference is a singleton and is not returned
- * to the caller or used as an environment local of an HDeoptimize instruction.
+ * heap memory, and not passed to another method. In addition, the method assigns
+ * true to parameter 'is_singleton_and_not_returned' if the reference is a singleton
+ * and not returned to the caller and to parameter 'is_singleton_and_not_deopt_visible'
+ * if the reference is a singleton and not used as an environment local of an
+ * HDeoptimize instruction (clients of the final value must run after BCE to ensure
+ * all such instructions have been introduced already).
+ *
+ * Note that being visible to a HDeoptimize instruction does not count for ordinary
+ * escape analysis, since switching between compiled code and interpreted code keeps
+ * non escaping references restricted to the lifetime of the method and the thread
+ * executing it. This property only concerns optimizations that are interested in
+ * escape analysis with respect to the *compiled* code (such as LSE).
  *
  * When set, the no_escape function is applied to any use of the allocation instruction
  * prior to any built-in escape analysis. This allows clients to define better escape
@@ -45,14 +54,14 @@
 void CalculateEscape(HInstruction* reference,
                      bool (*no_escape)(HInstruction*, HInstruction*),
                      /*out*/ bool* is_singleton,
-                     /*out*/ bool* is_singleton_and_non_escaping);
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible);
 
 /*
- * Convenience method for testing singleton and non-escaping property at once.
+ * Convenience method for testing the singleton and not returned properties at once.
  * Callers should be aware that this method invokes the full analysis at each call.
  */
-bool IsNonEscapingSingleton(HInstruction* reference,
-                            bool (*no_escape)(HInstruction*, HInstruction*));
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*));
 
 }  // namespace art
 
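For reference, the split of the former single out-flag means each caller now picks the escape
properties it needs. A minimal sketch of a caller combining both flags, in the way
load_store_elimination.cc does below (ART headers assumed; illustrative, not part of the change):

    #include "escape.h"

    namespace art {

    // Sketch only: removability requires both properties, while plain escape
    // checks (DoesNotEscape) consult just the "not returned" half.
    static bool IsRemovableSingleton(HInstruction* reference) {
      bool is_singleton = false;
      bool is_singleton_and_not_returned = false;
      bool is_singleton_and_not_deopt_visible = false;
      CalculateEscape(reference,
                      /* no_escape */ nullptr,
                      &is_singleton,
                      &is_singleton_and_not_returned,
                      &is_singleton_and_not_deopt_visible);
      return is_singleton_and_not_returned && is_singleton_and_not_deopt_visible;
    }

    }  // namespace art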
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c8cba20..188ee3a 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -23,7 +23,6 @@
 #include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
-#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -448,7 +447,6 @@
 
   // Ensure that reference type instructions have reference type info.
   if (instruction->GetType() == Primitive::kPrimNot) {
-    ScopedObjectAccess soa(Thread::Current());
     if (!instruction->GetReferenceTypeInfo().IsValid()) {
       AddError(StringPrintf("Reference type instruction %s:%d does not have "
                             "valid reference type information.",
@@ -1011,7 +1009,6 @@
 void GraphChecker::VisitBoundType(HBoundType* instruction) {
   VisitInstruction(instruction);
 
-  ScopedObjectAccess soa(Thread::Current());
   if (!instruction->GetUpperBound().IsValid()) {
     AddError(StringPrintf(
         "%s %d does not have a valid upper bound RTI.",
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 01e89bb..8d93867 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -292,6 +292,21 @@
       classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
 }
 
+ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
+  if (!resolved_method->HasSingleImplementation()) {
+    return nullptr;
+  }
+  if (Runtime::Current()->IsAotCompiler()) {
+    // No CHA-based devirtualization for AOT compiler (yet).
+    return nullptr;
+  }
+  if (outermost_graph_->IsCompilingOsr()) {
+    // We do not support HDeoptimize in OSR methods.
+    return nullptr;
+  }
+  return resolved_method->GetSingleImplementation();
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -317,10 +332,29 @@
     actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
   }
 
+  bool cha_devirtualize = false;
+  if (actual_method == nullptr) {
+    ArtMethod* method = TryCHADevirtualization(resolved_method);
+    if (method != nullptr) {
+      cha_devirtualize = true;
+      actual_method = method;
+    }
+  }
+
   if (actual_method != nullptr) {
-    bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
+    bool result = TryInlineAndReplace(invoke_instruction,
+                                      actual_method,
+                                      /* do_rtp */ true,
+                                      cha_devirtualize);
     if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
-      MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      if (cha_devirtualize) {
+        // Add a dependency due to devirtualization. We've assumed resolved_method
+        // has a single implementation.
+        outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
+        MaybeRecordStat(kCHAInline);
+      } else {
+        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      }
     }
     return result;
   }
@@ -438,7 +472,10 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction,
+                           resolved_method,
+                           /* do_rtp */ false,
+                           /* cha_devirtualize */ false)) {
     return false;
   }
 
@@ -465,6 +502,25 @@
   return true;
 }
 
+void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
+                           uint32_t dex_pc,
+                           HInstruction* cursor,
+                           HBasicBlock* bb_cursor) {
+  HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc);
+  HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual(
+      deopt_flag, graph_->GetIntConstant(0, dex_pc));
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc);
+
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag);
+  bb_cursor->InsertInstructionAfter(deopt, should_deopt);
+  deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+}
+
 HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
                                      HInstruction* cursor,
                                      HBasicBlock* bb_cursor,
@@ -787,8 +843,14 @@
   return true;
 }
 
-bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
+                                   ArtMethod* method,
+                                   bool do_rtp,
+                                   bool cha_devirtualize) {
   HInstruction* return_replacement = nullptr;
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
   if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
     if (invoke_instruction->IsInvokeInterface()) {
       // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
@@ -826,6 +888,9 @@
       return false;
     }
   }
+  if (cha_devirtualize) {
+    AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
+  }
   if (return_replacement != nullptr) {
     invoke_instruction->ReplaceWith(return_replacement);
   }
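At the machine level, the guard built by AddCHAGuard reduces to a load-compare-deoptimize
sequence in front of the devirtualized body. A C++ analogy of the emitted check (all names
hypothetical, not ART APIs):

    #include <cstdint>

    void Deoptimize();      // stand-in for the runtime deoptimization entrypoint
    void SingleImplBody();  // stand-in for the devirtualized (possibly inlined) callee

    // Analogy of HShouldDeoptimizeFlag + HNotEqual + HDeoptimize.
    void GuardedCall(const int32_t* should_deoptimize_slot) {
      if (*should_deoptimize_slot != 0) {  // set when the CHA assumption is invalidated
        Deoptimize();                      // fall back to the interpreter
        return;
      }
      SingleImplBody();
    }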
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a2b4fc9..ffebd97 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -62,8 +62,12 @@
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
   // reference type propagation can run after the inlining. If the inlining is successful, this
-  // method will replace and remove the `invoke_instruction`.
-  bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+  // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
+  // a CHA guard needs to be added for the inlining.
+  bool TryInlineAndReplace(HInvoke* invoke_instruction,
+                           ArtMethod* resolved_method,
+                           bool do_rtp,
+                           bool cha_devirtualize)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool TryBuildAndInline(HInvoke* invoke_instruction,
@@ -118,6 +122,18 @@
                                             Handle<mirror::ObjectArray<mirror::Class>> classes)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Try CHA-based devirtualization to change virtual method calls into
+  // direct calls.
+  // Returns the actual method that resolved_method can be devirtualized to.
+  ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
+  // should_deoptimize flag and, if it is true, deoptimizes.
+  void AddCHAGuard(HInstruction* invoke_instruction,
+                   uint32_t dex_pc,
+                   HInstruction* cursor,
+                   HBasicBlock* bb_cursor);
 
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 40de5ce..b97581b 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -2625,7 +2625,7 @@
     }
 
     case Instruction::CONST_STRING: {
-      uint32_t string_index = instruction.VRegB_21c();
+      dex::StringIndex string_index(instruction.VRegB_21c());
       AppendInstruction(
           new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
@@ -2633,7 +2633,7 @@
     }
 
     case Instruction::CONST_STRING_JUMBO: {
-      uint32_t string_index = instruction.VRegB_31c();
+      dex::StringIndex string_index(instruction.VRegB_31c());
       AppendInstruction(
           new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 85b461d..658b804 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,6 +16,7 @@
 
 #include "instruction_simplifier.h"
 
+#include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
@@ -107,6 +108,8 @@
   void SimplifyStringCharAt(HInvoke* invoke);
   void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyNPEOnArgN(HInvoke* invoke, size_t);
+  void SimplifyReturnThis(HInvoke* invoke);
+  void SimplifyAllocationIntrinsic(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
@@ -1864,11 +1867,61 @@
 // is provably non-null, we can clear the flag.
 void InstructionSimplifierVisitor::SimplifyNPEOnArgN(HInvoke* invoke, size_t n) {
   HInstruction* arg = invoke->InputAt(n);
-  if (!arg->CanBeNull()) {
+  if (invoke->CanThrow() && !arg->CanBeNull()) {
     invoke->SetCanThrow(false);
   }
 }
 
+// Methods that return "this" can replace the returned value with the receiver.
+void InstructionSimplifierVisitor::SimplifyReturnThis(HInvoke* invoke) {
+  if (invoke->HasUses()) {
+    HInstruction* receiver = invoke->InputAt(0);
+    invoke->ReplaceWith(receiver);
+    RecordSimplification();
+  }
+}
+
+// Helper method for StringBuffer escape analysis.
+static bool NoEscapeForStringBufferReference(HInstruction* reference, HInstruction* user) {
+  if (user->IsInvokeStaticOrDirect()) {
+    // Any constructor on StringBuffer is okay.
+    return user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+           user->InputAt(0) == reference;
+  } else if (user->IsInvokeVirtual()) {
+    switch (user->AsInvokeVirtual()->GetIntrinsic()) {
+      case Intrinsics::kStringBufferLength:
+      case Intrinsics::kStringBufferToString:
+        DCHECK_EQ(user->InputAt(0), reference);
+        return true;
+      case Intrinsics::kStringBufferAppend:
+        // Returns "this", so only okay if no further uses.
+        DCHECK_EQ(user->InputAt(0), reference);
+        DCHECK_NE(user->InputAt(1), reference);
+        return !user->HasUses();
+      default:
+        break;
+    }
+  }
+  return false;
+}
+
+// Certain allocation intrinsics are not removed by dead code elimination
+// because they may throw an OOM exception or have other side effects.
+// This method removes such intrinsics when special circumstances allow.
+void InstructionSimplifierVisitor::SimplifyAllocationIntrinsic(HInvoke* invoke) {
+  if (!invoke->HasUses()) {
+    // Instruction has no uses. If unsynchronized, we can remove right away, safely ignoring
+    // the potential OOM of course. Otherwise, we must ensure the receiver object of this
+    // call does not escape since only thread-local synchronization may be removed.
+    bool is_synchronized = invoke->GetIntrinsic() == Intrinsics::kStringBufferToString;
+    HInstruction* receiver = invoke->InputAt(0);
+    if (!is_synchronized || DoesNotEscape(receiver, NoEscapeForStringBufferReference)) {
+      invoke->GetBlock()->RemoveInstruction(invoke);
+      RecordSimplification();
+    }
+  }
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1926,6 +1979,14 @@
     case Intrinsics::kStringStringIndexOfAfter:
       SimplifyNPEOnArgN(instruction, 1);  // 0th has own NullCheck
       break;
+    case Intrinsics::kStringBufferAppend:
+    case Intrinsics::kStringBuilderAppend:
+      SimplifyReturnThis(instruction);
+      break;
+    case Intrinsics::kStringBufferToString:
+    case Intrinsics::kStringBuilderToString:
+      SimplifyAllocationIntrinsic(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8234b24..8f64fae 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2613,6 +2613,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 17a97da..d8a896e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2781,6 +2781,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index c8e3534..433dced 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -71,7 +71,7 @@
       : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
 
   Location MoveArguments(CodeGenerator* codegen) {
-    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
     IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
     return calling_convention_visitor.GetMethodLocation();
   }
@@ -677,7 +677,10 @@
       vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
       vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
       if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-        __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Add(temp_reg, base, offset);
+        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
       } else {
         __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
       }
@@ -2703,6 +2706,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 7c81588..9b5d7a0 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2497,6 +2497,12 @@
 
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 2d4f417..5a99886 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1949,6 +1949,12 @@
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 06ab46f..922c3bc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3331,6 +3331,12 @@
 
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 2ea8670..05d270a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3000,6 +3000,12 @@
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index edecf17..2856c3e 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -37,8 +37,13 @@
       : reference_(reference),
         position_(pos),
         is_singleton_(true),
-        is_singleton_and_non_escaping_(true) {
-    CalculateEscape(reference_, nullptr, &is_singleton_, &is_singleton_and_non_escaping_);
+        is_singleton_and_not_returned_(true),
+        is_singleton_and_not_deopt_visible_(true) {
+    CalculateEscape(reference_,
+                    nullptr,
+                    &is_singleton_,
+                    &is_singleton_and_not_returned_,
+                    &is_singleton_and_not_deopt_visible_);
   }
 
   HInstruction* GetReference() const {
@@ -59,19 +64,17 @@
   // Returns true if reference_ is a singleton and not returned to the caller or
   // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndNonEscaping() const {
-    return is_singleton_and_non_escaping_;
+  bool IsSingletonAndRemovable() const {
+    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
   }
 
  private:
   HInstruction* const reference_;
-  const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
-  bool is_singleton_;         // can only be referred to by a single name in the method.
+  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
 
-  // reference_ is singleton and does not escape in the end either by
-  // returning to the caller, or being used as an environment local of an
-  // HDeoptimize instruction.
-  bool is_singleton_and_non_escaping_;
+  bool is_singleton_;                        // can only be referred to by a single name in the method,
+  bool is_singleton_and_not_returned_;       // and not returned to caller,
+  bool is_singleton_and_not_deopt_visible_;  // and not used as an environment local of HDeoptimize.
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -623,7 +626,7 @@
       bool from_all_predecessors = true;
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
       HInstruction* singleton_ref = nullptr;
-      if (ref_info->IsSingletonAndNonEscaping()) {
+      if (ref_info->IsSingletonAndRemovable()) {
         // We do more analysis of liveness when merging heap values for such
         // cases since stores into such references may potentially be eliminated.
         singleton_ref = ref_info->GetReference();
@@ -796,7 +799,7 @@
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
-    } else if (ref_info->IsSingletonAndNonEscaping()) {
+    } else if (ref_info->IsSingletonAndRemovable()) {
       // Store into a field of a singleton that's not returned. The value cannot be
       // killed due to aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
@@ -970,7 +973,7 @@
       // new_instance isn't used for field accesses. No need to process it.
       return;
     }
-    if (ref_info->IsSingletonAndNonEscaping() &&
+    if (ref_info->IsSingletonAndRemovable() &&
         !new_instance->IsFinalizable() &&
         !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 680381a..594255c 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2543,6 +2543,8 @@
       return os << "BssEntry";
     case HLoadString::LoadKind::kDexCacheViaMethod:
       return os << "DexCacheViaMethod";
+    case HLoadString::LoadKind::kJitTableAddress:
+      return os << "JitTableAddress";
     default:
       LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index eebc49c..e3f4d8f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -333,7 +333,8 @@
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
         inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
-        osr_(osr) {
+        osr_(osr),
+        cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -536,6 +537,20 @@
 
   bool IsCompilingOsr() const { return osr_; }
 
+  ArenaSet<ArtMethod*>& GetCHASingleImplementationList() {
+    return cha_single_implementation_list_;
+  }
+
+  void AddCHASingleImplementationDependency(ArtMethod* method) {
+    cha_single_implementation_list_.insert(method);
+  }
+
+  bool HasShouldDeoptimizeFlag() const {
+    // TODO: if all CHA guards can be eliminated, there is no need for the flag
+    // even if cha_single_implementation_list_ is not empty.
+    return !cha_single_implementation_list_.empty();
+  }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
@@ -672,6 +687,9 @@
   // compiled code entries which the interpreter can directly jump to.
   const bool osr_;
 
+  // List of methods that are assumed to have a single implementation.
+  ArenaSet<ArtMethod*> cha_single_implementation_list_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   friend class HInliner;             // For the reverse post order.
@@ -1240,6 +1258,7 @@
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
   M(CurrentMethod, Instruction)                                         \
+  M(ShouldDeoptimizeFlag, Instruction)                                  \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
   M(DivZeroCheck, Instruction)                                          \
@@ -2072,6 +2091,8 @@
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
+  // TODO: this method is used by LICM and GVN with possibly different
+  //       meanings? split and rename?
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
@@ -2873,6 +2894,27 @@
   DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
 };
 
+// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
+// The compiled code checks this flag value in a guard before each devirtualized call and,
+// if it is true, deoptimizes.
+// It occupies a 4-byte slot on the stack.
+// TODO: allocate a register for this flag.
+class HShouldDeoptimizeFlag FINAL : public HExpression<0> {
+ public:
+  // TODO: use SideEffects to aid eliminating some CHA guards.
+  explicit HShouldDeoptimizeFlag(uint32_t dex_pc)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+  }
+
+  // We don't eliminate CHA guards yet.
+  bool CanBeMoved() const OVERRIDE { return false; }
+
+  DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HShouldDeoptimizeFlag);
+};
+
 // Represents the ArtMethod that was passed as a first argument to
 // the method. It is used by instructions that depend on it, like
 // instructions that work with the dex cache.
@@ -3789,7 +3831,7 @@
 
   bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
 
-  bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
+  bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
@@ -4181,6 +4223,19 @@
                 kVirtual),
         vtable_index_(vtable_index) {}
 
+  bool CanBeNull() const OVERRIDE {
+    switch (GetIntrinsic()) {
+      case Intrinsics::kThreadCurrentThread:
+      case Intrinsics::kStringBufferAppend:
+      case Intrinsics::kStringBufferToString:
+      case Intrinsics::kStringBuilderAppend:
+      case Intrinsics::kStringBuilderToString:
+        return false;
+      default:
+        return HInvoke::CanBeNull();
+    }
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     // TODO: Add implicit null checks in intrinsics.
     return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
@@ -5698,7 +5753,7 @@
   };
 
   HLoadString(HCurrentMethod* current_method,
-              uint32_t string_index,
+              dex::StringIndex string_index,
               const DexFile& dex_file,
               uint32_t dex_pc)
       : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
@@ -5717,7 +5772,7 @@
 
   void SetLoadKindWithStringReference(LoadKind load_kind,
                                       const DexFile& dex_file,
-                                      uint32_t string_index) {
+                                      dex::StringIndex string_index) {
     DCHECK(HasStringReference(load_kind));
     load_data_.dex_file_ = &dex_file;
     string_index_ = string_index;
@@ -5730,7 +5785,7 @@
 
   const DexFile& GetDexFile() const;
 
-  uint32_t GetStringIndex() const {
+  dex::StringIndex GetStringIndex() const {
     DCHECK(HasStringReference(GetLoadKind()) || /* For slow paths. */ !IsInDexCache());
     return string_index_;
   }
@@ -5744,7 +5799,7 @@
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
-  size_t ComputeHashCode() const OVERRIDE { return string_index_; }
+  size_t ComputeHashCode() const OVERRIDE { return string_index_.index_; }
 
   // Will call the runtime if we need to load the string through
   // the dex cache and the string is not guaranteed to be there yet.
@@ -5823,7 +5878,7 @@
 
   // String index serves also as the hash code and it's also needed for slow-paths,
   // so it must not be overwritten with other load data.
-  uint32_t string_index_;
+  dex::StringIndex string_index_;
 
   union {
     const DexFile* dex_file_;            // For string reference.
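The two HGraph hooks above connect the ends of the pipeline: the inliner records each method it
assumed to be single-implementation, and the code generators consult HasShouldDeoptimizeFlag()
when sizing the frame. A condensed sketch (ART types assumed in scope; illustrative only):

    // Inliner side: record the CHA assumption for later registration with the JIT.
    void OnCHADevirtualization(HGraph* graph, ArtMethod* resolved_method) {
      graph->AddCHASingleImplementationDependency(resolved_method);
    }

    // Codegen side: reserve the 4-byte flag slot only if a CHA guard was emitted.
    size_t ExtraFrameBytes(const HGraph* graph) {
      return graph->HasShouldDeoptimizeFlag() ? kShouldDeoptimizeFlagSize : 0u;
    }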
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2382b72..8ea2b06 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -630,10 +630,8 @@
 #if defined(ART_ENABLE_CODEGEN_arm)
     case kThumb2:
     case kArm: {
-#ifndef ART_USE_VIXL_ARM_BACKEND
       arm::DexCacheArrayFixups* fixups =
           new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
-#endif
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
@@ -642,9 +640,7 @@
         simplifier,
         side_effects,
         gvn,
-#ifndef ART_USE_VIXL_ARM_BACKEND
         fixups
-#endif
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
@@ -1208,7 +1204,9 @@
       code_allocator.GetMemory().data(),
       code_allocator.GetSize(),
       osr,
-      roots);
+      roots,
+      codegen->GetGraph()->HasShouldDeoptimizeFlag(),
+      codegen->GetGraph()->GetCHASingleImplementationList());
 
   if (code == nullptr) {
     code_cache->ClearData(self, stack_map_data, roots_data);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index c8d1ce0..203b1ec 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -27,6 +27,7 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
+  kCHAInline,
   kCompiled,
   kInlinedInvoke,
   kReplacedInvokeWithSimplePattern,
@@ -106,6 +107,7 @@
     std::string name;
     switch (stat) {
       case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCHAInline : name = "CHAInline"; break;
       case kCompiled : name = "Compiled"; break;
       case kInlinedInvoke : name = "InlinedInvoke"; break;
       case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break;
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 5991791..59523a9 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -87,6 +87,10 @@
       // Adjust the stack slot, now that we know the number of them for each type.
       // The way this implementation lays out the stack is the following:
       // [parameter slots       ]
+      // [art method (caller)   ]
+      // [entry spill (core)    ]
+      // [entry spill (float)   ]
+      // [should_deoptimize flag] (this is optional)
       // [catch phi spill slots ]
       // [double spill slots    ]
       // [long spill slots      ]
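Given this layout, the flag sits directly below the entry spill area, so its SP-relative offset
follows from the frame size. A hedged sketch of the arithmetic behind the
GetStackOffsetOfShouldDeoptimizeFlag() accessor used by the code generators (the exact
accounting is assumed here, not quoted):

    // Sketch: SP-relative offset of the should_deoptimize flag.
    size_t StackOffsetOfShouldDeoptimizeFlag(size_t frame_size,
                                             size_t entry_spill_size) {
      return frame_size - entry_spill_size - kShouldDeoptimizeFlagSize;
    }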
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index aa0d371..9064f86 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1749,7 +1749,7 @@
 bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
   return processing_core_regs
       ? !codegen_->IsCoreCalleeSaveRegister(reg)
-      : !codegen_->IsCoreCalleeSaveRegister(reg);
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
 }
 
 static bool RegisterIsAligned(size_t reg) {
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a127708..daf160a 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -267,7 +267,7 @@
   DCHECK(!load_string->IsInDexCache());
 
   const DexFile& dex_file = load_string->GetDexFile();
-  uint32_t string_index = load_string->GetStringIndex();
+  dex::StringIndex string_index = load_string->GetStringIndex();
 
   HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
   uint64_t address = 0u;  // String or dex cache element address.
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index fb6f172..2d026b8 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -428,8 +428,6 @@
   UNIMPLEMENTED(FATAL);
 }
 
-static constexpr uint32_t kArmInstrMaxSizeInBytes = 4;
-
 void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
                                                       FrameOffset handle_scope_offset,
                                                       ManagedRegister min_reg,
@@ -458,14 +456,14 @@
     if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) {
       if (!out_reg.Equals(in_reg)) {
         AssemblerAccurateScope guard(asm_.GetVIXLAssembler(),
-                                     3 * kArmInstrMaxSizeInBytes,
+                                     3 * vixl32::kMaxInstructionSizeInBytes,
                                      CodeBufferCheckScope::kMaximumSize);
         ___ it(eq, 0xc);
         ___ mov(eq, out_reg.AsVIXLRegister(), 0);
         asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne);
       } else {
         AssemblerAccurateScope guard(asm_.GetVIXLAssembler(),
-                                     2 * kArmInstrMaxSizeInBytes,
+                                     2 * vixl32::kMaxInstructionSizeInBytes,
                                      CodeBufferCheckScope::kMaximumSize);
         ___ it(ne, 0x8);
         asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne);
@@ -496,7 +494,7 @@
 
     if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) {
       AssemblerAccurateScope guard(asm_.GetVIXLAssembler(),
-                                   2 * kArmInstrMaxSizeInBytes,
+                                   2 * vixl32::kMaxInstructionSizeInBytes,
                                    CodeBufferCheckScope::kMaximumSize);
       ___ it(ne, 0x8);
       asm_.AddConstantInIt(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne);
@@ -589,7 +587,7 @@
   ___ Cmp(scratch.AsVIXLRegister(), 0);
   {
     AssemblerAccurateScope guard(asm_.GetVIXLAssembler(),
-                                 kArmInstrMaxSizeInBytes,
+                                 vixl32::kMaxInstructionSizeInBytes,
                                  CodeBufferCheckScope::kMaximumSize);
     ___ b(ne, Narrow, exception_blocks_.back()->Entry());
   }
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
new file mode 100644
index 0000000..70ea028
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
+#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
+
+#include "atomic_method_ref_map.h"
+
+#include "dex_file-inl.h"
+
+namespace art {
+
+template <typename T>
+inline typename AtomicMethodRefMap<T>::InsertResult AtomicMethodRefMap<T>::Insert(
+    MethodReference ref,
+    const T& expected,
+    const T& desired) {
+  ElementArray* const array = GetArray(ref.dex_file);
+  if (array == nullptr) {
+    return kInsertResultInvalidDexFile;
+  }
+  return (*array)[ref.dex_method_index].CompareExchangeStrongSequentiallyConsistent(
+      expected, desired)
+      ? kInsertResultSuccess
+      : kInsertResultCASFailure;
+}
+
+template <typename T>
+inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
+  const ElementArray* const array = GetArray(ref.dex_file);
+  if (array == nullptr) {
+    return false;
+  }
+  *out = (*array)[ref.dex_method_index].LoadRelaxed();
+  return true;
+}
+
+template <typename T>
+inline void AtomicMethodRefMap<T>::AddDexFile(const DexFile* dex_file) {
+  arrays_.Put(dex_file, ElementArray(dex_file->NumMethodIds()));
+}
+
+template <typename T>
+inline typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray(
+    const DexFile* dex_file) {
+  auto it = arrays_.find(dex_file);
+  return (it != arrays_.end()) ? &it->second : nullptr;
+}
+
+template <typename T>
+inline const typename AtomicMethodRefMap<T>::ElementArray* AtomicMethodRefMap<T>::GetArray(
+    const DexFile* dex_file) const {
+  auto it = arrays_.find(dex_file);
+  return (it != arrays_.end()) ? &it->second : nullptr;
+}
+
+template <typename T> template <typename Visitor>
+inline void AtomicMethodRefMap<T>::Visit(const Visitor& visitor) {
+  for (auto& pair : arrays_) {
+    const DexFile* dex_file = pair.first;
+    const ElementArray& elements = pair.second;
+    for (size_t i = 0; i < elements.size(); ++i) {
+      visitor(MethodReference(dex_file, i), elements[i].LoadRelaxed());
+    }
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_
diff --git a/compiler/utils/atomic_method_ref_map.h b/compiler/utils/atomic_method_ref_map.h
new file mode 100644
index 0000000..11ab211
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
+#define ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
+
+#include "base/dchecked_vector.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+class DexFile;
+
+// Used by CompilerCallbacks to track verification information from the Runtime.
+template <typename T>
+class AtomicMethodRefMap {
+ public:
+  explicit AtomicMethodRefMap() {}
+  ~AtomicMethodRefMap() {}
+
+  // Atomically swap the element in if the existing value matches expected.
+  enum InsertResult {
+    kInsertResultInvalidDexFile,
+    kInsertResultCASFailure,
+    kInsertResultSuccess,
+  };
+  InsertResult Insert(MethodReference ref, const T& expected, const T& desired);
+
+  // Retrieve an item; returns false if the dex file is not added.
+  bool Get(MethodReference ref, T* out) const;
+
+  // Dex files must be added before method references belonging to them can be used as keys. Not
+  // thread safe.
+  void AddDexFile(const DexFile* dex_file);
+
+  bool HaveDexFile(const DexFile* dex_file) const {
+    return arrays_.find(dex_file) != arrays_.end();
+  }
+
+  // Visit all of the dex files and elements.
+  template <typename Visitor>
+  void Visit(const Visitor& visitor);
+
+ private:
+  // Verified methods. The method array is fixed in size, avoiding the need for a lock to extend it.
+  using ElementArray = dchecked_vector<Atomic<T>>;
+  using DexFileArrays = SafeMap<const DexFile*, ElementArray>;
+
+  const ElementArray* GetArray(const DexFile* dex_file) const;
+  ElementArray* GetArray(const DexFile* dex_file);
+
+  DexFileArrays arrays_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_H_
diff --git a/compiler/utils/atomic_method_ref_map_test.cc b/compiler/utils/atomic_method_ref_map_test.cc
new file mode 100644
index 0000000..9e5bf4b
--- /dev/null
+++ b/compiler/utils/atomic_method_ref_map_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "atomic_method_ref_map-inl.h"
+
+#include <memory>
+
+#include "common_runtime_test.h"
+#include "dex_file-inl.h"
+#include "method_reference.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art {
+
+class AtomicMethodRefMapTest : public CommonRuntimeTest {};
+
+TEST_F(AtomicMethodRefMapTest, RunTests) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<const DexFile> dex(OpenTestDexFile("Interfaces"));
+  ASSERT_TRUE(dex != nullptr);
+  using Map = AtomicMethodRefMap<int>;
+  Map map;
+  int value = 123;
+  // Error case: Not already inserted.
+  EXPECT_FALSE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_FALSE(map.HaveDexFile(dex.get()));
+  // Error case: Dex file not registered.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, 1) == Map::kInsertResultInvalidDexFile);
+  map.AddDexFile(dex.get());
+  EXPECT_TRUE(map.HaveDexFile(dex.get()));
+  EXPECT_GT(dex->NumMethodIds(), 10u);
+  // After the dex file has been added, the get should succeed but return the default value.
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, 0);
+  // Actually insert an item and make sure we can retrieve it.
+  static const int kInsertValue = 44;
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue) ==
+              Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue);
+  static const int kInsertValue2 = 123;
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 2), 0, kInsertValue2) ==
+              Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 2), &value));
+  EXPECT_EQ(value, kInsertValue2);
+  // Error case: Incorrect expected value for CAS.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, kInsertValue + 1) ==
+      Map::kInsertResultCASFailure);
+  // Correctly overwrite the value and verify.
+  EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), kInsertValue, kInsertValue + 1) ==
+      Map::kInsertResultSuccess);
+  EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_EQ(value, kInsertValue + 1);
+}
+
+}  // namespace art
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
index 0fd9e5b..90335eb 100644
--- a/compiler/utils/string_reference_test.cc
+++ b/compiler/utils/string_reference_test.cc
@@ -18,6 +18,7 @@
 
 #include <memory>
 
+#include "dex_file_types.h"
 #include "gtest/gtest.h"
 #include "utils/test_dex_file_builder.h"
 
@@ -34,15 +35,15 @@
   builder1.AddString("String1");
   std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
   ASSERT_EQ(1u, dex_file1->NumStringIds());
-  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
-  StringReference sr1(dex_file1.get(), 0);
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(0))));
+  StringReference sr1(dex_file1.get(), dex::StringIndex(0));
 
   TestDexFileBuilder builder2;
   builder2.AddString("String2");
   std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
   ASSERT_EQ(1u, dex_file2->NumStringIds());
-  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
-  StringReference sr2(dex_file2.get(), 0);
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(0))));
+  StringReference sr2(dex_file2.get(), dex::StringIndex(0));
 
   StringReferenceValueComparator cmp;
   EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
@@ -80,7 +81,8 @@
   std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
   ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
   for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
-    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+    ASSERT_STREQ(kDexFile1Strings[index],
+                 dex_file1->GetStringData(dex_file1->GetStringId(dex::StringIndex(index))));
   }
 
   TestDexFileBuilder builder2;
@@ -90,14 +92,15 @@
   std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 1");
   ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
   for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
-    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+    ASSERT_STREQ(kDexFile2Strings[index],
+                 dex_file2->GetStringData(dex_file2->GetStringId(dex::StringIndex(index))));
   }
 
   StringReferenceValueComparator cmp;
   for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
     for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
-      StringReference sr1(dex_file1.get(), index1);
-      StringReference sr2(dex_file2.get(), index2);
+      StringReference sr1(dex_file1.get(), dex::StringIndex(index1));
+      StringReference sr2(dex_file2.get(), dex::StringIndex(index2));
       EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
       EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
     }
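The mechanical uint32_t → dex::StringIndex rewrites above follow from dex_file_types.h introducing a typed wrapper, so string ids can no longer be silently mixed with other integer indices. A minimal sketch of the idiom (not the exact ART definition):

#include <cstdint>
#include <limits>

namespace dex {
class StringIndex {
 public:
  uint32_t index_;

  constexpr StringIndex() : index_(std::numeric_limits<uint32_t>::max()) {}
  constexpr explicit StringIndex(uint32_t idx) : index_(idx) {}

  // Invalid indices (e.g. a class with no source file) use the max sentinel.
  bool IsValid() const { return index_ != std::numeric_limits<uint32_t>::max(); }
  bool operator==(const StringIndex& other) const { return index_ == other.index_; }
  bool operator!=(const StringIndex& other) const { return index_ != other.index_; }
};
}  // namespace dex

The explicit constructor is what forces the dex::StringIndex(...) casts seen throughout this diff, and the public index_ member is what the updated assertions and printf-style dumps read.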
diff --git a/compiler/utils/test_dex_file_builder_test.cc b/compiler/utils/test_dex_file_builder_test.cc
index 922f8b1..c76739b 100644
--- a/compiler/utils/test_dex_file_builder_test.cc
+++ b/compiler/utils/test_dex_file_builder_test.cc
@@ -49,7 +49,8 @@
   };
   ASSERT_EQ(arraysize(expected_strings), dex_file->NumStringIds());
   for (size_t i = 0; i != arraysize(expected_strings); ++i) {
-    EXPECT_STREQ(expected_strings[i], dex_file->GetStringData(dex_file->GetStringId(i))) << i;
+    EXPECT_STREQ(expected_strings[i],
+                 dex_file->GetStringData(dex_file->GetStringId(dex::StringIndex(i)))) << i;
   }
 
   static const char* const expected_types[] = {
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 525a2ee..90fe6da 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -460,20 +460,20 @@
   ScopedObjectAccess soa(Thread::Current());
   LoadDexFile(&soa);
 
-  uint32_t id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
-  ASSERT_LT(id_Main1, primary_dex_file_->NumStringIds());
+  dex::StringIndex id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main1.index_, primary_dex_file_->NumStringIds());
   ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main1));
 
-  uint32_t id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
-  ASSERT_LT(id_Main2, primary_dex_file_->NumStringIds());
+  dex::StringIndex id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main2.index_, primary_dex_file_->NumStringIds());
   ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main2));
 
-  uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem1, primary_dex_file_->NumStringIds());
+  dex::StringIndex id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem1.index_, primary_dex_file_->NumStringIds());
   ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem1));
 
-  uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem2, primary_dex_file_->NumStringIds());
+  dex::StringIndex id_Lorem2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem2.index_, primary_dex_file_->NumStringIds());
   ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem2));
 
   ASSERT_EQ(id_Main1, id_Main2);
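The assertions above encode the id scheme under test: strings already present in the dex file resolve to their real index below NumStringIds(), while strings the verifier invents get synthetic, deduplicated ids at or above it. A generic sketch of that scheme (plain vectors here, not the VerifierDeps implementation):

#include <cstdint>
#include <string>
#include <vector>

uint32_t GetIdFromString(const std::vector<std::string>& dex_strings,
                         std::vector<std::string>* extra_strings,
                         const std::string& s) {
  for (size_t i = 0; i < dex_strings.size(); ++i) {
    if (dex_strings[i] == s) return static_cast<uint32_t>(i);  // Real dex string id.
  }
  for (size_t i = 0; i < extra_strings->size(); ++i) {
    if ((*extra_strings)[i] == s) {
      return static_cast<uint32_t>(dex_strings.size() + i);  // Deduplicated synthetic id.
    }
  }
  extra_strings->push_back(s);  // New synthetic id past NumStringIds().
  return static_cast<uint32_t>(dex_strings.size() + extra_strings->size() - 1);
}

This is why id_Main1/id_Main2 compare below NumStringIds() and equal each other, while the two "Lorem ipsum" lookups land at or above it yet also agree.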
@@ -1306,9 +1306,10 @@
     bool found = false;
     for (const auto& entry : deps->fields_) {
       if (!entry.IsResolved()) {
+        constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
         deps->fields_.insert(VerifierDeps::FieldResolution(0 /* we know there is a field there */,
                                                            VerifierDeps::kUnresolvedMarker - 1,
-                                                           0  /* we know there is a class there */));
+                                                           kStringIndexZero));
         found = true;
         break;
       }
@@ -1341,7 +1342,7 @@
     VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
     bool found = false;
     for (const auto& entry : deps->fields_) {
-      static constexpr uint32_t kNewTypeIndex = 0;
+      constexpr dex::StringIndex kNewTypeIndex(0);
       if (entry.GetDeclaringClassIndex() != kNewTypeIndex) {
         deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
                                                            entry.GetAccessFlags(),
@@ -1384,9 +1385,10 @@
       std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
       for (const auto& entry : *methods) {
         if (!entry.IsResolved()) {
+          constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
           methods->insert(VerifierDeps::MethodResolution(0 /* we know there is a method there */,
                                                          VerifierDeps::kUnresolvedMarker - 1,
-                                                         0  /* we know there is a class there */));
+                                                         kStringIndexZero));
           found = true;
           break;
         }
@@ -1421,7 +1423,7 @@
       bool found = false;
       std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
       for (const auto& entry : *methods) {
-        static constexpr uint32_t kNewTypeIndex = 0;
+        constexpr dex::StringIndex kNewTypeIndex(0);
         if (entry.IsResolved() && entry.GetDeclaringClassIndex() != kNewTypeIndex) {
           methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
                                                          entry.GetAccessFlags(),
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 9e6032f..264be99 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1636,7 +1636,7 @@
                                         soa.Decode<mirror::ClassLoader>(class_loader_).Ptr())));
       // Pre-register dex files so that we can access verification results without locks during
       // compilation and verification.
-      verification_results_->PreRegisterDexFile(dex_file);
+      verification_results_->AddDexFile(dex_file);
     }
 
     return true;
@@ -1855,8 +1855,8 @@
           return false;
         }
 
-        // VDEX finalized, seek back to the beginning and write the header.
-        if (!oat_writers_[i]->WriteVdexHeader(vdex_out.get())) {
+        // VDEX finalized, seek back to the beginning and write checksums and the header.
+        if (!oat_writers_[i]->WriteChecksumsAndVdexHeader(vdex_out.get())) {
           LOG(ERROR) << "Failed to write vdex header into VDEX " << vdex_file->GetPath();
           return false;
         }
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 714a58c..b6b62a8 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -26,6 +26,7 @@
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
+#include "jit/offline_profiling_info.h"
 #include "oat.h"
 #include "oat_file.h"
 #include "utils.h"
@@ -552,26 +553,6 @@
   RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
 }
 
-static const char kDexFileLayoutInputProfile[] = "cHJvADAwMgABAAwAAQABAOqMEeFEZXhOb09hdC5qYXIBAAEA";
-
-static void WriteFileBase64(const char* base64, const char* location) {
-  // Decode base64.
-  CHECK(base64 != nullptr);
-  size_t length;
-  std::unique_ptr<uint8_t[]> bytes(DecodeBase64(base64, &length));
-  CHECK(bytes.get() != nullptr);
-
-  // Write to provided file.
-  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
-  CHECK(file.get() != nullptr);
-  if (!file->WriteFully(bytes.get(), length)) {
-    PLOG(FATAL) << "Failed to write base64 as file";
-  }
-  if (file->FlushCloseOrErase() != 0) {
-    PLOG(FATAL) << "Could not flush and close test file.";
-  }
-}
-
 class Dex2oatLayoutTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
@@ -579,13 +560,34 @@
     // Ignore, we'll do our own checks.
   }
 
+  // Emits a profile containing a single dex file with the given location and a single class
+  // with type index 1.
+  void GenerateProfile(const std::string& test_profile,
+                       const std::string& dex_location,
+                       uint32_t checksum) {
+    int profile_test_fd = open(test_profile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    CHECK_GE(profile_test_fd, 0);
+
+    ProfileCompilationInfo info;
+    std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
+    info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1));
+    bool result = info.Save(profile_test_fd);
+    close(profile_test_fd);
+    ASSERT_TRUE(result);
+  }
+
   void RunTest() {
     std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
     std::string profile_location = GetScratchDir() + "/primary.prof";
     std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
 
     Copy(GetDexSrc2(), dex_location);
-    WriteFileBase64(kDexFileLayoutInputProfile, profile_location.c_str());
+    const char* location = dex_location.c_str();
+    std::string error_msg;
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files));
+    ASSERT_EQ(dex_files.size(), 1U);
+    std::unique_ptr<const DexFile>& dex_file = dex_files[0];
+    GenerateProfile(profile_location, dex_location, dex_file->GetLocationChecksum());
 
     const std::vector<std::string>& extra_args = { "--profile-file=" + profile_location };
     GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kLayoutProfile, extra_args);
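Generating the profile on the fly (instead of the removed base64 blob) keeps it keyed to the checksum of whatever GetDexSrc2() currently is: a profile entry only applies when its stored checksum matches the opened dex file. A generic illustration of that matching rule (hypothetical types, not the ProfileCompilationInfo API):

#include <cstdint>
#include <map>
#include <string>

struct ProfileDexEntry { uint32_t checksum; /* class/method indices... */ };

bool ProfileMatchesDex(const std::map<std::string, ProfileDexEntry>& profile,
                       const std::string& profile_key, uint32_t dex_checksum) {
  auto it = profile.find(profile_key);
  // Stale profiles (dex file rebuilt, checksum changed) are simply ignored.
  return it != profile.end() && it->second.checksum == dex_checksum;
}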
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 03d6227..916984c 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -475,9 +475,9 @@
     case DexFile::kDexAnnotationString: {
       const u4 idx = static_cast<u4>(readVarWidth(data, arg, false));
       if (gOptions.outputFormat == OUTPUT_PLAIN) {
-        dumpEscapedString(pDexFile->StringDataByIdx(idx));
+        dumpEscapedString(pDexFile->StringDataByIdx(dex::StringIndex(idx)));
       } else {
-        dumpXmlAttribute(pDexFile->StringDataByIdx(idx));
+        dumpXmlAttribute(pDexFile->StringDataByIdx(dex::StringIndex(idx)));
       }
       break;
     }
@@ -518,7 +518,7 @@
       for (u4 i = 0; i < size; i++) {
         const u4 name_idx = DecodeUnsignedLeb128(data);
         fputc(' ', gOutFile);
-        fputs(pDexFile->StringDataByIdx(name_idx), gOutFile);
+        fputs(pDexFile->StringDataByIdx(dex::StringIndex(name_idx)), gOutFile);
         fputc('=', gOutFile);
         dumpEncodedValue(pDexFile, data);
       }
@@ -599,7 +599,7 @@
   fprintf(gOutFile, "superclass_idx      : %d\n", pClassDef.superclass_idx_.index_);
   fprintf(gOutFile, "interfaces_off      : %d (0x%06x)\n",
           pClassDef.interfaces_off_, pClassDef.interfaces_off_);
-  fprintf(gOutFile, "source_file_idx     : %d\n", pClassDef.source_file_idx_);
+  fprintf(gOutFile, "source_file_idx     : %d\n", pClassDef.source_file_idx_.index_);
   fprintf(gOutFile, "annotations_off     : %d (0x%06x)\n",
           pClassDef.annotations_off_, pClassDef.annotations_off_);
   fprintf(gOutFile, "class_data_off      : %d (0x%06x)\n",
@@ -842,7 +842,7 @@
       break;
     case Instruction::kIndexStringRef:
       if (index < pDexFile->GetHeader().string_ids_size_) {
-        const char* st = pDexFile->StringDataByIdx(index);
+        const char* st = pDexFile->StringDataByIdx(dex::StringIndex(index));
         outSize = snprintf(buf.get(), bufSize, "\"%s\" // string@%0*x", st, width, index);
       } else {
         outSize = snprintf(buf.get(), bufSize, "<string?> // string@%0*x", width, index);
@@ -1564,13 +1564,13 @@
   // End of class.
   if (gOptions.outputFormat == OUTPUT_PLAIN) {
     const char* fileName;
-    if (pClassDef.source_file_idx_ != DexFile::kDexNoIndex) {
+    if (pClassDef.source_file_idx_.IsValid()) {
       fileName = pDexFile->StringDataByIdx(pClassDef.source_file_idx_);
     } else {
       fileName = "unknown";
     }
     fprintf(gOutFile, "  source_file_idx   : %d (%s)\n\n",
-            pClassDef.source_file_idx_, fileName);
+            pClassDef.source_file_idx_.index_, fileName);
   } else if (gOptions.outputFormat == OUTPUT_XML) {
     fprintf(gOutFile, "</class>\n");
   }
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index fe2bcce..b1e66be 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -340,7 +340,7 @@
 }
 
 void Collections::CreateStringId(const DexFile& dex_file, uint32_t i) {
-  const DexFile::StringId& disk_string_id = dex_file.GetStringId(i);
+  const DexFile::StringId& disk_string_id = dex_file.GetStringId(dex::StringIndex(i));
   StringData* string_data = new StringData(dex_file.GetStringData(disk_string_id));
   string_datas_.AddItem(string_data, disk_string_id.string_data_off_);
 
@@ -350,7 +350,7 @@
 
 void Collections::CreateTypeId(const DexFile& dex_file, uint32_t i) {
   const DexFile::TypeId& disk_type_id = dex_file.GetTypeId(dex::TypeIndex(i));
-  TypeId* type_id = new TypeId(GetStringId(disk_type_id.descriptor_idx_));
+  TypeId* type_id = new TypeId(GetStringId(disk_type_id.descriptor_idx_.index_));
   type_ids_.AddIndexedItem(type_id, TypeIdsOffset() + i * TypeId::ItemSize(), i);
 }
 
@@ -359,7 +359,7 @@
   const DexFile::TypeList* type_list = dex_file.GetProtoParameters(disk_proto_id);
   TypeList* parameter_type_list = CreateTypeList(type_list, disk_proto_id.parameters_off_);
 
-  ProtoId* proto_id = new ProtoId(GetStringId(disk_proto_id.shorty_idx_),
+  ProtoId* proto_id = new ProtoId(GetStringId(disk_proto_id.shorty_idx_.index_),
                                   GetTypeId(disk_proto_id.return_type_idx_.index_),
                                   parameter_type_list);
   proto_ids_.AddIndexedItem(proto_id, ProtoIdsOffset() + i * ProtoId::ItemSize(), i);
@@ -369,7 +369,7 @@
   const DexFile::FieldId& disk_field_id = dex_file.GetFieldId(i);
   FieldId* field_id = new FieldId(GetTypeId(disk_field_id.class_idx_.index_),
                                   GetTypeId(disk_field_id.type_idx_.index_),
-                                  GetStringId(disk_field_id.name_idx_));
+                                  GetStringId(disk_field_id.name_idx_.index_));
   field_ids_.AddIndexedItem(field_id, FieldIdsOffset() + i * FieldId::ItemSize(), i);
 }
 
@@ -377,7 +377,7 @@
   const DexFile::MethodId& disk_method_id = dex_file.GetMethodId(i);
   MethodId* method_id = new MethodId(GetTypeId(disk_method_id.class_idx_.index_),
                                      GetProtoId(disk_method_id.proto_idx_),
-                                     GetStringId(disk_method_id.name_idx_));
+                                     GetStringId(disk_method_id.name_idx_.index_));
   method_ids_.AddIndexedItem(method_id, MethodIdsOffset() + i * MethodId::ItemSize(), i);
 }
 
@@ -390,7 +390,7 @@
   const DexFile::TypeList* type_list = dex_file.GetInterfacesList(disk_class_def);
   TypeList* interfaces_type_list = CreateTypeList(type_list, disk_class_def.interfaces_off_);
 
-  const StringId* source_file = GetStringIdOrNullPtr(disk_class_def.source_file_idx_);
+  const StringId* source_file = GetStringIdOrNullPtr(disk_class_def.source_file_idx_.index_);
   // Annotations.
   AnnotationsDirectoryItem* annotations = nullptr;
   const DexFile::AnnotationsDirectoryItem* disk_annotations_directory_item =
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 68473c4..efe1aad 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -140,7 +140,7 @@
   const DexFile::ClassDef& pClassDef = pDexFile->GetClassDef(idx);
 
   const char* fileName;
-  if (pClassDef.source_file_idx_ == DexFile::kDexNoIndex) {
+  if (!pClassDef.source_file_idx_.IsValid()) {
     fileName = nullptr;
   } else {
     fileName = pDexFile->StringDataByIdx(pClassDef.source_file_idx_);
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 3ad0f1e..80c7113 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -703,13 +703,13 @@
         const Instruction* inst = Instruction::At(code_ptr);
         switch (inst->Opcode()) {
           case Instruction::CONST_STRING: {
-            const uint32_t string_index = inst->VRegB_21c();
+            const dex::StringIndex string_index(inst->VRegB_21c());
             unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
             ++num_string_ids_from_code_;
             break;
           }
           case Instruction::CONST_STRING_JUMBO: {
-            const uint32_t string_index = inst->VRegB_31c();
+            const dex::StringIndex string_index(inst->VRegB_31c());
             unique_string_ids_from_code_.insert(StringReference(&dex_file, string_index));
             ++num_string_ids_from_code_;
             break;
@@ -1053,7 +1053,8 @@
       if (options_.absolute_addresses_) {
         vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
       }
-      uint32_t vmap_table_offset = method_header == nullptr ? 0 : method_header->vmap_table_offset_;
+      uint32_t vmap_table_offset =
+          (method_header == nullptr) ? 0 : method_header->GetVmapTableOffset();
       vios->Stream() << StringPrintf("(offset=0x%08x)\n", vmap_table_offset);
 
       size_t vmap_table_offset_limit =
@@ -1603,7 +1604,7 @@
       // Mark dex caches.
       dex_caches_.clear();
       {
-        ReaderMutexLock mu(self, *class_linker->DexLock());
+        ReaderMutexLock mu(self, *Locks::dex_lock_);
         for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
           ObjPtr<mirror::DexCache> dex_cache =
               ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
diff --git a/runtime/Android.bp b/runtime/Android.bp
index c6f479f..08be5b2 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -45,6 +45,7 @@
         "base/timing_logger.cc",
         "base/unix_file/fd_file.cc",
         "base/unix_file/random_access_file_utils.cc",
+        "cha.cc",
         "check_jni.cc",
         "class_linker.cc",
         "class_table.cc",
@@ -514,6 +515,7 @@
         "base/transform_iterator_test.cc",
         "base/variant_map_test.cc",
         "base/unix_file/fd_file_test.cc",
+        "cha_test.cc",
         "class_linker_test.cc",
         "compiler_filter_test.cc",
         "dex_file_test.cc",
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 3a83eaf..a71ab4b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1109,62 +1109,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    push   {r10-r12, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r10, 0
-    .cfi_rel_offset r11, 4
-    .cfi_rel_offset ip, 8
-    .cfi_rel_offset lr, 12
-    ldr    r10, [sp, #16]                                        @ load referrer
-    ldr    r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
-    ldr    r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
-    ubfx   r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
-    add    r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
-    ldrd   r10, r11, [r10]                               @ load index into r11 and pointer into r10
-    cmp    r0, r11
-    bne    .Lart_quick_resolve_string_slow_path
-#ifdef USE_READ_BARRIER
-    ldr    r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r0, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    mov    r0, r10
-    pop    {r10-r12, pc}
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr    r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsrs   r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
-    bcs    .Lart_quick_resolve_string_no_rb
-    mov    r0, r10
-    .cfi_remember_state
-    pop    {r10-r12, lr}
-    .cfi_adjust_cfa_offset -16
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_restore r12
-    .cfi_restore lr
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
-    b      .Lslow_rb_art_quick_read_barrier_mark_reg00  @ Get the marked string back.
-    .cfi_restore_state
-#endif
-
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
-    push {r0-r9}                  @ 10 words of callee saves and args; {r10-r12, lr} already saved.
-    .cfi_adjust_cfa_offset 40
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r1, 4
-    .cfi_rel_offset r2, 8
-    .cfi_rel_offset r3, 12
-    .cfi_rel_offset r4, 16
-    .cfi_rel_offset r5, 20
-    .cfi_rel_offset r6, 24
-    .cfi_rel_offset r7, 28
-    .cfi_rel_offset r8, 32
-    .cfi_rel_offset r9, 36
-    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1   @ save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME r1                   @ save everything in case of GC
     mov    r1, r9                                    @ pass Thread::Current
     bl     artResolveStringFromCode                  @ (uint32_t string_idx, Thread*)
     cbz    r0, 1f                                    @ If result is null, deliver the OOME.
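For context on what this hunk deletes: the hand-written fast path probed a small direct-mapped dex cache of (string pointer, string index) pairs before falling back to the runtime; the replacement always sets up the save-everything frame and calls the runtime. In C-like form, the removed probe was roughly this (field layout simplified; names illustrative):

#include <cstdint>

// One slot of the per-class string dex cache: a (pointer, index) pair that
// the assembly loaded as a single 64-bit value.
struct StringDexCachePair {
  void* string;    // mirror::String*, low 32 bits in the packed form.
  uint32_t index;  // String index, high 32 bits in the packed form.
};

void* ResolveStringFastPath(StringDexCachePair* cache,
                            uint32_t cache_size_minus_one,  // Power-of-two size - 1.
                            uint32_t string_idx) {
  StringDexCachePair& entry = cache[string_idx & cache_size_minus_one];
  // Hit only if the slot currently holds this exact index; otherwise the
  // caller takes the slow path into artResolveStringFromCode.
  return entry.index == string_idx ? entry.string : nullptr;
}

The same deletion pattern repeats in the arm64, x86, and x86_64 hunks below.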
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 73bca03..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1317,6 +1317,7 @@
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
     ret
     .cfi_restore_state                // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598
 
 .Lthrow_class_cast_exception:
     // Restore
@@ -1484,6 +1485,7 @@
     strb w3, [x3, x0]
     ret
     .cfi_restore_state            // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
 .Lthrow_array_store_exception:
     RESTORE_TWO_REGS x2, xLR, 16
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
@@ -1651,44 +1653,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ldr   x29, [sp, #(2 * __SIZEOF_POINTER__)]                   // load referrer
-    ldr   w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET]         // load declaring class
-    ldr   x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]  // load string dex cache
-    ubfx  lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into LR
-    ldr   x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x29
-    cmp   x0, x29, lsr #32                                       // compare against upper 32 bits
-    bne   .Lart_quick_resolve_string_slow_path
-    ubfx  x0, x29, #0, #32                                       // extract lower 32 bits into x0
-#ifdef USE_READ_BARRIER
-    // Most common case: GC is not marking.
-    ldr    w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x29, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ret
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr   x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbnz  x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
-    b     .Lslow_rb_art_quick_read_barrier_mark_reg00  // Get the marked string back.
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-#endif
-
-// Slow path case, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR  // save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME                     // save everything for stack crawl
     mov   x1, xSELF                                 // pass Thread::Current
     bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
     cbz   w0, 1f                                    // If result is null, deliver the OOME.
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 6a442a5..5c56923 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -71,7 +71,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fa86bf4..db2fdca 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -107,7 +107,28 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 .endm
 
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+.endm
+
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
+.endm
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -187,20 +208,6 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index fb405fa..c6f4c03 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1085,15 +1085,12 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called
+// for CC if the GC is not marking.
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
     // EBX, EDX: free.
-#if defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
     PUSH esi
     PUSH edi
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
@@ -1151,51 +1148,17 @@
 END_FUNCTION art_quick_alloc_object_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    PUSH edi
-    PUSH esi
-    // Save xmm0 at an aligned address on the stack.
-    subl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(12)
-    movsd %xmm0, 0(%esp)
-    movl 24(%esp), %edi                                          // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi           // get declaring class
-    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi    // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
-    andl %eax, %esi
-    movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
-    movd %xmm0, %edi                                             // extract pointer
-    pshufd LITERAL(0x55), %xmm0, %xmm0                           // shuffle index into lowest bits
-    movd %xmm0, %esi                                             // extract index
-    // Restore xmm0 and remove it together with padding from the stack.
-    movsd 0(%esp), %xmm0
-    addl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(-12)
-    cmp %esi, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %edi, %eax
-    CFI_REMEMBER_STATE
-    POP esi
-    POP edi
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(esp, 24)                          // workaround for clang bug: 31975598
-
-.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
     // Outgoing argument set up
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
-    subl LITERAL(8), %esp                                        // push padding
+    subl LITERAL(8), %esp                                 // push padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                                                     // pass arg1
+    PUSH eax                                              // pass arg1
     call SYMBOL(artResolveStringFromCode)
-    addl LITERAL(16), %esp                                       // pop arguments
+    addl LITERAL(16), %esp                                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    testl %eax, %eax                                        // If result is null, deliver the OOME.
+    testl %eax, %eax                                      // If result is null, deliver the OOME.
     jz 1f
     CFI_REMEMBER_STATE
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 860b77e..4c46b08 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -18,6 +18,13 @@
 
 #include "arch/quick_alloc_entrypoints.S"
 
+MACRO0(ASSERT_USE_READ_BARRIER)
+#if !defined(USE_READ_BARRIER)
+    int3
+    int3
+#endif
+END_MACRO
+
 MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
     // Create space for ART FP callee-saved registers
     subq MACRO_LITERAL(4 * 8), %rsp
@@ -972,8 +979,10 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+
 // Comment out allocators that have x86_64 specific asm.
+// Region TLAB:
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
@@ -986,6 +995,19 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+// Normal TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
@@ -1162,16 +1184,11 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be
+// called with CC if the GC is not active.
 DEFINE_FUNCTION art_quick_alloc_object_tlab
-    // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
-    // Might need a special macro since rsi and edx is 32b/64b mismatched.
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
@@ -1181,29 +1198,69 @@
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
 END_FUNCTION art_quick_alloc_object_tlab
 
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
+// called with CC if the GC is not active.
+DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
+.Lart_quick_alloc_object_resolved_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
+END_FUNCTION art_quick_alloc_object_resolved_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
+// May be called with CC if the GC is not active.
+DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
+    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
+.Lart_quick_alloc_object_initialized_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
+END_FUNCTION art_quick_alloc_object_initialized_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_array_tlab
+    // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: klass, R8, R9: free. RAX: return val.
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
+    movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
+    testl %ecx, %ecx
+    jz .Lart_quick_alloc_array_tlab_slow_path
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_tlab_slow_path
+.Lart_quick_alloc_array_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeTLAB
+END_FUNCTION art_quick_alloc_array_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB).
+DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab
+    // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
+    // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
+    movq %rdi, %rcx
+    // Already resolved, no null check.
+    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path
+.Lart_quick_alloc_array_resolved_tlab_slow_path:
+    ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB
+END_FUNCTION art_quick_alloc_array_resolved_tlab
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB).
 DEFINE_FUNCTION art_quick_alloc_array_region_tlab
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: klass, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx      // Load dex cache resolved types array
     movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx        // Load the class
     // Null check so that we can load the lock word.
     testl %ecx, %ecx
     jz .Lart_quick_alloc_array_region_tlab_slow_path
-
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking
+    // Since we have allocation entrypoint switching, we know the GC is marking.
+    // Check the mark bit; if it is 0, do the read barrier mark.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
+    jz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path
-.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 return.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1226,33 +1283,11 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod*
     // RCX: mirror::Class* klass, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq %rdi, %rcx
+    // Caller is responsible for read barrier.
     // Already resolved, no null check.
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 return.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx)
-    jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path:
-    // The read barrier slow path. Mark the class.
-    PUSH rdi
-    PUSH rsi
-    PUSH rdx
-    // Outgoing argument set up
-    movq %rcx, %rdi                                            // Pass the class as the first param.
-    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
-    movq %rax, %rcx
-    POP rdx
-    POP rsi
-    POP rdi
-    jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_array_resolved_region_tlab_slow_path:
     ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
 END_FUNCTION art_quick_alloc_array_resolved_region_tlab
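The control-flow change in these region-TLAB fast paths: previously the code first tested whether the GC was marking and only then looked at the object's mark bit; now, because allocation entrypoints are switched when marking starts, reaching this entrypoint already implies the GC is marking, so only the per-class mark bit is tested. A C-like sketch of the remaining check (bit position illustrative):

#include <cstdint>

void* MarkObjectSlowPath(void* klass) { return klass; }  // Stand-in for artReadBarrierMark.

constexpr uint32_t kLockWordMarkBitMaskShifted = 1u << 29;  // Illustrative bit.

void* ReadBarrierOnClass(void* klass, uint32_t lock_word) {
  // Mark bit set: the class is already marked, use it directly.
  // Mark bit clear: take the slow path to mark it and get the to-space copy.
  return (lock_word & kLockWordMarkBitMaskShifted) != 0
      ? klass
      : MarkObjectSlowPath(klass);
}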
@@ -1262,24 +1297,19 @@
     // Fast path region tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
     movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
     // Null check so that we can load the lock word.
     testl %edx, %edx
     jz .Lart_quick_alloc_object_region_tlab_slow_path
-    // Test if the GC is marking.
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
-    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
-.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 avoid the read barrier.
+    // Since we have allocation entrypoint switching, we know the GC is marking.
+    // Check the mark bit; if it is 0, do the read barrier mark.
     testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
-    jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    // Use resolved one since we already did the null check.
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
     // The read barrier slow path. Mark the class.
     PUSH rdi
@@ -1302,10 +1332,7 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     // No read barrier since the caller is responsible for that.
     movq %rdi, %rdx
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
@@ -1318,10 +1345,7 @@
     // Fast path region tlab allocation.
     // RDI: mirror::Class* klass, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
-#if !defined(USE_READ_BARRIER)
-    int3
-    int3
-#endif
+    ASSERT_USE_READ_BARRIER
     movq %rdi, %rdx
     // No read barrier since the caller is responsible for that.
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
@@ -1330,34 +1354,7 @@
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    // Custom calling convention: RAX serves as both input and output.
-    PUSH r15
-    PUSH r14
-    movq 24(%rsp), %r15                                         // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d         // get declaring class
-    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15  // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
-    andl %eax, %r14d
-    movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
-    movl %r14d, %r15d
-    shrq LITERAL(32), %r14
-    cmpl %r14d, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %r15d, %eax
-    CFI_REMEMBER_STATE
-    POP r14
-    POP r15
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(rsp, 24)                        // workaround for clang bug: 31975598
-
-// Slow path, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME
     // Outgoing argument set up
     movl %eax, %edi                             // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 25b8ed2..a4a6e5a 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -54,7 +54,7 @@
 
 ObjPtr<mirror::String> ArtField::ResolveGetStringName(Thread* self,
                                                       const DexFile& dex_file,
-                                                      uint32_t string_idx,
+                                                      dex::StringIndex string_idx,
                                                       ObjPtr<mirror::DexCache> dex_cache) {
   StackHandleScope<1> hs(self);
   return Runtime::Current()->GetClassLinker()->ResolveString(dex_file,
diff --git a/runtime/art_field.h b/runtime/art_field.h
index cacb324..427e103 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -221,7 +221,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::String> ResolveGetStringName(Thread* self,
                                               const DexFile& dex_file,
-                                              uint32_t string_idx,
+                                              dex::StringIndex string_idx,
                                               ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 1e809d5..ef03bb3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -105,7 +105,7 @@
       DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
-  return access_flags_;
+  return access_flags_.load(std::memory_order_relaxed);
 }
 
 inline uint16_t ArtMethod::GetMethodIndex() {
@@ -399,7 +399,11 @@
 
 inline mirror::DexCache* ArtMethod::GetDexCache() {
   DCHECK(!IsProxyMethod());
-  return GetDeclaringClass()->GetDexCache();
+  if (UNLIKELY(IsObsolete())) {
+    return GetObsoleteDexCache();
+  } else {
+    return GetDeclaringClass()->GetDexCache();
+  }
 }
 
 template<ReadBarrierOption kReadBarrierOption>
@@ -448,6 +452,53 @@
   return type;
 }
 
+inline bool ArtMethod::HasSingleImplementation() {
+  if (IsFinal() || GetDeclaringClass()->IsFinal()) {
+    // We don't set kAccSingleImplementation for these cases since intrinsics
+    // can also use the flag.
+    return true;
+  }
+  return (GetAccessFlags() & kAccSingleImplementation) != 0;
+}
+
+inline void ArtMethod::SetIntrinsic(uint32_t intrinsic) {
+  DCHECK(IsUint<8>(intrinsic));
+  // Currently we only do intrinsics for static/final methods or methods of final
+  // classes. We don't set kAccSingleImplementation for those methods.
+  DCHECK(IsStatic() || IsFinal() || GetDeclaringClass()->IsFinal()) <<
+      "Potential conflict with kAccSingleImplementation";
+  uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
+      kAccIntrinsic |
+      (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
+  if (kIsDebugBuild) {
+    uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
+    bool is_constructor = IsConstructor();
+    bool is_synchronized = IsSynchronized();
+    bool skip_access_checks = SkipAccessChecks();
+    bool is_fast_native = IsFastNative();
+    bool is_copied = IsCopied();
+    bool is_miranda = IsMiranda();
+    bool is_default = IsDefault();
+    bool is_default_conflict = IsDefaultConflicting();
+    bool is_compilable = IsCompilable();
+    bool must_count_locks = MustCountLocks();
+    SetAccessFlags(new_value);
+    DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
+    DCHECK_EQ(is_constructor, IsConstructor());
+    DCHECK_EQ(is_synchronized, IsSynchronized());
+    DCHECK_EQ(skip_access_checks, SkipAccessChecks());
+    DCHECK_EQ(is_fast_native, IsFastNative());
+    DCHECK_EQ(is_copied, IsCopied());
+    DCHECK_EQ(is_miranda, IsMiranda());
+    DCHECK_EQ(is_default, IsDefault());
+    DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
+    DCHECK_EQ(is_compilable, IsCompilable());
+    DCHECK_EQ(must_count_locks, MustCountLocks());
+  } else {
+    SetAccessFlags(new_value);
+  }
+}
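The packing in SetIntrinsic stores the 8-bit intrinsic ordinal in access-flag bits that intrinsic methods never otherwise need, just above the preserved mask; the debug block then verifies every predicate an intrinsic still relies on reads the same before and after. A sketch of the bit layout with illustrative constants (the real masks live in modifiers.h):

#include <cstdint>

constexpr uint32_t kAccIntrinsicSketch = 0x80000000u;             // Marker, bit 31.
constexpr uint32_t kFlagsNotUsedByIntrinsicSketch = 0x007FFFFFu;  // Keep low 23 bits.

uint32_t PackIntrinsic(uint32_t access_flags, uint32_t intrinsic_ordinal) {
  // The ordinal occupies bits 23..30 (23 == popcount of the kept mask),
  // leaving bit 31 for the intrinsic marker.
  return (access_flags & kFlagsNotUsedByIntrinsicSketch) |
         kAccIntrinsicSketch |
         (intrinsic_ordinal << 23);
}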
+
 template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
 void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
   if (LIKELY(!declaring_class_.IsNull())) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index d1454b6..96b6f18 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -35,6 +35,7 @@
 #include "jit/profiling_info.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/executable.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -50,6 +51,17 @@
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 
+ArtMethod* ArtMethod::GetSingleImplementation() {
+  DCHECK(!IsNative());
+  if (!IsAbstract()) {
+    // A non-abstract method's single implementation is itself.
+    return this;
+  }
+  // TODO: Add single-implementation logic for abstract methods by storing it
+  // in ptr_sized_fields_.
+  return nullptr;
+}
+
 ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                           jobject jlr_method) {
   ObjPtr<mirror::Executable> executable = soa.Decode<mirror::Executable>(jlr_method);
@@ -57,6 +69,29 @@
   return executable->GetArtMethod();
 }
 
+mirror::DexCache* ArtMethod::GetObsoleteDexCache() {
+  DCHECK(!Runtime::Current()->IsAotCompiler()) << PrettyMethod();
+  DCHECK(IsObsolete());
+  ObjPtr<mirror::ClassExt> ext(GetDeclaringClass()->GetExtData());
+  CHECK(!ext.IsNull());
+  ObjPtr<mirror::PointerArray> obsolete_methods(ext->GetObsoleteMethods());
+  CHECK(!obsolete_methods.IsNull());
+  DCHECK(ext->GetObsoleteDexCaches() != nullptr);
+  int32_t len = obsolete_methods->GetLength();
+  DCHECK_EQ(len, ext->GetObsoleteDexCaches()->GetLength());
+  // Using kRuntimePointerSize (instead of the image pointer size) is fine since images
+  // should never contain obsolete methods, so the two sizes are always the same.
+  PointerSize pointer_size = kRuntimePointerSize;
+  DCHECK_EQ(kRuntimePointerSize, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  for (int32_t i = 0; i < len; i++) {
+    if (this == obsolete_methods->GetElementPtrSize<ArtMethod*>(i, pointer_size)) {
+      return ext->GetObsoleteDexCaches()->Get(i);
+    }
+  }
+  LOG(FATAL) << "This method does not appear in the obsolete map of its class!";
+  UNREACHABLE();
+}
+
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
@@ -322,7 +357,7 @@
   CHECK(!IsFastNative()) << PrettyMethod();
   CHECK(native_method != nullptr) << PrettyMethod();
   if (is_fast) {
-    SetAccessFlags(GetAccessFlags() | kAccFastNative);
+    AddAccessFlags(kAccFastNative);
   }
   SetEntryPointFromJni(native_method);
 }
@@ -480,7 +515,7 @@
     if (oat_file == nullptr) {
       return nullptr;
     }
-    return oat_file->DexBegin() + header->vmap_table_offset_;
+    return oat_file->DexBegin() + header->GetVmapTableOffset();
   } else {
     return oat_method.GetVmapTable();
   }
@@ -578,7 +613,7 @@
   DCHECK(method_header->Contains(pc))
       << PrettyMethod()
       << " " << std::hex << pc << " " << oat_entry_point
-      << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
+      << " " << (uintptr_t)(method_header->GetCode() + method_header->GetCodeSize());
   return method_header;
 }
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0e1d7e7..3bc6f5d 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -85,9 +85,29 @@
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
+  // This version should only be called when it is certain there is no
+  // concurrent access, so atomicity need not be guaranteed. For example,
+  // before the method is linked.
   void SetAccessFlags(uint32_t new_access_flags) {
-    // Not called within a transaction.
-    access_flags_ = new_access_flags;
+    access_flags_.store(new_access_flags, std::memory_order_relaxed);
+  }
+
+  // This setter guarantees atomicity.
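+  // It uses a compare-and-swap loop: on failure, compare_exchange_weak
+  // reloads old_access_flags with the current value, so new_access_flags is
+  // recomputed from the latest value before retrying.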
+  void AddAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags | flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+  }
+
+  // This setter guarantees atomicity.
+  void ClearAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags & ~flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
   }
 
   // Approximate what kind of method call would be used for this method.
@@ -142,39 +162,7 @@
     return (GetAccessFlags() & kAccIntrinsic) != 0;
   }
 
-  void SetIntrinsic(uint32_t intrinsic) {
-    DCHECK(IsUint<8>(intrinsic));
-    uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
-        kAccIntrinsic |
-        (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
-    if (kIsDebugBuild) {
-      uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
-      bool is_constructor = IsConstructor();
-      bool is_synchronized = IsSynchronized();
-      bool skip_access_checks = SkipAccessChecks();
-      bool is_fast_native = IsFastNative();
-      bool is_copied = IsCopied();
-      bool is_miranda = IsMiranda();
-      bool is_default = IsDefault();
-      bool is_default_conflict = IsDefaultConflicting();
-      bool is_compilable = IsCompilable();
-      bool must_count_locks = MustCountLocks();
-      SetAccessFlags(new_value);
-      DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
-      DCHECK_EQ(is_constructor, IsConstructor());
-      DCHECK_EQ(is_synchronized, IsSynchronized());
-      DCHECK_EQ(skip_access_checks, SkipAccessChecks());
-      DCHECK_EQ(is_fast_native, IsFastNative());
-      DCHECK_EQ(is_copied, IsCopied());
-      DCHECK_EQ(is_miranda, IsMiranda());
-      DCHECK_EQ(is_default, IsDefault());
-      DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
-      DCHECK_EQ(is_compilable, IsCompilable());
-      DCHECK_EQ(must_count_locks, MustCountLocks());
-    } else {
-      SetAccessFlags(new_value);
-    }
-  }
+  ALWAYS_INLINE void SetIntrinsic(uint32_t intrinsic) REQUIRES_SHARED(Locks::mutator_lock_);
 
   uint32_t GetIntrinsic() {
     DCHECK(IsIntrinsic());
@@ -227,6 +215,11 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  bool IsObsolete() {
+    // TODO: Maybe make the IsIntrinsic check unnecessary.
+    return !IsIntrinsic() && (GetAccessFlags() & kAccObsoleteMethod) != 0;
+  }
+
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
     return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
@@ -245,6 +238,10 @@
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
+  bool IsVarargs() {
+    return (GetAccessFlags() & kAccVarargs) != 0;
+  }
+
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsProxyMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -254,7 +251,7 @@
 
   void SetSkipAccessChecks() {
     DCHECK(!SkipAccessChecks());
-    SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
+    AddAccessFlags(kAccSkipAccessChecks);
   }
 
   // Should this method be run in the interpreter and count locks (e.g., failed structured-
@@ -456,6 +453,26 @@
     return DataOffset(kRuntimePointerSize);
   }
 
+  ALWAYS_INLINE bool HasSingleImplementation() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetHasSingleImplementation(bool single_impl) {
+    DCHECK(!IsIntrinsic()) << "conflict with intrinsic bits";
+    if (single_impl) {
+      AddAccessFlags(kAccSingleImplementation);
+    } else {
+      ClearAccessFlags(kAccSingleImplementation);
+    }
+  }
+
+  ArtMethod* GetSingleImplementation()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetSingleImplementation(ArtMethod* method, PointerSize pointer_size) {
+    DCHECK(!IsNative());
+    DCHECK(IsAbstract());  // Non-abstract method's single implementation is just itself.
+    SetDataPtrSize(method, pointer_size);
+  }
+
   void* GetEntryPointFromJni() {
     DCHECK(IsNative());
     return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
@@ -557,6 +574,7 @@
   mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::DexCache* GetObsoleteDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -649,7 +667,10 @@
   GcRoot<mirror::Class> declaring_class_;
 
   // Access flags; low 16 bits are defined by spec.
-  uint32_t access_flags_;
+  // Getting and setting this flag needs to be atomic when concurrency is
+  // possible, e.g. after this method's class is linked, such as when setting
+  // verifier flags and the single-implementation flag.
+  std::atomic<std::uint32_t> access_flags_;
 
   /* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
 
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 0d842cc..9bcda35 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -80,6 +80,11 @@
   return timed_out;
 }
 
+int Barrier::GetCount(Thread* self) {
+  MutexLock mu(self, lock_);
+  return count_;
+}
+
 void Barrier::SetCountLocked(Thread* self, int count) {
   count_ = count;
   if (count == 0) {
diff --git a/runtime/barrier.h b/runtime/barrier.h
index 94977fb..d7c4661 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -61,6 +61,8 @@
   // another thread is still in Wait().  See above.
   void Init(Thread* self, int count) REQUIRES(!lock_);
 
+  int GetCount(Thread* self) REQUIRES(!lock_);
+
  private:
   void SetCountLocked(Thread* self, int count) REQUIRES(lock_);
 
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 62cd2a7..2feb28a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -94,6 +94,7 @@
   kArenaAllocGraphChecker,
   kArenaAllocVerifier,
   kArenaAllocCallingConvention,
+  kArenaAllocCHA,
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index e8ef69f..ce452cb 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -58,6 +58,7 @@
 Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::cha_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
 ConditionVariable* Locks::thread_exit_cond_ = nullptr;
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
@@ -66,6 +67,7 @@
 Uninterruptible Roles::uninterruptible_;
 ReaderWriterMutex* Locks::jni_globals_lock_ = nullptr;
 Mutex* Locks::jni_weak_globals_lock_ = nullptr;
+ReaderWriterMutex* Locks::dex_lock_ = nullptr;
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
@@ -955,10 +957,12 @@
     DCHECK(logging_lock_ != nullptr);
     DCHECK(mutator_lock_ != nullptr);
     DCHECK(profiler_lock_ != nullptr);
+    DCHECK(cha_lock_ != nullptr);
     DCHECK(thread_list_lock_ != nullptr);
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(unexpected_signal_lock_ != nullptr);
+    DCHECK(dex_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
     LockLevel current_lock_level = kInstrumentEntrypointsLock;
@@ -1014,6 +1018,10 @@
     DCHECK(breakpoint_lock_ == nullptr);
     breakpoint_lock_ = new ReaderWriterMutex("breakpoint lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
+    DCHECK(cha_lock_ == nullptr);
+    cha_lock_ = new Mutex("CHA lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
@@ -1033,6 +1041,10 @@
       modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
     }
 
+    UPDATE_CURRENT_LOCK_LEVEL(kDexLock);
+    DCHECK(dex_lock_ == nullptr);
+    dex_lock_ = new ReaderWriterMutex("ClassLinker dex lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kOatFileManagerLock);
     DCHECK(oat_file_manager_lock_ == nullptr);
     oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7e73e0d..255ad71 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -97,6 +97,7 @@
   kMonitorPoolLock,
   kClassLinkerClassesLock,  // TODO rename.
   kJitCodeCacheLock,
+  kCHALock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
@@ -627,9 +628,12 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
+  // Guards Class Hierarchy Analysis (CHA).
+  static Mutex* cha_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(cha_lock_);
 
   // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
   static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
@@ -655,8 +659,10 @@
   // Guards modification of the LDT on x86.
   static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
 
+  static ReaderWriterMutex* dex_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+
   // Guards opened oat files in OatFileManager.
-  static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+  static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(dex_lock_);
 
   // Guards extra string entries for VerifierDeps.
   static ReaderWriterMutex* verifier_deps_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
diff --git a/runtime/cha.cc b/runtime/cha.cc
new file mode 100644
index 0000000..be675a8
--- /dev/null
+++ b/runtime/cha.cc
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "thread_pool.h"
+
+namespace art {
+
+void ClassHierarchyAnalysis::AddDependency(ArtMethod* method,
+                                           ArtMethod* dependent_method,
+                                           OatQuickMethodHeader* dependent_header) {
+  auto it = cha_dependency_map_.find(method);
+  if (it == cha_dependency_map_.end()) {
+    // Insert a new dependency list and reuse the returned iterator rather
+    // than looking the key up a second time.
+    it = cha_dependency_map_.emplace(
+        method,
+        new std::vector<std::pair<art::ArtMethod*, art::OatQuickMethodHeader*>>()).first;
+  } else {
+    DCHECK(it->second != nullptr);
+  }
+  it->second->push_back(std::make_pair(dependent_method, dependent_header));
+}
+
+std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>>*
+    ClassHierarchyAnalysis::GetDependents(ArtMethod* method) {
+  auto it = cha_dependency_map_.find(method);
+  if (it != cha_dependency_map_.end()) {
+    DCHECK(it->second != nullptr);
+    return it->second;
+  }
+  return nullptr;
+}
+
+void ClassHierarchyAnalysis::RemoveDependencyFor(ArtMethod* method) {
+  auto it = cha_dependency_map_.find(method);
+  if (it != cha_dependency_map_.end()) {
+    auto dependents = it->second;
+    cha_dependency_map_.erase(it);
+    delete dependents;
+  }
+}
+
+void ClassHierarchyAnalysis::RemoveDependentsWithMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  // Iterate through all entries in the dependency map and remove any entry that
+  // contains one of those in method_headers.
+  for (auto map_it = cha_dependency_map_.begin(); map_it != cha_dependency_map_.end(); ) {
+    auto dependents = map_it->second;
+    for (auto vec_it = dependents->begin(); vec_it != dependents->end(); ) {
+      OatQuickMethodHeader* method_header = vec_it->second;
+      if (method_headers.find(method_header) != method_headers.end()) {
+        vec_it = dependents->erase(vec_it);
+      } else {
+        vec_it++;
+      }
+    }
+    // Remove the map entry if there are no more dependents.
+    if (dependents->empty()) {
+      map_it = cha_dependency_map_.erase(map_it);
+      delete dependents;
+    } else {
+      map_it++;
+    }
+  }
+}
+
+// This stack visitor walks the stack and, for compiled code with certain
+// method headers, sets the should_deoptimize flag on the stack to 1.
+// TODO: also set the register value to 1 when should_deoptimize is allocated in
+// a register.
+class CHAStackVisitor FINAL : public StackVisitor {
+ public:
+  CHAStackVisitor(Thread* thread_in,
+                  Context* context,
+                  const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        method_headers_(method_headers) {
+  }
+
+  bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = GetMethod();
+    if (method == nullptr || method->IsRuntimeMethod() || method->IsNative()) {
+      return true;
+    }
+    if (GetCurrentQuickFrame() == nullptr) {
+      // Not compiled code.
+      return true;
+    }
+    // The method may have multiple versions of compiled code. Check the
+    // method header to see if it has the should_deoptimize flag.
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    if (!method_header->HasShouldDeoptimizeFlag()) {
+      // This compiled version doesn't have the should_deoptimize flag. Skip.
+      return true;
+    }
+    if (method_headers_.find(method_header) == method_headers_.end()) {
+      // Not in the list of method headers that should be deoptimized.
+      return true;
+    }
+
+    // The compiled code on stack is not valid anymore. Need to deoptimize.
+    SetShouldDeoptimizeFlag();
+
+    return true;
+  }
+
+ private:
+  void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+    QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+    size_t frame_size = frame_info.FrameSizeInBytes();
+    uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+    size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
+        GetBytesPerGprSpillLocation(kRuntimeISA);
+    size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
+        GetBytesPerFprSpillLocation(kRuntimeISA);
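+    // The should_deoptimize flag lives just below the callee-save spills at
+    // the top of the frame, so its offset from the stack pointer is the frame
+    // size minus the spill sizes minus the flag size.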
+    size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+    uint8_t* should_deoptimize_addr = sp + offset;
+    // Set deoptimization flag to 1.
+    DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
+    *should_deoptimize_addr = 1;
+  }
+
+  // Set of method headers for compiled code that should be deoptimized.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHAStackVisitor);
+};
+
+class CHACheckpoint FINAL : public Closure {
+ public:
+  explicit CHACheckpoint(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : barrier_(0),
+        method_headers_(method_headers) {}
+
+  void Run(Thread* thread) OVERRIDE {
+    // Note thread and self may not be equal if thread was already suspended at
+    // the point of the request.
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    CHAStackVisitor visitor(thread, nullptr, method_headers_);
+    visitor.WalkStack();
+    barrier_.Pass(self);
+  }
+
+  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
+    Thread* self = Thread::Current();
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    barrier_.Increment(self, threads_running_checkpoint);
+  }
+
+ private:
+  // The barrier to be passed through and for the requestor to wait upon.
+  Barrier barrier_;
+  // List of method headers for invalidated compiled code.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHACheckpoint);
+};
+
+void ClassHierarchyAnalysis::VerifyNonSingleImplementation(mirror::Class* verify_class,
+                                                           uint16_t verify_index) {
+  // Grab cha_lock_ to make sure all single-implementation updates are seen.
+  PointerSize image_pointer_size =
+      Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
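+  // Walk up the superclass chain; any superclass whose vtable covers
+  // verify_index must not have a single-implementation method in that slot.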
+  while (verify_class != nullptr) {
+    if (verify_index >= verify_class->GetVTableLength()) {
+      return;
+    }
+    ArtMethod* verify_method = verify_class->GetVTableEntry(verify_index, image_pointer_size);
+    DCHECK(!verify_method->HasSingleImplementation())
+        << "class: " << verify_class->PrettyClass()
+        << " verify_method: " << verify_method->PrettyMethod(true);
+    verify_class = verify_class->GetSuperClass();
+  }
+}
+
+void ClassHierarchyAnalysis::CheckSingleImplementationInfo(
+    Handle<mirror::Class> klass,
+    ArtMethod* virtual_method,
+    ArtMethod* method_in_super,
+    std::unordered_set<ArtMethod*>& invalidated_single_impl_methods) {
+  // TODO: if klass is not instantiable, virtual_method isn't invocable yet so
+  // even if it overrides, it doesn't invalidate single-implementation
+  // assumption.
+
+  DCHECK_NE(virtual_method, method_in_super);
+  DCHECK(method_in_super->GetDeclaringClass()->IsResolved()) << "class isn't resolved";
+  // If virtual_method doesn't come from a default interface method, it should
+  // be supplied by klass.
+  DCHECK(virtual_method->IsCopied() ||
+         virtual_method->GetDeclaringClass() == klass.Get());
+
+  // A new virtual_method should set method_in_super to
+  // non-single-implementation (if not set already).
+  // We don't grab cha_lock_ here: the single-implementation flag is never
+  // set back to true once it has been set to false.
+  if (!method_in_super->HasSingleImplementation()) {
+    // method_in_super already has multiple implementations. All methods in the
+    // same vtable slot in its super classes should already be marked
+    // non-single-implementation.
+    if (kIsDebugBuild) {
+      VerifyNonSingleImplementation(klass->GetSuperClass()->GetSuperClass(),
+                                    method_in_super->GetMethodIndex());
+    }
+    return;
+  }
+
+  // Native methods don't have the single-implementation flag set.
+  DCHECK(!method_in_super->IsNative());
+  // Invalidate method_in_super's single-implementation status.
+  invalidated_single_impl_methods.insert(method_in_super);
+}
+
+void ClassHierarchyAnalysis::InitSingleImplementationFlag(Handle<mirror::Class> klass,
+                                                          ArtMethod* method) {
+  DCHECK(method->IsCopied() || method->GetDeclaringClass() == klass.Get());
+  if (klass->IsFinal() || method->IsFinal()) {
+    // Final classes or methods do not need CHA for devirtualization.
+    // This frees up modifier bits for intrinsics, which currently are only
+    // used for static methods or methods of final classes.
+    return;
+  }
+  if (method->IsNative()) {
+    // A native method's invocation overhead is already high and it
+    // cannot be inlined. It's not worthwhile to devirtualize the
+    // call, which would add a deoptimization point.
+    DCHECK(!method->HasSingleImplementation());
+  } else {
+    method->SetHasSingleImplementation(true);
+    if (method->IsAbstract()) {
+      // There is no real implementation yet.
+      // TODO: implement single-implementation logic for abstract methods.
+      DCHECK(method->GetSingleImplementation() == nullptr);
+    } else {
+      // Single implementation of non-abstract method is itself.
+      DCHECK_EQ(method->GetSingleImplementation(), method);
+    }
+  }
+}
+
+void ClassHierarchyAnalysis::UpdateAfterLoadingOf(Handle<mirror::Class> klass) {
+  if (klass->IsInterface()) {
+    return;
+  }
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class == nullptr) {
+    return;
+  }
+
+  // Keeps track of all methods whose single-implementation assumption
+  // is invalidated by linking `klass`.
+  std::unordered_set<ArtMethod*> invalidated_single_impl_methods;
+
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  // Do an entry-by-entry comparison of vtable contents with super's vtable.
+  for (int32_t i = 0; i < super_class->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    ArtMethod* method_in_super = super_class->GetVTableEntry(i, image_pointer_size);
+    if (method == method_in_super) {
+      // vtable slot entry is inherited from super class.
+      continue;
+    }
+    InitSingleImplementationFlag(klass, method);
+    CheckSingleImplementationInfo(klass,
+                                  method,
+                                  method_in_super,
+                                  invalidated_single_impl_methods);
+  }
+
+  // For new virtual methods that don't override any superclass method.
+  for (int32_t i = super_class->GetVTableLength(); i < klass->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    InitSingleImplementationFlag(klass, method);
+  }
+
+  Runtime* const runtime = Runtime::Current();
+  if (!invalidated_single_impl_methods.empty()) {
+    Thread* self = Thread::Current();
+    // Method headers for compiled code to be invalidated.
+    std::unordered_set<OatQuickMethodHeader*> dependent_method_headers;
+
+    {
+      // We do this under cha_lock_. Committing code also grabs this lock to
+      // make sure the code is only committed when all single-implementation
+      // assumptions are still true.
+      MutexLock cha_mu(self, *Locks::cha_lock_);
+      // Invalidate compiled methods that assume some virtual calls have only
+      // single implementations.
+      for (ArtMethod* invalidated : invalidated_single_impl_methods) {
+        if (!invalidated->HasSingleImplementation()) {
+          // It might already have been invalidated while other class
+          // linking was going on.
+          continue;
+        }
+        invalidated->SetHasSingleImplementation(false);
+
+        if (runtime->IsAotCompiler()) {
+          // No need to invalidate any compiled code as the AotCompiler doesn't
+          // run any code.
+          continue;
+        }
+
+        // Invalidate all dependents.
+        auto dependents = GetDependents(invalidated);
+        if (dependents == nullptr) {
+          continue;
+        }
+        for (const auto& dependent : *dependents) {
+          ArtMethod* method = dependent.first;
+          OatQuickMethodHeader* method_header = dependent.second;
+          VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
+          DCHECK(runtime->UseJitCompilation());
+          runtime->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+              method, method_header);
+          dependent_method_headers.insert(method_header);
+        }
+        RemoveDependencyFor(invalidated);
+      }
+    }
+
+    if (dependent_method_headers.empty()) {
+      return;
+    }
+    // Deoptimize compiled code on the stack that should have been invalidated.
+    CHACheckpoint checkpoint(dependent_method_headers);
+    size_t threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(&checkpoint);
+    if (threads_running_checkpoint != 0) {
+      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/cha.h b/runtime/cha.h
new file mode 100644
index 0000000..ada5c89
--- /dev/null
+++ b/runtime/cha.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CHA_H_
+#define ART_RUNTIME_CHA_H_
+
+#include "art_method.h"
+#include "base/enums.h"
+#include "base/mutex.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "oat_quick_method_header.h"
+#include <unordered_map>
+#include <unordered_set>
+
+namespace art {
+
+/**
+ * Class Hierarchy Analysis (CHA) tries to devirtualize virtual calls into
+ * direct calls based on the info generated by analyzing class hierarchies.
+ * If a class is not subclassed, or even if it's subclassed but one of its
+ * virtual methods isn't overridden, a virtual call for that method can be
+ * changed into a direct call.
+ *
+ * Each virtual method carries a single-implementation status. The status is
+ * incrementally maintained at the end of class linking time when method
+ * overriding takes effect.
+ *
+ * The compiler takes advantage of the single-implementation info of a
+ * method. If a method A has the single-implementation flag set, the compiler
+ * devirtualizes the virtual call for method A into a direct call, and
+ * further tries to inline the direct call as a result. The compiler also
+ * registers a dependency recording that the compiled code depends on the
+ * assumption that method A has single-implementation status.
+ *
+ * When single-implementation info is updated at the end of class linking
+ * and method A's single-implementation status is invalidated, all compiled
+ * code that depends on the assumption that method A has single-implementation
+ * status needs to be invalidated. Method entrypoints that have this dependency
+ * are updated as a result. Method A can later be recompiled with less
+ * aggressive assumptions.
+ *
+ * For live compiled code that's on the stack, deoptimization is initiated
+ * to force the invalidated compiled code into interpreter mode to guarantee
+ * correctness. The deoptimization mechanism used is a hybrid of
+ * synchronous and asynchronous deoptimization. The synchronous part checks a
+ * hidden local variable flag for the method and, if it is true, initiates
+ * deoptimization. The asynchronous part issues a checkpoint that walks the
+ * stack and, for any compiled code on the stack that should be deoptimized,
+ * sets the hidden local variable to true.
+ *
+ * The cha_lock_ needs to be held for updating single-implementation status
+ * and for registering/unregistering CHA dependencies. Registering a CHA
+ * dependency and making the compiled code visible also need to be atomic;
+ * otherwise, we may miss invalidating CHA dependents, or make compiled code
+ * visible even after it is invalidated. Care needs to be taken between
+ * cha_lock_ and JitCodeCache::lock_ to guarantee this atomicity.
+ *
+ * We base our CHA on dynamically linked class profiles instead of doing static
+ * analysis. Static analysis can be too aggressive due to dynamic class loading
+ * at runtime, and too conservative since some classes may never actually be
+ * loaded at runtime.
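+ *
+ * As an illustrative example: if class A defines a virtual method foo() and
+ * no loaded subclass overrides it, a virtual call a.foo() can be
+ * devirtualized into a direct call to A.foo(). Loading a subclass that
+ * overrides foo() later invalidates A.foo()'s single-implementation status
+ * and all compiled code registered as depending on it.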
+ */
+class ClassHierarchyAnalysis {
+ public:
+  // Types for recording CHA dependencies.
+  // For invalidating a CHA dependency, we need to know both the ArtMethod and
+  // the method header. If the ArtMethod has compiled code with the method header
+  // as the entrypoint, we update the entrypoint to the interpreter bridge.
+  // We will also deoptimize frames that are currently executing the code of
+  // the method header.
+  typedef std::pair<ArtMethod*, OatQuickMethodHeader*> MethodAndMethodHeaderPair;
+  typedef std::vector<MethodAndMethodHeaderPair> ListOfDependentPairs;
+
+  ClassHierarchyAnalysis() {}
+
+  // Add a dependency that compiled code with `dependent_header` for `dependent_method`
+  // assumes that virtual `method` has single-implementation.
+  void AddDependency(ArtMethod* method,
+                     ArtMethod* dependent_method,
+                     OatQuickMethodHeader* dependent_header) REQUIRES(Locks::cha_lock_);
+
+  // Return compiled code that assumes that `method` has single-implementation.
+  std::vector<MethodAndMethodHeaderPair>* GetDependents(ArtMethod* method)
+      REQUIRES(Locks::cha_lock_);
+
+  // Remove dependency tracking for compiled code that assumes that
+  // `method` has single-implementation.
+  void RemoveDependencyFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
+
+  // Remove from cha_dependency_map_ all entries that contain OatQuickMethodHeader from
+  // the given `method_headers` set.
+  // This is used when some compiled code is freed.
+  void RemoveDependentsWithMethodHeaders(
+      const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(Locks::cha_lock_);
+
+  // Update CHA info for methods that `klass` overrides, after loading `klass`.
+  void UpdateAfterLoadingOf(Handle<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  void InitSingleImplementationFlag(Handle<mirror::Class> klass, ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // `virtual_method` in `klass` overrides `method_in_super`.
+  // This will invalidate some assumptions on single-implementation.
+  // Append methods that should have their single-implementation flag invalidated
+  // to `invalidated_single_impl_methods`.
+  void CheckSingleImplementationInfo(
+      Handle<mirror::Class> klass,
+      ArtMethod* virtual_method,
+      ArtMethod* method_in_super,
+      std::unordered_set<ArtMethod*>& invalidated_single_impl_methods)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Verify all methods in the same vtable slot from verify_class and its supers
+  // don't have single-implementation.
+  void VerifyNonSingleImplementation(mirror::Class* verify_class, uint16_t verify_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // A map that maps a method to a set of compiled code that assumes that method has a
+  // single implementation, which is used to do CHA-based devirtualization.
+  std::unordered_map<ArtMethod*, ListOfDependentPairs*> cha_dependency_map_
+    GUARDED_BY(Locks::cha_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(ClassHierarchyAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CHA_H_
diff --git a/runtime/cha_test.cc b/runtime/cha_test.cc
new file mode 100644
index 0000000..d2f335e
--- /dev/null
+++ b/runtime/cha_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class CHATest : public CommonRuntimeTest {};
+
+// Mocks some methods.
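+// The values below are arbitrary non-null, pointer-aligned addresses; the
+// dependency map treats them as opaque keys.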
+#define METHOD1 (reinterpret_cast<ArtMethod*>(8u))
+#define METHOD2 (reinterpret_cast<ArtMethod*>(16u))
+#define METHOD3 (reinterpret_cast<ArtMethod*>(24u))
+
+// Mocks some method headers.
+#define METHOD_HEADER1 (reinterpret_cast<OatQuickMethodHeader*>(128u))
+#define METHOD_HEADER2 (reinterpret_cast<OatQuickMethodHeader*>(136u))
+#define METHOD_HEADER3 (reinterpret_cast<OatQuickMethodHeader*>(144u))
+
+TEST_F(CHATest, CHACheckDependency) {
+  ClassHierarchyAnalysis cha;
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+
+  cha.AddDependency(METHOD1, METHOD2, METHOD_HEADER2);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  auto dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+
+  cha.AddDependency(METHOD1, METHOD3, METHOD_HEADER3);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 2u);
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+  ASSERT_EQ(dependents->at(1).first, METHOD3);
+  ASSERT_EQ(dependents->at(1).second, METHOD_HEADER3);
+
+  std::unordered_set<OatQuickMethodHeader*> headers;
+  headers.insert(METHOD_HEADER2);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD3);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER3);
+
+  cha.AddDependency(METHOD2, METHOD1, METHOD_HEADER1);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+
+  headers.insert(METHOD_HEADER3);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD1);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER1);
+
+  cha.RemoveDependencyFor(METHOD2);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+}
+
+}  // namespace art
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 81adaeb..0a65cd1 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -65,14 +65,15 @@
   return array_class.Ptr();
 }
 
-inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) {
+inline mirror::String* ClassLinker::ResolveString(dex::StringIndex string_idx,
+                                                  ArtMethod* referrer) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());
+  DCHECK_LT(string_idx.index_, declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string =
         mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
-                                           string_idx,
+                                           string_idx.index_,
                                            mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
@@ -247,7 +248,7 @@
   DCHECK(proxy_method->IsProxyMethod<kReadBarrierOption>());
   {
     Thread* const self = Thread::Current();
-    ReaderMutexLock mu(self, dex_lock_);
+    ReaderMutexLock mu(self, *Locks::dex_lock_);
     // Locate the dex cache of the original interface/Object
     for (const DexCacheData& data : dex_caches_) {
       if (!self->IsJWeakCleared(data.weak_root) &&
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f3aba97..674bad7 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -40,6 +40,7 @@
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "base/value_object.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "class_table-inl.h"
 #include "compiler_callbacks.h"
@@ -96,6 +97,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "trace.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
@@ -240,10 +242,11 @@
   ScopedLocalRef<jthrowable> cause(env, env->ExceptionOccurred());
   CHECK(cause.get() != nullptr);
 
-  // Boot classpath classes should not fail initialization.
-  if (!Runtime::Current()->IsAotCompiler()) {
+  // Boot classpath classes should not fail initialization. This is a sanity check that is
+  // fatal only in debug builds: it cannot be guaranteed in general, but a failure here in all
+  // likelihood leads to breakage down the line.
+  if (klass->GetClassLoader() == nullptr && !Runtime::Current()->IsAotCompiler()) {
     std::string tmp;
-    CHECK(klass->GetClassLoader() != nullptr) << klass->GetDescriptor(&tmp);
+    LOG(kIsDebugBuild ? FATAL : WARNING) << klass->GetDescriptor(&tmp) << " failed initialization";
   }
 
   env->ExceptionClear();
@@ -339,9 +342,7 @@
 }
 
 ClassLinker::ClassLinker(InternTable* intern_table)
-    // dex_lock_ is recursive as it may be used in stack dumping.
-    : dex_lock_("ClassLinker dex lock", kDexLock),
-      failed_dex_cache_class_lookups_(0),
+    : failed_dex_cache_class_lookups_(0),
       class_roots_(nullptr),
       array_iftable_(nullptr),
       find_array_class_cache_next_victim_(0),
@@ -820,123 +821,6 @@
   }
 }
 
-static void SanityCheckArtMethod(ArtMethod* m,
-                                 ObjPtr<mirror::Class> expected_class,
-                                 const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (m->IsRuntimeMethod()) {
-    ObjPtr<mirror::Class> declaring_class = m->GetDeclaringClassUnchecked();
-    CHECK(declaring_class == nullptr) << declaring_class << " " << m->PrettyMethod();
-  } else if (m->IsCopied()) {
-    CHECK(m->GetDeclaringClass() != nullptr) << m->PrettyMethod();
-  } else if (expected_class != nullptr) {
-    CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << m->PrettyMethod();
-  }
-  if (!spaces.empty()) {
-    bool contains = false;
-    for (gc::space::ImageSpace* space : spaces) {
-      auto& header = space->GetImageHeader();
-      size_t offset = reinterpret_cast<uint8_t*>(m) - space->Begin();
-
-      const ImageSection& methods = header.GetMethodsSection();
-      contains = contains || methods.Contains(offset);
-
-      const ImageSection& runtime_methods = header.GetRuntimeMethodsSection();
-      contains = contains || runtime_methods.Contains(offset);
-    }
-    CHECK(contains) << m << " not found";
-  }
-}
-
-static void SanityCheckArtMethodPointerArray(ObjPtr<mirror::PointerArray> arr,
-                                             ObjPtr<mirror::Class> expected_class,
-                                             PointerSize pointer_size,
-                                             const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  CHECK(arr != nullptr);
-  for (int32_t j = 0; j < arr->GetLength(); ++j) {
-    auto* method = arr->GetElementPtrSize<ArtMethod*>(j, pointer_size);
-    // expected_class == null means we are a dex cache.
-    if (expected_class != nullptr) {
-      CHECK(method != nullptr);
-    }
-    if (method != nullptr) {
-      SanityCheckArtMethod(method, expected_class, spaces);
-    }
-  }
-}
-
-static void SanityCheckArtMethodPointerArray(ArtMethod** arr,
-                                             size_t size,
-                                             PointerSize pointer_size,
-                                             const std::vector<gc::space::ImageSpace*>& spaces)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  CHECK_EQ(arr != nullptr, size != 0u);
-  if (arr != nullptr) {
-    bool contains = false;
-    for (auto space : spaces) {
-      auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
-      if (space->GetImageHeader().GetImageSection(
-          ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
-        contains = true;
-        break;
-      }
-    }
-    CHECK(contains);
-  }
-  for (size_t j = 0; j < size; ++j) {
-    ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size);
-    // expected_class == null means we are a dex cache.
-    if (method != nullptr) {
-      SanityCheckArtMethod(method, nullptr, spaces);
-    }
-  }
-}
-
-static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK(obj != nullptr);
-  CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj;
-  CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
-  if (obj->IsClass()) {
-    auto klass = obj->AsClass();
-    for (ArtField& field : klass->GetIFields()) {
-      CHECK_EQ(field.GetDeclaringClass(), klass);
-    }
-    for (ArtField& field : klass->GetSFields()) {
-      CHECK_EQ(field.GetDeclaringClass(), klass);
-    }
-    auto* runtime = Runtime::Current();
-    auto image_spaces = runtime->GetHeap()->GetBootImageSpaces();
-    auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : klass->GetMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_spaces);
-    }
-    auto* vtable = klass->GetVTable();
-    if (vtable != nullptr) {
-      SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
-    }
-    if (klass->ShouldHaveImt()) {
-      ImTable* imt = klass->GetImt(pointer_size);
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
-        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
-      }
-    }
-    if (klass->ShouldHaveEmbeddedVTable()) {
-      for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
-      }
-    }
-    mirror::IfTable* iftable = klass->GetIfTable();
-    for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-      if (iftable->GetMethodArrayCount(i) > 0) {
-        SanityCheckArtMethodPointerArray(
-            iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
-      }
-    }
-  }
-}
-
 // Set image methods' entry point to interpreter.
 class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor {
  public:
@@ -1444,7 +1328,7 @@
         }
       }
       {
-        WriterMutexLock mu2(self, dex_lock_);
+        WriterMutexLock mu2(self, *Locks::dex_lock_);
         // Make sure to do this after we update the arrays since we store the resolved types array
         // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
         // BSS.
@@ -1607,6 +1491,153 @@
   return true;
 }
 
+// Helper class for ArtMethod checks when adding an image. Keeps all required functionality
+// together and caches some intermediate results.
+class ImageSanityChecks FINAL {
+ public:
+  static void CheckObjects(gc::Heap* heap, ClassLinker* class_linker)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ImageSanityChecks isc(heap, class_linker);
+    heap->VisitObjects(ImageSanityChecks::SanityCheckObjectsCallback, &isc);
+  }
+
+  static void CheckPointerArray(gc::Heap* heap,
+                                ClassLinker* class_linker,
+                                ArtMethod** arr,
+                                size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ImageSanityChecks isc(heap, class_linker);
+    isc.SanityCheckArtMethodPointerArray(arr, size);
+  }
+
+  static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(obj != nullptr);
+    CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj;
+    CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
+    if (obj->IsClass()) {
+      ImageSanityChecks* isc = reinterpret_cast<ImageSanityChecks*>(arg);
+
+      auto klass = obj->AsClass();
+      for (ArtField& field : klass->GetIFields()) {
+        CHECK_EQ(field.GetDeclaringClass(), klass);
+      }
+      for (ArtField& field : klass->GetSFields()) {
+        CHECK_EQ(field.GetDeclaringClass(), klass);
+      }
+      const auto pointer_size = isc->pointer_size_;
+      for (auto& m : klass->GetMethods(pointer_size)) {
+        isc->SanityCheckArtMethod(&m, klass);
+      }
+      auto* vtable = klass->GetVTable();
+      if (vtable != nullptr) {
+        isc->SanityCheckArtMethodPointerArray(vtable, nullptr);
+      }
+      if (klass->ShouldHaveImt()) {
+        ImTable* imt = klass->GetImt(pointer_size);
+        for (size_t i = 0; i < ImTable::kSize; ++i) {
+          isc->SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr);
+        }
+      }
+      if (klass->ShouldHaveEmbeddedVTable()) {
+        for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
+          isc->SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr);
+        }
+      }
+      mirror::IfTable* iftable = klass->GetIfTable();
+      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+        if (iftable->GetMethodArrayCount(i) > 0) {
+          isc->SanityCheckArtMethodPointerArray(iftable->GetMethodArray(i), nullptr);
+        }
+      }
+    }
+  }
+
+ private:
+  ImageSanityChecks(gc::Heap* heap, ClassLinker* class_linker)
+      : spaces_(heap->GetBootImageSpaces()),
+        pointer_size_(class_linker->GetImagePointerSize()) {
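+    // Cache per-space begin pointers and (runtime) method sections up front so
+    // the per-method checks don't need to re-read each image header.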
+    space_begin_.reserve(spaces_.size());
+    method_sections_.reserve(spaces_.size());
+    runtime_method_sections_.reserve(spaces_.size());
+    for (gc::space::ImageSpace* space : spaces_) {
+      space_begin_.push_back(space->Begin());
+      auto& header = space->GetImageHeader();
+      method_sections_.push_back(&header.GetMethodsSection());
+      runtime_method_sections_.push_back(&header.GetRuntimeMethodsSection());
+    }
+  }
+
+  void SanityCheckArtMethod(ArtMethod* m, ObjPtr<mirror::Class> expected_class)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (m->IsRuntimeMethod()) {
+      ObjPtr<mirror::Class> declaring_class = m->GetDeclaringClassUnchecked();
+      CHECK(declaring_class == nullptr) << declaring_class << " " << m->PrettyMethod();
+    } else if (m->IsCopied()) {
+      CHECK(m->GetDeclaringClass() != nullptr) << m->PrettyMethod();
+    } else if (expected_class != nullptr) {
+      CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << m->PrettyMethod();
+    }
+    if (!spaces_.empty()) {
+      bool contains = false;
+      for (size_t i = 0; !contains && i != space_begin_.size(); ++i) {
+        const size_t offset = reinterpret_cast<uint8_t*>(m) - space_begin_[i];
+        contains = method_sections_[i]->Contains(offset) ||
+            runtime_method_sections_[i]->Contains(offset);
+      }
+      CHECK(contains) << m << " not found";
+    }
+  }
+
+  void SanityCheckArtMethodPointerArray(ObjPtr<mirror::PointerArray> arr,
+                                        ObjPtr<mirror::Class> expected_class)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK(arr != nullptr);
+    for (int32_t j = 0; j < arr->GetLength(); ++j) {
+      auto* method = arr->GetElementPtrSize<ArtMethod*>(j, pointer_size_);
+      // expected_class == null means we are a dex cache.
+      if (expected_class != nullptr) {
+        CHECK(method != nullptr);
+      }
+      if (method != nullptr) {
+        SanityCheckArtMethod(method, expected_class);
+      }
+    }
+  }
+
+  void SanityCheckArtMethodPointerArray(ArtMethod** arr, size_t size)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK_EQ(arr != nullptr, size != 0u);
+    if (arr != nullptr) {
+      bool contains = false;
+      for (auto space : spaces_) {
+        auto offset = reinterpret_cast<uint8_t*>(arr) - space->Begin();
+        if (space->GetImageHeader().GetImageSection(
+            ImageHeader::kSectionDexCacheArrays).Contains(offset)) {
+          contains = true;
+          break;
+        }
+      }
+      CHECK(contains);
+    }
+    for (size_t j = 0; j < size; ++j) {
+      ArtMethod* method = mirror::DexCache::GetElementPtrSize(arr, j, pointer_size_);
+      // expected_class == null means we are a dex cache.
+      if (method != nullptr) {
+        SanityCheckArtMethod(method, nullptr);
+      }
+    }
+  }
+
+  const std::vector<gc::space::ImageSpace*>& spaces_;
+  const PointerSize pointer_size_;
+
+  // Cached sections from the spaces.
+  std::vector<const uint8_t*> space_begin_;
+  std::vector<const ImageSection*> method_sections_;
+  std::vector<const ImageSection*> runtime_method_sections_;
+};
+
 bool ClassLinker::AddImageSpace(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1697,10 +1728,10 @@
       }
     } else {
       if (kSanityCheckObjects) {
-        SanityCheckArtMethodPointerArray(h_dex_cache->GetResolvedMethods(),
-                                         h_dex_cache->NumResolvedMethods(),
-                                         image_pointer_size_,
-                                         heap->GetBootImageSpaces());
+        ImageSanityChecks::CheckPointerArray(heap,
+                                             this,
+                                             h_dex_cache->GetResolvedMethods(),
+                                             h_dex_cache->NumResolvedMethods());
       }
       // Register dex files, keep track of existing ones that are conflicts.
       AppendToBootClassPath(*dex_file.get(), h_dex_cache);
@@ -1785,7 +1816,7 @@
       }
     }
     if (!app_image) {
-      heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
+      ImageSanityChecks::CheckObjects(heap, this);
     }
   }
 
@@ -2112,109 +2143,6 @@
           : static_cast<mirror::Array*>(mirror::IntArray::Alloc(self, length)));
 }
 
-void ClassLinker::InitializeDexCache(Thread* self,
-                                     ObjPtr<mirror::DexCache> dex_cache,
-                                     ObjPtr<mirror::String> location,
-                                     const DexFile& dex_file,
-                                     LinearAlloc* linear_alloc) {
-  ScopedAssertNoThreadSuspension sants(__FUNCTION__);
-  DexCacheArraysLayout layout(image_pointer_size_, &dex_file);
-  uint8_t* raw_arrays = nullptr;
-
-  const OatDexFile* const oat_dex = dex_file.GetOatDexFile();
-  if (oat_dex != nullptr && oat_dex->GetDexCacheArrays() != nullptr) {
-    raw_arrays = oat_dex->GetDexCacheArrays();
-  } else if (dex_file.NumStringIds() != 0u ||
-             dex_file.NumTypeIds() != 0u ||
-             dex_file.NumMethodIds() != 0u ||
-             dex_file.NumFieldIds() != 0u) {
-    // Zero-initialized.
-    raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
-  }
-
-  mirror::StringDexCacheType* strings = (dex_file.NumStringIds() == 0u) ? nullptr :
-      reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
-  GcRoot<mirror::Class>* types = (dex_file.NumTypeIds() == 0u) ? nullptr :
-      reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
-  ArtMethod** methods = (dex_file.NumMethodIds() == 0u) ? nullptr :
-      reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
-  ArtField** fields = (dex_file.NumFieldIds() == 0u) ? nullptr :
-      reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
-
-  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
-  if (dex_file.NumStringIds() < num_strings) {
-    num_strings = dex_file.NumStringIds();
-  }
-
-  // Note that we allocate the method type dex caches regardless of this flag,
-  // and we make sure here that they're not used by the runtime. This is in the
-  // interest of simplicity and to avoid extensive compiler and layout class changes.
-  //
-  // If this needs to be mitigated in a production system running this code,
-  // DexCache::kDexCacheMethodTypeCacheSize can be set to zero.
-  mirror::MethodTypeDexCacheType* method_types = nullptr;
-  size_t num_method_types = 0;
-
-  if (dex_file.NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
-    num_method_types = dex_file.NumProtoIds();
-  } else {
-    num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
-  }
-
-  if (num_method_types > 0) {
-    method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
-        raw_arrays + layout.MethodTypesOffset());
-  }
-
-  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
-                 "Expected raw_arrays to align to StringDexCacheType.";
-  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
-                 "Expected StringsOffset() to align to StringDexCacheType.";
-  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
-                 "Expected strings to align to StringDexCacheType.";
-  static_assert(alignof(mirror::StringDexCacheType) == 8u,
-                "Expected StringDexCacheType to have align of 8.");
-  if (kIsDebugBuild) {
-    // Sanity check to make sure all the dex cache arrays are empty. b/28992179
-    for (size_t i = 0; i < num_strings; ++i) {
-      CHECK_EQ(strings[i].load(std::memory_order_relaxed).index, 0u);
-      CHECK(strings[i].load(std::memory_order_relaxed).object.IsNull());
-    }
-    for (size_t i = 0; i < dex_file.NumTypeIds(); ++i) {
-      CHECK(types[i].IsNull());
-    }
-    for (size_t i = 0; i < dex_file.NumMethodIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size_) == nullptr);
-    }
-    for (size_t i = 0; i < dex_file.NumFieldIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size_) == nullptr);
-    }
-    for (size_t i = 0; i < num_method_types; ++i) {
-      CHECK_EQ(method_types[i].load(std::memory_order_relaxed).index, 0u);
-      CHECK(method_types[i].load(std::memory_order_relaxed).object.IsNull());
-    }
-  }
-  if (strings != nullptr) {
-    mirror::StringDexCachePair::Initialize(strings);
-  }
-  if (method_types != nullptr) {
-    mirror::MethodTypeDexCachePair::Initialize(method_types);
-  }
-  dex_cache->Init(&dex_file,
-                  location,
-                  strings,
-                  num_strings,
-                  types,
-                  dex_file.NumTypeIds(),
-                  methods,
-                  dex_file.NumMethodIds(),
-                  fields,
-                  dex_file.NumFieldIds(),
-                  method_types,
-                  num_method_types,
-                  image_pointer_size_);
-}
-
 mirror::DexCache* ClassLinker::AllocDexCache(ObjPtr<mirror::String>* out_location,
                                              Thread* self,
                                              const DexFile& dex_file) {
@@ -2241,9 +2169,14 @@
   ObjPtr<mirror::String> location = nullptr;
   ObjPtr<mirror::DexCache> dex_cache = AllocDexCache(&location, self, dex_file);
   if (dex_cache != nullptr) {
-    WriterMutexLock mu(self, dex_lock_);
+    WriterMutexLock mu(self, *Locks::dex_lock_);
     DCHECK(location != nullptr);
-    InitializeDexCache(self, dex_cache, location, dex_file, linear_alloc);
+    mirror::DexCache::InitializeDexCache(self,
+                                         dex_cache,
+                                         location,
+                                         &dex_file,
+                                         linear_alloc,
+                                         image_pointer_size_);
   }
   return dex_cache.Ptr();
 }
@@ -3263,7 +3196,7 @@
 void ClassLinker::RegisterDexFileLocked(const DexFile& dex_file,
                                         Handle<mirror::DexCache> dex_cache) {
   Thread* const self = Thread::Current();
-  dex_lock_.AssertExclusiveHeld(self);
+  Locks::dex_lock_->AssertExclusiveHeld(self);
   CHECK(dex_cache.Get() != nullptr) << dex_file.GetLocation();
   // For app images, the dex cache location may be a suffix of the dex file location since the
   // dex file location is an absolute path.
@@ -3305,7 +3238,7 @@
                                                ObjPtr<mirror::ClassLoader> class_loader) {
   Thread* self = Thread::Current();
   {
-    ReaderMutexLock mu(self, dex_lock_);
+    ReaderMutexLock mu(self, *Locks::dex_lock_);
     ObjPtr<mirror::DexCache> dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
       return dex_cache.Ptr();
@@ -3328,7 +3261,7 @@
                                                                   dex_file)));
   Handle<mirror::String> h_location(hs.NewHandle(location));
   {
-    WriterMutexLock mu(self, dex_lock_);
+    WriterMutexLock mu(self, *Locks::dex_lock_);
     ObjPtr<mirror::DexCache> dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
       // Another thread managed to initialize the dex cache faster, so use that DexCache.
@@ -3344,7 +3277,12 @@
     // Do InitializeDexCache while holding dex lock to make sure two threads don't call it at the
     // same time with the same dex cache. Since the .bss is shared this can cause failing DCHECK
     // that the arrays are null.
-    InitializeDexCache(self, h_dex_cache.Get(), h_location.Get(), dex_file, linear_alloc);
+    mirror::DexCache::InitializeDexCache(self,
+                                         h_dex_cache.Get(),
+                                         h_location.Get(),
+                                         &dex_file,
+                                         linear_alloc,
+                                         image_pointer_size_);
     RegisterDexFileLocked(dex_file, h_dex_cache);
   }
   table->InsertStrongRoot(h_dex_cache.Get());
@@ -3353,14 +3291,14 @@
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file,
                                   Handle<mirror::DexCache> dex_cache) {
-  WriterMutexLock mu(Thread::Current(), dex_lock_);
+  WriterMutexLock mu(Thread::Current(), *Locks::dex_lock_);
   RegisterDexFileLocked(dex_file, dex_cache);
 }
 
 mirror::DexCache* ClassLinker::FindDexCache(Thread* self,
                                             const DexFile& dex_file,
                                             bool allow_failure) {
-  ReaderMutexLock mu(self, dex_lock_);
+  ReaderMutexLock mu(self, *Locks::dex_lock_);
   return FindDexCacheLocked(self, dex_file, allow_failure);
 }
 
@@ -3397,7 +3335,7 @@
 
 void ClassLinker::FixupDexCaches(ArtMethod* resolution_method) {
   Thread* const self = Thread::Current();
-  ReaderMutexLock mu(self, dex_lock_);
+  ReaderMutexLock mu(self, *Locks::dex_lock_);
   for (const DexCacheData& data : dex_caches_) {
     if (!self->IsJWeakCleared(data.weak_root)) {
       ObjPtr<mirror::DexCache> dex_cache = ObjPtr<mirror::DexCache>::DownCast(
@@ -3792,6 +3730,17 @@
   return false;
 }
 
+// Ensures that methods have the kAccSkipAccessChecks bit set. We use the
+// kAccVerificationAttempted bit on the class access flags to determine whether this has been done
+// before.
+static void EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass, PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (!klass->WasVerificationAttempted()) {
+    klass->SetSkipAccessChecksFlagOnAllMethods(pointer_size);
+    klass->SetVerificationAttempted();
+  }
+}
+
 verifier::MethodVerifier::FailureKind ClassLinker::VerifyClass(
     Thread* self, Handle<mirror::Class> klass, verifier::HardFailLogMode log_level) {
   {
@@ -3819,7 +3768,7 @@
 
     // Don't attempt to re-verify if already sufficiently verified.
     if (klass->IsVerified()) {
-      EnsureSkipAccessChecksMethods(klass);
+      EnsureSkipAccessChecksMethods(klass, image_pointer_size_);
       return verifier::MethodVerifier::kNoFailure;
     }
     if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
@@ -3838,7 +3787,7 @@
     // Skip verification if disabled.
     if (!Runtime::Current()->IsVerificationEnabled()) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
-      EnsureSkipAccessChecksMethods(klass);
+      EnsureSkipAccessChecksMethods(klass, image_pointer_size_);
       return verifier::MethodVerifier::kNoFailure;
     }
   }
@@ -3973,19 +3922,12 @@
       // Mark the class as having a verification attempt to avoid re-running the verifier.
       klass->SetVerificationAttempted();
     } else {
-      EnsureSkipAccessChecksMethods(klass);
+      EnsureSkipAccessChecksMethods(klass, image_pointer_size_);
     }
   }
   return verifier_failure;
 }
 
-void ClassLinker::EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass) {
-  if (!klass->WasVerificationAttempted()) {
-    klass->SetSkipAccessChecksFlagOnAllMethods(image_pointer_size_);
-    klass->SetVerificationAttempted();
-  }
-}
-
 bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file,
                                           ObjPtr<mirror::Class> klass,
                                           mirror::Class::Status& oat_file_class_status) {
@@ -4955,7 +4897,7 @@
                                     bool can_init_parents) {
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
-    EnsureSkipAccessChecksMethods(c);
+    EnsureSkipAccessChecksMethods(c, image_pointer_size_);
     self->AssertNoPendingException();
     return true;
   }
@@ -5111,6 +5053,12 @@
     if (klass->ShouldHaveImt()) {
       klass->SetImt(imt, image_pointer_size_);
     }
+
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(klass);
+
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5154,6 +5102,11 @@
       }
     }
 
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(h_new_class);
+
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
@@ -7485,7 +7438,7 @@
 }
 
 mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
-                                           uint32_t string_idx,
+                                           dex::StringIndex string_idx,
                                            Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   Thread::PoisonObjectPointersIfDebug();
@@ -7501,7 +7454,7 @@
 }
 
 mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
-                                          uint32_t string_idx,
+                                          dex::StringIndex string_idx,
                                           Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
@@ -7510,7 +7463,8 @@
   }
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
-  ObjPtr<mirror::String> string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
+  ObjPtr<mirror::String> string =
+      intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
   if (string != nullptr) {
     dex_cache->SetResolvedString(string_idx, string);
   }
@@ -7966,42 +7920,6 @@
   return type.Get();
 }
 
-const char* ClassLinker::MethodShorty(uint32_t method_idx,
-                                      ArtMethod* referrer,
-                                      uint32_t* length) {
-  ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-  ObjPtr<mirror::DexCache> dex_cache = declaring_class->GetDexCache();
-  const DexFile& dex_file = *dex_cache->GetDexFile();
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
-  return dex_file.GetMethodShorty(method_id, length);
-}
-
-class DumpClassVisitor : public ClassVisitor {
- public:
-  explicit DumpClassVisitor(int flags) : flags_(flags) {}
-
-  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    klass->DumpClass(LOG_STREAM(ERROR), flags_);
-    return true;
-  }
-
- private:
-  const int flags_;
-};
-
-void ClassLinker::DumpAllClasses(int flags) {
-  DumpClassVisitor visitor(flags);
-  VisitClasses(&visitor);
-}
-
-static OatFile::OatMethod CreateOatMethod(const void* code) {
-  CHECK(code != nullptr);
-  const uint8_t* base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
-  base -= sizeof(void*);  // Move backward so that code_offset != 0.
-  const uint32_t code_offset = sizeof(void*);
-  return OatFile::OatMethod(base, code_offset);
-}
-
 bool ClassLinker::IsQuickResolutionStub(const void* entry_point) const {
   return (entry_point == GetQuickResolutionStub()) ||
       (quick_resolution_trampoline_ == entry_point);
@@ -8021,9 +7939,12 @@
   return GetQuickGenericJniStub();
 }
 
-void ClassLinker::SetEntryPointsToCompiledCode(ArtMethod* method,
-                                               const void* method_code) const {
-  OatFile::OatMethod oat_method = CreateOatMethod(method_code);
+void ClassLinker::SetEntryPointsToCompiledCode(ArtMethod* method, const void* code) const {
+  CHECK(code != nullptr);
+  const uint8_t* base = reinterpret_cast<const uint8_t*>(code);  // Base of data points at code.
+  base -= sizeof(void*);  // Move backward so that code_offset != 0.
+  const uint32_t code_offset = sizeof(void*);
+  OatFile::OatMethod oat_method(base, code_offset);
   oat_method.LinkMethod(method);
 }
 
@@ -8031,9 +7952,7 @@
   if (!method->IsNative()) {
     method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
   } else {
-    const void* quick_method_code = GetQuickGenericJniStub();
-    OatFile::OatMethod oat_method = CreateOatMethod(quick_method_code);
-    oat_method.LinkMethod(method);
+    SetEntryPointsToCompiledCode(method, GetQuickGenericJniStub());
   }
 }
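
The refactor above inlines the old CreateOatMethod helper; the pointer
arithmetic it preserves is subtle enough to spell out. An OatMethod locates
code as base + code_offset, and a zero offset is reserved to mean "no compiled
code", so the base is rewound by one pointer size to guarantee a non-zero
offset. A minimal standalone sketch of the trick (FakeOatMethod is an
illustrative stand-in, not the real OatFile::OatMethod):

    #include <cstdint>

    struct FakeOatMethod {
      const uint8_t* base_;
      uint32_t code_offset_;  // 0 is reserved to mean "no compiled code".
      const void* GetCode() const { return base_ + code_offset_; }
    };

    FakeOatMethod MakeOatMethod(const void* code) {
      const uint8_t* base = reinterpret_cast<const uint8_t*>(code);
      base -= sizeof(void*);  // Move backward so that code_offset != 0.
      const uint32_t code_offset = sizeof(void*);
      return FakeOatMethod{base, code_offset};  // base + offset == code again.
    }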
 
@@ -8084,7 +8003,7 @@
 }
 
 pid_t ClassLinker::GetDexLockOwner() {
-  return dex_lock_.GetExclusiveOwnerTid();
+  return Locks::dex_lock_->GetExclusiveOwnerTid();
 }
 
 void ClassLinker::SetClassRoot(ClassRoot class_root, ObjPtr<mirror::Class> klass) {
@@ -8250,20 +8169,6 @@
   return soa.Env()->NewGlobalRef(local_ref.get());
 }
 
-ArtMethod* ClassLinker::CreateRuntimeMethod(LinearAlloc* linear_alloc) {
-  const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
-  const size_t method_size = ArtMethod::Size(image_pointer_size_);
-  LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray(
-      Thread::Current(),
-      linear_alloc,
-      1);
-  ArtMethod* method = &method_array->At(0, method_size, method_alignment);
-  CHECK(method != nullptr);
-  method->SetDexMethodIndex(DexFile::kDexNoIndex);
-  CHECK(method->IsRuntimeMethod());
-  return method;
-}
-
 void ClassLinker::DropFindArrayClassCache() {
   std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr));
   find_array_class_cache_next_victim_ = 0;
@@ -8337,7 +8242,7 @@
   std::set<DexCacheResolvedClasses> ret;
   VLOG(class_linker) << "Collecting resolved classes";
   const uint64_t start_time = NanoTime();
-  ReaderMutexLock mu(soa.Self(), *DexLock());
+  ReaderMutexLock mu(soa.Self(), *Locks::dex_lock_);
   // Loop through all the dex caches and inspect resolved classes.
   for (const ClassLinker::DexCacheData& data : GetDexCachesData()) {
     if (soa.Self()->IsJWeakCleared(data.weak_root)) {
@@ -8406,7 +8311,7 @@
   std::unordered_map<std::string, const DexFile*> location_to_dex_file;
   ScopedObjectAccess soa(self);
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-  ReaderMutexLock mu(self, *DexLock());
+  ReaderMutexLock mu(self, *Locks::dex_lock_);
   for (const ClassLinker::DexCacheData& data : GetDexCachesData()) {
     if (!self->IsJWeakCleared(data.weak_root)) {
       ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(data.weak_root);
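
The class_linker.cc hunks above are mechanical: the ClassLinker-owned
dex_lock_ becomes the process-global *Locks::dex_lock_, taken through the same
RAII ReaderMutexLock / WriterMutexLock guards. For orientation, a
self-contained sketch of the reader/writer idiom, with std::shared_mutex
standing in for ART's ReaderWriterMutex (which additionally threads a
Thread* self through; everything below is illustrative, not part of this change):

    #include <list>
    #include <mutex>
    #include <shared_mutex>

    struct DexCacheData {};                // stand-in for ClassLinker::DexCacheData

    std::shared_mutex g_dex_lock;          // stand-in for *Locks::dex_lock_
    std::list<DexCacheData> g_dex_caches;  // stand-in for dex_caches_

    // Readers (FindDexCache, GetResolvedClasses, ...) hold the lock shared.
    size_t CountDexCaches() {
      std::shared_lock<std::shared_mutex> mu(g_dex_lock);
      return g_dex_caches.size();
    }

    // Writers (RegisterDexFile, ...) hold it exclusively while mutating.
    void AddDexCache() {
      std::unique_lock<std::shared_mutex> mu(g_dex_lock);
      g_dex_caches.emplace_back();
    }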
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 9563448..de1f0f0 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -140,12 +140,12 @@
   bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
                         std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Initialize class linker from one or more boot images.
   bool InitFromBootImage(std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Add an image space to the class linker, may fix up classloader fields and dex cache fields.
   // The dex files that were newly opened for the space are placed in the out argument
@@ -158,13 +158,13 @@
                      const char* dex_location,
                      std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
                      std::string* error_msg)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool OpenImageDexFiles(gc::space::ImageSpace* space,
                          std::vector<std::unique_ptr<const DexFile>>* out_dex_files,
                          std::string* error_msg)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds a class by its descriptor, loading it if necessary.
@@ -173,18 +173,18 @@
                            const char* descriptor,
                            Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Finds a class by its descriptor using the "system" class loader, ie by searching the
   // boot_class_path_.
   mirror::Class* FindSystemClass(Thread* self, const char* descriptor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Finds the array class given for the element class.
   mirror::Class* FindArrayClass(Thread* self, ObjPtr<mirror::Class>* element_class)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Returns true if the class linker is initialized.
   bool IsInitialized() const {
@@ -199,7 +199,7 @@
                              const DexFile& dex_file,
                              const DexFile::ClassDef& dex_class_def)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Finds a class by its descriptor, returning null if it wasn't loaded
   // by the given 'class_loader'.
@@ -224,10 +224,6 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpAllClasses(int flags)
-      REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   void DumpForSigQuit(std::ostream& os) REQUIRES(!Locks::classlinker_classes_lock_);
 
   size_t NumLoadedClasses()
@@ -237,18 +233,20 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::String* ResolveString(uint32_t string_idx, ArtMethod* referrer)
+  mirror::String* ResolveString(dex::StringIndex string_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
-  mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
+  mirror::String* ResolveString(const DexFile& dex_file,
+                                dex::StringIndex string_idx,
                                 Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Find a String with the given index from the DexFile, storing the
   // result in the DexCache if found. Return null if not found.
-  mirror::String* LookupString(const DexFile& dex_file, uint32_t string_idx,
+  mirror::String* LookupString(const DexFile& dex_file,
+                               dex::StringIndex string_idx,
                                Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -259,18 +257,18 @@
                              dex::TypeIndex type_idx,
                              ObjPtr<mirror::Class> referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   mirror::Class* ResolveType(dex::TypeIndex type_idx, ArtField* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Look up a resolved type with the given ID from the DexFile. The ClassLoader is used to search
   // for the type, since it may be referenced from but not contained within the given DexFile.
@@ -289,7 +287,7 @@
                              Handle<mirror::DexCache> dex_cache,
                              Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Determine whether a dex cache result should be trusted, or an IncompatibleClassChangeError
   // check should be performed even after a hit.
@@ -311,7 +309,7 @@
                            ArtMethod* referrer,
                            InvokeType type)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   ArtMethod* GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -323,17 +321,17 @@
                                                 Handle<mirror::DexCache> dex_cache,
                                                 Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
   template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
   ArtMethod* ResolveMethodWithoutInvokeType(const DexFile& dex_file,
                                             uint32_t method_idx,
                                             Handle<mirror::DexCache> dex_cache,
                                             Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   ArtField* GetResolvedField(uint32_t field_idx, ObjPtr<mirror::Class> field_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -341,7 +339,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   ArtField* ResolveField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a field with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
@@ -352,7 +350,7 @@
                          Handle<mirror::DexCache> dex_cache,
                          Handle<mirror::ClassLoader> class_loader, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a field with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
@@ -363,7 +361,7 @@
                             Handle<mirror::DexCache> dex_cache,
                             Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a method type with a given ID from the DexFile, storing
   // the result in the DexCache.
@@ -372,11 +370,7 @@
                                         Handle<mirror::DexCache> dex_cache,
                                         Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
-
-  // Get shorty from method index without resolution. Used to do handlerization.
-  const char* MethodShorty(uint32_t method_idx, ArtMethod* referrer, uint32_t* length)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Returns true on success, false if there's an exception pending.
   // can_run_clinit=false allows the compiler to attempt to init a class,
@@ -386,20 +380,20 @@
                          bool can_init_fields,
                          bool can_init_parents)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Initializes classes that have instances in the image but that have
   // <clinit> methods so they could not be initialized by the compiler.
   void RunRootClinits()
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   mirror::DexCache* RegisterDexFile(const DexFile& dex_file,
                                     ObjPtr<mirror::ClassLoader> class_loader)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   const std::vector<const DexFile*>& GetBootClassPath() {
@@ -416,22 +410,22 @@
   // can race with insertion and deletion of classes while the visitor is being called.
   void VisitClassesWithoutClassesLock(ClassVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
       REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_)
+      REQUIRES(!Locks::dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* FindDexCache(Thread* self,
                                  const DexFile& dex_file,
                                  bool allow_failure = false)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void FixupDexCaches(ArtMethod* resolution_method)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Allocate an instance of a java.lang.Object.
@@ -479,18 +473,18 @@
       Handle<mirror::Class> klass,
       verifier::HardFailLogMode log_level = verifier::HardFailLogMode::kLogNone)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file,
                                ObjPtr<mirror::Class> klass,
                                mirror::Class::Status& oat_file_class_status)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
   void ResolveClassExceptionHandlerTypes(Handle<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
   void ResolveMethodExceptionHandlerTypes(ArtMethod* klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa,
                                   jstring name,
@@ -503,7 +497,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ArtMethod* FindMethodForProxy(ObjPtr<mirror::Class> proxy_class, ArtMethod* proxy_method)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Get the oat code for a method when its class isn't yet initialized.
@@ -566,7 +560,7 @@
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
   jobject CreatePathClassLoader(Thread* self, const std::vector<const DexFile*>& dex_files)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   PointerSize GetImagePointerSize() const {
     return image_pointer_size_;
@@ -577,8 +571,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* CreateRuntimeMethod(LinearAlloc* linear_alloc);
-
   // Clear the ArrayClass cache. This is necessary when cleaning up for the image, as the cache
   // entries are roots, but potentially not image classes.
   void DropFindArrayClassCache() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -609,11 +601,11 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::set<DexCacheResolvedClasses> GetResolvedClasses(bool ignore_boot_classes)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   std::unordered_set<std::string> GetClassDescriptorsForProfileKeys(
       const std::set<DexCacheResolvedClasses>& classes)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
                                 ObjPtr<mirror::ClassLoader> class_loader)
@@ -648,7 +640,7 @@
   // class.
   void ThrowEarlierClassFailure(ObjPtr<mirror::Class> c, bool wrap_in_no_class_def = false)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Get the actual holding class for a copied method. Pretty slow, don't call often.
   mirror::Class* GetHoldingClassOfCopiedMethod(ArtMethod* method)
@@ -678,7 +670,7 @@
   bool AttemptSupertypeVerification(Thread* self,
                                     Handle<mirror::Class> klass,
                                     Handle<mirror::Class> supertype)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
@@ -702,7 +694,7 @@
 
   void FinishInit(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // For early bootstrapping by Init
   mirror::Class* AllocClass(Thread* self,
@@ -729,17 +721,9 @@
                                                const DexFile& dex_file,
                                                LinearAlloc* linear_alloc)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  void InitializeDexCache(Thread* self,
-                          ObjPtr<mirror::DexCache> dex_cache,
-                          ObjPtr<mirror::String> location,
-                          const DexFile& dex_file,
-                          LinearAlloc* linear_alloc)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(dex_lock_);
-
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
@@ -753,14 +737,14 @@
                                   size_t hash,
                                   Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   void AppendToBootClassPath(Thread* self, const DexFile& dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
   void AppendToBootClassPath(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Precomputes size needed for Class, in the case of a non-temporary class this size must be
   // sufficient to hold all static fields.
@@ -808,7 +792,7 @@
                                      Handle<mirror::ClassLoader> class_loader,
                                      ObjPtr<mirror::Class>* result)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   // Finds a class by its descriptor, returning NULL if it wasn't loaded
   // by the given 'class_loader'. Uses the provided hash for the descriptor.
@@ -820,10 +804,10 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
-      REQUIRES(dex_lock_)
+      REQUIRES(Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   mirror::DexCache* FindDexCacheLocked(Thread* self, const DexFile& dex_file, bool allow_failure)
-      REQUIRES(dex_lock_)
+      REQUIRES(Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool InitializeClass(Thread* self,
@@ -831,12 +815,12 @@
                        bool can_run_clinit,
                        bool can_init_parents)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
   bool InitializeDefaultInterfaceRecursive(Thread* self,
                                            Handle<mirror::Class> klass,
                                            bool can_run_clinit,
                                            bool can_init_parents)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   bool WaitForInitializeClass(Handle<mirror::Class> klass,
                               Thread* self,
@@ -869,7 +853,7 @@
 
   bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
@@ -1035,17 +1019,11 @@
   void CheckProxyMethod(ArtMethod* method, ArtMethod* prototype) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // For use by ImageWriter to find DexCaches for its roots
-  ReaderWriterMutex* DexLock()
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      LOCK_RETURNED(dex_lock_) {
-    return &dex_lock_;
-  }
-  size_t GetDexCacheCount() REQUIRES_SHARED(Locks::mutator_lock_, dex_lock_) {
+  size_t GetDexCacheCount() REQUIRES_SHARED(Locks::mutator_lock_, Locks::dex_lock_) {
     return dex_caches_.size();
   }
   const std::list<DexCacheData>& GetDexCachesData()
-      REQUIRES_SHARED(Locks::mutator_lock_, dex_lock_) {
+      REQUIRES_SHARED(Locks::mutator_lock_, Locks::dex_lock_) {
     return dex_caches_;
   }
 
@@ -1054,12 +1032,6 @@
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Ensures that methods have the kAccSkipAccessChecks bit set. We use the
-  // kAccVerificationAttempted bit on the class access flags to determine whether this has been done
-  // before.
-  void EnsureSkipAccessChecksMethods(Handle<mirror::Class> c)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Register a class loader and create its class table and allocator. Should not be called if
   // these are already created.
   void RegisterClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
@@ -1084,7 +1056,7 @@
   mirror::Class* EnsureResolved(Thread* self, const char* descriptor, ObjPtr<mirror::Class> klass)
       WARN_UNUSED
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
+      REQUIRES(!Locks::dex_lock_);
 
   void FixupTemporaryDeclaringClass(ObjPtr<mirror::Class> temp_class,
                                     ObjPtr<mirror::Class> new_class)
@@ -1115,12 +1087,12 @@
       ClassTable::ClassSet* new_class_set,
       bool* out_forward_dex_cache_array,
       std::string* out_error_msg)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Check that c1 == FindSystemClass(self, descriptor). Abort with class dumps otherwise.
   void CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!Locks::dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Sets imt_ref appropriately for LinkInterfaceMethods.
@@ -1151,10 +1123,9 @@
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> boot_dex_files_;
 
-  mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // JNI weak globals and side data to allow dex caches to get unloaded. We lazily delete weak
   // globals when we register new dex files.
-  std::list<DexCacheData> dex_caches_ GUARDED_BY(dex_lock_);
+  std::list<DexCacheData> dex_caches_ GUARDED_BY(Locks::dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
   // InsertClassTableForClassLoader.
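
The REQUIRES(!Locks::dex_lock_) rewrites above are Clang thread-safety
annotations: REQUIRES(x) promises the caller already holds x, REQUIRES(!x)
that it does not (the callee acquires the lock itself), and GUARDED_BY(x) ties
a field to a lock. A toy illustration that compiles under -Wthread-safety
(simplified macros; ART's real definitions live in base/mutex.h):

    // clang++ -c -Wthread-safety -Wthread-safety-negative example.cc
    #define CAPABILITY(x)  __attribute__((capability(x)))
    #define GUARDED_BY(x)  __attribute__((guarded_by(x)))
    #define REQUIRES(...)  __attribute__((requires_capability(__VA_ARGS__)))
    #define ACQUIRE(...)   __attribute__((acquire_capability(__VA_ARGS__)))
    #define RELEASE(...)   __attribute__((release_capability(__VA_ARGS__)))

    class CAPABILITY("mutex") DexLock {
     public:
      void Lock() ACQUIRE() {}
      void Unlock() RELEASE() {}
    };

    DexLock dex_lock;
    int dex_cache_count GUARDED_BY(dex_lock) = 0;

    // Caller must already hold the lock (cf. RegisterDexFileLocked).
    int GetCountLocked() REQUIRES(dex_lock) { return dex_cache_count; }

    // Caller must NOT hold the lock; we acquire it here (cf. RegisterDexFile).
    int GetCount() REQUIRES(!dex_lock) {
      dex_lock.Lock();
      int count = dex_cache_count;
      dex_lock.Unlock();
      return count;
    }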
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 9e17be2..ddb9e59 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -614,6 +614,9 @@
 
 struct ClassExtOffsets : public CheckOffsets<mirror::ClassExt> {
   ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_dex_caches_), "obsoleteDexCaches");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_methods_), "obsoleteMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, original_dex_cache_), "originalDexCache");
     addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
   }
 };
@@ -1316,7 +1319,7 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
   {
-    ReaderMutexLock mu(soa.Self(), *class_linker->DexLock());
+    ReaderMutexLock mu(soa.Self(), *Locks::dex_lock_);
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       dex_cache.Assign(soa.Self()->DecodeJObject(data.weak_root)->AsDexCache());
       if (dex_cache.Get() != nullptr) {
@@ -1340,7 +1343,7 @@
                                                 0u,
                                                 nullptr));
   {
-    WriterMutexLock mu(soa.Self(), *class_linker->DexLock());
+    WriterMutexLock mu(soa.Self(), *Locks::dex_lock_);
     // Check that inserting with a UTF16 name works.
     class_linker->RegisterDexFileLocked(*dex_file, dex_cache);
   }
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 77a63c1..e884e39 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -43,9 +43,9 @@
   return GetStringDataAndUtf16Length(string_id, &ignored);
 }
 
-inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(dex::StringIndex idx,
                                                           uint32_t* utf16_length) const {
-  if (idx == kDexNoIndex) {
+  if (!idx.IsValid()) {
     *utf16_length = 0;
     return nullptr;
   }
@@ -53,7 +53,7 @@
   return GetStringDataAndUtf16Length(string_id, utf16_length);
 }
 
-inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+inline const char* DexFile::StringDataByIdx(dex::StringIndex idx) const {
   uint32_t unicode_length;
   return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
 }
@@ -130,8 +130,8 @@
       (RoundUp(reinterpret_cast<uintptr_t>(insns_end_), 4)) + offset;
 }
 
-static inline bool DexFileStringEquals(const DexFile* df1, uint32_t sidx1,
-                                       const DexFile* df2, uint32_t sidx2) {
+static inline bool DexFileStringEquals(const DexFile* df1, dex::StringIndex sidx1,
+                                       const DexFile* df2, dex::StringIndex sidx2) {
   uint32_t s1_len;  // Note: utf16 length != mutf8 length.
   const char* s1_data = df1->StringDataAndUtf16LengthByIdx(sidx1, &s1_len);
   uint32_t s2_len;
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index cc544fd..aa8fb38 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -45,6 +45,8 @@
 
 namespace art {
 
+static_assert(sizeof(dex::StringIndex) == sizeof(uint32_t), "StringIndex size is wrong");
+static_assert(std::is_trivially_copyable<dex::StringIndex>::value, "StringIndex not trivial");
 static_assert(sizeof(dex::TypeIndex) == sizeof(uint16_t), "TypeIndex size is wrong");
 static_assert(std::is_trivially_copyable<dex::TypeIndex>::value, "TypeIndex not trivial");
 
@@ -602,7 +604,7 @@
                                              const DexFile::TypeId& type) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
   const dex::TypeIndex class_idx = GetIndexForTypeId(declaring_klass);
-  const uint32_t name_idx = GetIndexForStringId(name);
+  const dex::StringIndex name_idx = GetIndexForStringId(name);
   const dex::TypeIndex type_idx = GetIndexForTypeId(type);
   int32_t lo = 0;
   int32_t hi = NumFieldIds() - 1;
@@ -637,7 +639,7 @@
                                                const DexFile::ProtoId& signature) const {
   // Binary search MethodIds knowing that they are sorted by class_idx, name_idx then proto_idx
   const dex::TypeIndex class_idx = GetIndexForTypeId(declaring_klass);
-  const uint32_t name_idx = GetIndexForStringId(name);
+  const dex::StringIndex name_idx = GetIndexForStringId(name);
   const uint16_t proto_idx = GetIndexForProtoId(signature);
   int32_t lo = 0;
   int32_t hi = NumMethodIds() - 1;
@@ -672,7 +674,7 @@
   int32_t hi = NumStringIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const DexFile::StringId& str_id = GetStringId(mid);
+    const DexFile::StringId& str_id = GetStringId(dex::StringIndex(mid));
     const char* str = GetStringData(str_id);
     int compare = CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(string, str);
     if (compare > 0) {
@@ -711,7 +713,7 @@
   int32_t hi = NumStringIds() - 1;
   while (hi >= lo) {
     int32_t mid = (hi + lo) / 2;
-    const DexFile::StringId& str_id = GetStringId(mid);
+    const DexFile::StringId& str_id = GetStringId(dex::StringIndex(mid));
     const char* str = GetStringData(str_id);
     int compare = CompareModifiedUtf8ToUtf16AsCodePointValues(str, string, length);
     if (compare > 0) {
@@ -725,7 +727,7 @@
   return nullptr;
 }
 
-const DexFile::TypeId* DexFile::FindTypeId(uint32_t string_idx) const {
+const DexFile::TypeId* DexFile::FindTypeId(dex::StringIndex string_idx) const {
   int32_t lo = 0;
   int32_t hi = NumTypeIds() - 1;
   while (hi >= lo) {
@@ -912,7 +914,7 @@
     }
     uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
     const char* descriptor = it.GetDescriptor();
-    local_in_reg[arg_reg].name_ = StringDataByIdx(name_idx);
+    local_in_reg[arg_reg].name_ = StringDataByIdx(dex::StringIndex(name_idx));
     local_in_reg[arg_reg].descriptor_ = descriptor;
     local_in_reg[arg_reg].signature_ = nullptr;
     local_in_reg[arg_reg].start_address_ = 0;
@@ -975,10 +977,10 @@
           local_cb(context, local_in_reg[reg]);
         }
 
-        local_in_reg[reg].name_ = StringDataByIdx(name_idx);
+        local_in_reg[reg].name_ = StringDataByIdx(dex::StringIndex(name_idx));
         local_in_reg[reg].descriptor_ =
             StringByTypeIdx(dex::TypeIndex(dchecked_integral_cast<uint16_t>(descriptor_idx)));
-        local_in_reg[reg].signature_ = StringDataByIdx(signature_idx);
+        local_in_reg[reg].signature_ = StringDataByIdx(dex::StringIndex(signature_idx));
         local_in_reg[reg].start_address_ = address;
         local_in_reg[reg].reg_ = reg;
         local_in_reg[reg].is_live_ = true;
@@ -1080,7 +1082,7 @@
         break;
       case DBG_SET_FILE: {
         uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
-        entry.source_file_ = StringDataByIdx(name_idx);
+        entry.source_file_ = StringDataByIdx(dex::StringIndex(name_idx));
         break;
       }
       default: {
@@ -1482,6 +1484,11 @@
 
 namespace dex {
 
+std::ostream& operator<<(std::ostream& os, const StringIndex& index) {
+  os << "StringIndex[" << index.index_ << "]";
+  return os;
+}
+
 std::ostream& operator<<(std::ostream& os, const TypeIndex& index) {
   os << "TypeIndex[" << index.index_ << "]";
   return os;
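
In the FindStringId / FindStringIdFromUtf16 hunks above the binary search is
untouched; the raw int32_t midpoint is simply wrapped in the typed index at
the single point of lookup. A condensed standalone sketch of the pattern, with
a sorted vector and strcmp standing in for the string_ids_ table and the
modified-UTF-8 comparator:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct StringIndex {  // illustrative mini version of dex::StringIndex
      uint32_t index_;
      explicit StringIndex(uint32_t idx) : index_(idx) {}
    };

    const char* GetStringData(const std::vector<const char*>& table, StringIndex idx) {
      return table[idx.index_];
    }

    const char* FindString(const std::vector<const char*>& table, const char* wanted) {
      int32_t lo = 0;
      int32_t hi = static_cast<int32_t>(table.size()) - 1;
      while (hi >= lo) {
        int32_t mid = (hi + lo) / 2;
        // The typed index is constructed exactly once, at the probe.
        const char* str = GetStringData(table, StringIndex(mid));
        int compare = strcmp(wanted, str);
        if (compare > 0) {
          lo = mid + 1;
        } else if (compare < 0) {
          hi = mid - 1;
        } else {
          return str;
        }
      }
      return nullptr;
    }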
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 2384eb6..250795b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -152,7 +152,7 @@
 
   // Raw type_id_item.
   struct TypeId {
-    uint32_t descriptor_idx_;  // index into string_ids
+    dex::StringIndex descriptor_idx_;  // index into string_ids
 
    private:
     DISALLOW_COPY_AND_ASSIGN(TypeId);
@@ -160,9 +160,9 @@
 
   // Raw field_id_item.
   struct FieldId {
-    dex::TypeIndex class_idx_;  // index into type_ids_ array for defining class
-    dex::TypeIndex type_idx_;  // index into type_ids_ array for field type
-    uint32_t name_idx_;  // index into string_ids_ array for field name
+    dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
+    dex::TypeIndex type_idx_;    // index into type_ids_ array for field type
+    dex::StringIndex name_idx_;  // index into string_ids_ array for field name
 
    private:
     DISALLOW_COPY_AND_ASSIGN(FieldId);
@@ -170,10 +170,10 @@
 
   // Raw proto_id_item.
   struct ProtoId {
-    uint32_t shorty_idx_;        // index into string_ids array for shorty descriptor
+    dex::StringIndex shorty_idx_;     // index into string_ids array for shorty descriptor
     dex::TypeIndex return_type_idx_;  // index into type_ids array for return type
-    uint16_t pad_;               // padding = 0
-    uint32_t parameters_off_;    // file offset to type_list for parameter types
+    uint16_t pad_;                    // padding = 0
+    uint32_t parameters_off_;         // file offset to type_list for parameter types
 
    private:
     DISALLOW_COPY_AND_ASSIGN(ProtoId);
@@ -182,8 +182,8 @@
   // Raw method_id_item.
   struct MethodId {
     dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
-    uint16_t proto_idx_;  // index into proto_ids_ array for method prototype
-    uint32_t name_idx_;  // index into string_ids_ array for method name
+    uint16_t proto_idx_;         // index into proto_ids_ array for method prototype
+    dex::StringIndex name_idx_;  // index into string_ids_ array for method name
 
    private:
     DISALLOW_COPY_AND_ASSIGN(MethodId);
@@ -197,7 +197,7 @@
     dex::TypeIndex superclass_idx_;  // index into type_ids_ array for superclass
     uint16_t pad2_;  // padding = 0
     uint32_t interfaces_off_;  // file offset to TypeList
-    uint32_t source_file_idx_;  // index into string_ids_ for source file name
+    dex::StringIndex source_file_idx_;  // index into string_ids_ for source file name
     uint32_t annotations_off_;  // file offset to annotations_directory_item
     uint32_t class_data_off_;  // file offset to class_data_item
     uint32_t static_values_off_;  // file offset to EncodedArray
@@ -501,15 +501,15 @@
   }
 
   // Returns the StringId at the specified index.
-  const StringId& GetStringId(uint32_t idx) const {
-    DCHECK_LT(idx, NumStringIds()) << GetLocation();
-    return string_ids_[idx];
+  const StringId& GetStringId(dex::StringIndex idx) const {
+    DCHECK_LT(idx.index_, NumStringIds()) << GetLocation();
+    return string_ids_[idx.index_];
   }
 
-  uint32_t GetIndexForStringId(const StringId& string_id) const {
+  dex::StringIndex GetIndexForStringId(const StringId& string_id) const {
     CHECK_GE(&string_id, string_ids_) << GetLocation();
     CHECK_LT(&string_id, string_ids_ + header_->string_ids_size_) << GetLocation();
-    return &string_id - string_ids_;
+    return dex::StringIndex(&string_id - string_ids_);
   }
 
   int32_t GetStringLength(const StringId& string_id) const;
@@ -522,9 +522,9 @@
   const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
+  const char* StringDataAndUtf16LengthByIdx(dex::StringIndex idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const;
+  const char* StringDataByIdx(dex::StringIndex idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
@@ -563,7 +563,7 @@
   const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
-  const TypeId* FindTypeId(uint32_t string_idx) const;
+  const TypeId* FindTypeId(dex::StringIndex string_idx) const;
 
   // Returns the number of field identifiers in the .dex file.
   size_t NumFieldIds() const {
@@ -963,7 +963,7 @@
                                void* context) const;
 
   const char* GetSourceFile(const ClassDef& class_def) const {
-    if (class_def.source_file_idx_ == 0xffffffff) {
+    if (!class_def.source_file_idx_.IsValid()) {
       return nullptr;
     } else {
       return StringDataByIdx(class_def.source_file_idx_);
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 3fe2c40..52b9f11 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -167,7 +167,8 @@
 
   while (size != 0) {
     uint32_t element_name_index = DecodeUnsignedLeb128(&annotation);
-    const char* element_name = dex_file.GetStringData(dex_file.GetStringId(element_name_index));
+    const char* element_name =
+        dex_file.GetStringData(dex_file.GetStringId(dex::StringIndex(element_name_index)));
     if (strcmp(name, element_name) == 0) {
       return annotation;
     }
@@ -357,7 +358,7 @@
         StackHandleScope<1> hs(self);
         Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         element_object = Runtime::Current()->GetClassLinker()->ResolveString(
-            klass->GetDexFile(), index, dex_cache);
+            klass->GetDexFile(), dex::StringIndex(index), dex_cache);
         set_object = true;
         if (element_object == nullptr) {
           return false;
@@ -592,7 +593,7 @@
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<5> hs(self);
   uint32_t element_name_index = DecodeUnsignedLeb128(annotation);
-  const char* name = dex_file.StringDataByIdx(element_name_index);
+  const char* name = dex_file.StringDataByIdx(dex::StringIndex(element_name_index));
   Handle<mirror::String> string_name(
       hs.NewHandle(mirror::String::AllocFromModifiedUtf8(self, name)));
 
@@ -1341,7 +1342,9 @@
     case kDouble:  field->SetDouble<kTransactionActive>(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject<kTransactionActive>(field->GetDeclaringClass(), nullptr); break;
     case kString: {
-      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, *dex_cache_);
+      mirror::String* resolved = linker_->ResolveString(dex_file_,
+                                                        dex::StringIndex(jval_.i),
+                                                        *dex_cache_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
     }
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index f94d07b..0fec856 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -418,7 +418,7 @@
     const char* type_str = java_lang_dex_file_->StringByTypeIdx(dex::TypeIndex(i));
     const DexFile::StringId* type_str_id = java_lang_dex_file_->FindStringId(type_str);
     ASSERT_TRUE(type_str_id != nullptr);
-    uint32_t type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
+    dex::StringIndex type_str_idx = java_lang_dex_file_->GetIndexForStringId(*type_str_id);
     const DexFile::TypeId* type_id = java_lang_dex_file_->FindTypeId(type_str_idx);
     ASSERT_EQ(type_id, java_lang_dex_file_->FindTypeId(type_str));
     ASSERT_TRUE(type_id != nullptr);
diff --git a/runtime/dex_file_types.h b/runtime/dex_file_types.h
index c6d95a1..bd779c4 100644
--- a/runtime/dex_file_types.h
+++ b/runtime/dex_file_types.h
@@ -23,12 +23,47 @@
 namespace art {
 namespace dex {
 
+class StringIndex {
+ public:
+  uint32_t index_;
+
+  constexpr StringIndex() : index_(std::numeric_limits<decltype(index_)>::max()) {}
+  explicit constexpr StringIndex(uint32_t idx) : index_(idx) {}
+
+  bool IsValid() const {
+    return index_ != std::numeric_limits<decltype(index_)>::max();
+  }
+  static StringIndex Invalid() {
+    return StringIndex(std::numeric_limits<decltype(index_)>::max());
+  }
+
+  bool operator==(const StringIndex& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const StringIndex& other) const {
+    return index_ != other.index_;
+  }
+  bool operator<(const StringIndex& other) const {
+    return index_ < other.index_;
+  }
+  bool operator<=(const StringIndex& other) const {
+    return index_ <= other.index_;
+  }
+  bool operator>(const StringIndex& other) const {
+    return index_ > other.index_;
+  }
+  bool operator>=(const StringIndex& other) const {
+    return index_ >= other.index_;
+  }
+};
+std::ostream& operator<<(std::ostream& os, const StringIndex& index);
+
 class TypeIndex {
  public:
   uint16_t index_;
 
-  TypeIndex() : index_(std::numeric_limits<decltype(index_)>::max()) {}
-  explicit TypeIndex(uint16_t idx) : index_(idx) {}
+  constexpr TypeIndex() : index_(std::numeric_limits<decltype(index_)>::max()) {}
+  explicit constexpr TypeIndex(uint16_t idx) : index_(idx) {}
 
   bool IsValid() const {
     return index_ != std::numeric_limits<decltype(index_)>::max();
@@ -63,6 +98,12 @@
 
 namespace std {
 
+template<> struct hash<art::dex::StringIndex> {
+  size_t operator()(const art::dex::StringIndex& index) const {
+    return hash<uint32_t>()(index.index_);
+  }
+};
+
 template<> struct hash<art::dex::TypeIndex> {
   size_t operator()(const art::dex::TypeIndex& index) const {
     return hash<uint16_t>()(index.index_);
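
A short usage sketch of the new dex::StringIndex: the explicit constructor
blocks silent mixing with raw uint32_t, the default constructor yields the
max-value invalid sentinel, and the std::hash specialization above makes it a
drop-in map key (the map contents here are purely illustrative):

    #include <cstdint>
    #include <string>
    #include <unordered_map>
    // Assumes the art::dex::StringIndex definition from dex_file_types.h above.

    void Demo() {
      art::dex::StringIndex idx(42);  // explicit: `StringIndex idx = 42u;` won't compile
      art::dex::StringIndex none;     // default-constructed => invalid sentinel
      static_assert(sizeof(art::dex::StringIndex) == sizeof(uint32_t), "zero overhead");

      std::unordered_map<art::dex::StringIndex, std::string> names;  // uses std::hash above
      names[idx] = "foo";

      if (!none.IsValid()) {
        // index_ holds numeric_limits<uint32_t>::max(), the dex "no index" value.
      }
    }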
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index ed50711..07f0fca 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -80,8 +80,8 @@
   return true;
 }
 
-const char* DexFileVerifier::CheckLoadStringByIdx(uint32_t idx, const char* error_string) {
-  if (UNLIKELY(!CheckIndex(idx, dex_file_->NumStringIds(), error_string))) {
+const char* DexFileVerifier::CheckLoadStringByIdx(dex::StringIndex idx, const char* error_string) {
+  if (UNLIKELY(!CheckIndex(idx.index_, dex_file_->NumStringIds(), error_string))) {
     return nullptr;
   }
   return dex_file_->StringDataByIdx(idx);
@@ -92,9 +92,7 @@
   if (UNLIKELY(!CheckIndex(type_idx.index_, dex_file_->NumTypeIds(), error_string))) {
     return nullptr;
   }
-  const DexFile::TypeId& type_id = dex_file_->GetTypeId(type_idx);
-  uint32_t idx = type_id.descriptor_idx_;
-  return CheckLoadStringByIdx(idx, error_string);
+  return CheckLoadStringByIdx(dex_file_->GetTypeId(type_idx).descriptor_idx_, error_string);
 }
 
 const DexFile::FieldId* DexFileVerifier::CheckLoadFieldId(uint32_t idx, const char* error_string) {
@@ -1782,7 +1780,8 @@
     const DexFile::TypeId* prev_item = reinterpret_cast<const DexFile::TypeId*>(previous_item_);
     if (UNLIKELY(prev_item->descriptor_idx_ >= item->descriptor_idx_)) {
       ErrorStringPrintf("Out-of-order type_ids: %x then %x",
-                        prev_item->descriptor_idx_, item->descriptor_idx_);
+                        prev_item->descriptor_idx_.index_,
+                        item->descriptor_idx_.index_);
       return false;
     }
   }
@@ -2500,14 +2499,15 @@
 
 static std::string GetStringOrError(const uint8_t* const begin,
                                     const DexFile::Header* const header,
-                                    uint32_t string_idx) {
+                                    dex::StringIndex string_idx) {
   // The `string_idx` is not guaranteed to be valid yet.
-  if (header->string_ids_size_ <= string_idx) {
+  if (header->string_ids_size_ <= string_idx.index_) {
     return "(error)";
   }
 
   const DexFile::StringId* string_id =
-      reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_) + string_idx;
+      reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_)
+          + string_idx.index_;
 
   // Assume that the data is OK at this point. String data has been checked at this point.
 
@@ -2664,7 +2664,7 @@
   }
   uint32_t string_idx =
       (reinterpret_cast<const DexFile::MethodId*>(begin + header->method_ids_off_) +
-          method_index)->name_idx_;
+          method_index)->name_idx_.index_;
   if (string_idx >= header->string_ids_size_) {
     *error_msg = "String index not available for method flags verification";
     return false;
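
GetStringOrError above shows the verifier's defensive posture: string_idx
comes from not-yet-verified data, so it is bounds-checked and an error marker
returned, instead of the DCHECK used on trusted paths such as
DexFile::GetStringId. The same pattern reduced to a standalone sketch
(illustrative table type):

    #include <cstdint>
    #include <string>
    #include <vector>

    // `idx` is untrusted: return a marker rather than crash, so the verifier
    // can still format a readable error for the malformed dex file.
    std::string GetStringOrError(const std::vector<std::string>& table, uint32_t idx) {
      if (table.size() <= idx) {
        return "(error)";
      }
      return table[idx];
    }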
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 19a89de..0327367 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -150,7 +150,7 @@
 
   // Load a string by (type) index. Checks whether the index is in bounds, printing the error if
   // not. If there is an error, null is returned.
-  const char* CheckLoadStringByIdx(uint32_t idx, const char* error_fmt);
+  const char* CheckLoadStringByIdx(dex::StringIndex idx, const char* error_fmt);
   const char* CheckLoadStringByTypeIdx(dex::TypeIndex type_idx, const char* error_fmt);
 
   // Load a field/method Id by index. Checks whether the index is in bounds, printing the error if
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 0e0929f..f14b1d5 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -176,7 +176,7 @@
       "method_id_name_idx",
       [](DexFile* dex_file) {
         DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
-        method_id->name_idx_ = 0xFF;
+        method_id->name_idx_ = dex::StringIndex(0xFF);
       },
       "String index not available for method flags verification");
 }
@@ -247,7 +247,7 @@
 
   while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
     uint32_t method_index = it.GetMemberIndex();
-    uint32_t name_index = dex_file->GetMethodId(method_index).name_idx_;
+    dex::StringIndex name_index = dex_file->GetMethodId(method_index).name_idx_;
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
@@ -635,7 +635,7 @@
         uint32_t method_idx;
         FindMethodData(dex_file, "foo", &method_idx);
         auto* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(method_idx));
-        method_id->name_idx_ = dex_file->NumStringIds();
+        method_id->name_idx_ = dex::StringIndex(dex_file->NumStringIds());
       },
       "Method may have only one of public/protected/private, LMethodFlags;.(error)");
 }
@@ -856,7 +856,7 @@
 
   while (it.HasNextStaticField() || it.HasNextInstanceField()) {
     uint32_t field_index = it.GetMemberIndex();
-    uint32_t name_index = dex_file->GetFieldId(field_index).name_idx_;
+    dex::StringIndex name_index = dex_file->GetFieldId(field_index).name_idx_;
     const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
     const char* str = dex_file->GetStringData(string_id);
     if (strcmp(name, str) == 0) {
@@ -1451,12 +1451,12 @@
             // Swap the proto parameters and shorties to break the ordering.
             std::swap(const_cast<uint32_t&>(proto1.parameters_off_),
                       const_cast<uint32_t&>(proto2.parameters_off_));
-            std::swap(const_cast<uint32_t&>(proto1.shorty_idx_),
-                      const_cast<uint32_t&>(proto2.shorty_idx_));
+            std::swap(const_cast<dex::StringIndex&>(proto1.shorty_idx_),
+                      const_cast<dex::StringIndex&>(proto2.shorty_idx_));
           } else {
             // Copy the proto parameters and shorty to create duplicate proto id.
             const_cast<uint32_t&>(proto1.parameters_off_) = proto2.parameters_off_;
-            const_cast<uint32_t&>(proto1.shorty_idx_) = proto2.shorty_idx_;
+            const_cast<dex::StringIndex&>(proto1.shorty_idx_) = proto2.shorty_idx_;
           }
         },
         "Out-of-order proto_id arguments");
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 751bd51..9902389 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -191,10 +191,11 @@
           if (file != nullptr) {
             uint32_t string_idx = VRegB_21c();
             if (string_idx < file->NumStringIds()) {
-              os << StringPrintf("const-string v%d, %s // string@%d",
-                                 VRegA_21c(),
-                                 PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                                 string_idx);
+              os << StringPrintf(
+                  "const-string v%d, %s // string@%d",
+                  VRegA_21c(),
+                  PrintableString(file->StringDataByIdx(dex::StringIndex(string_idx))).c_str(),
+                  string_idx);
             } else {
               os << StringPrintf("const-string v%d, <<invalid-string-idx-%d>> // string@%d",
                                  VRegA_21c(),
@@ -333,11 +334,12 @@
         uint32_t string_idx = VRegB_31c();
         if (file != nullptr) {
           if (string_idx < file->NumStringIds()) {
-            os << StringPrintf("%s v%d, %s // string@%d",
-                               opcode,
-                               VRegA_31c(),
-                               PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                               string_idx);
+            os << StringPrintf(
+                "%s v%d, %s // string@%d",
+                opcode,
+                VRegA_31c(),
+                PrintableString(file->StringDataByIdx(dex::StringIndex(string_idx))).c_str(),
+                string_idx);
           } else {
             os << StringPrintf("%s v%d, <<invalid-string-idx-%d>> // string@%d",
                                opcode,
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 99b9f9d..578550c 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -189,6 +189,7 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
+    kVerifyRegHPrototype      = 0x800000
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
@@ -579,6 +580,10 @@
         kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
+  int GetVerifyTypeArgumentH() const {
+    return (kInstructionVerifyFlags[Opcode()] & kVerifyRegHPrototype);
+  }
+
   int GetVerifyExtraFlags() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
         kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgNonZero | kVerifyVarArgRange |
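
A hypothetical verifier call site for the new kVerifyRegHPrototype flag, sketched under the assumption that the H operand of the 45cc/4rcc formats is read via VRegH() (the actual verifier plumbing is not part of this diff):

    // If the instruction is tagged with kVerifyRegHPrototype, its H operand is
    // a proto@ index that must be bounds-checked against the proto_ids table.
    if (inst->GetVerifyTypeArgumentH() != 0) {
      uint32_t proto_idx = inst->VRegH();
      if (proto_idx >= dex_file->NumProtoIds()) {
        // ... report a verification error for the invoke-polymorphic ...
      }
    }
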
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index e537afe..ca2ce1d 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -269,8 +269,8 @@
   V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kExperimental) \
-  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kExperimental) \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \
   V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index ac52f4e..f6eeffc 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -826,7 +826,7 @@
   return h_class.Get();
 }
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx) {
+inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveString(string_idx, referrer);
 }
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index d87dc67..7cc136e 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -188,7 +188,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, uint32_t string_idx)
+inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 397655a..82bb8e5 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -292,7 +292,7 @@
   entry_points_instrumented = instrumented;
 }
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) {
 #if !defined(__APPLE__) || !defined(__LP64__)
   switch (entry_points_allocator) {
     case gc::kAllocatorTypeDlMalloc: {
@@ -320,7 +320,12 @@
     }
     case gc::kAllocatorTypeRegionTLAB: {
       CHECK(kMovingCollector);
-      SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      if (is_marking) {
+        SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented);
+      } else {
+        // Not marking means we need no read barriers and can just use the normal TLAB case.
+        SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
+      }
       return;
     }
     default:
@@ -328,6 +333,7 @@
   }
 #else
   UNUSED(qpoints);
+  UNUSED(is_marking);
 #endif
   UNIMPLEMENTED(FATAL);
   UNREACHABLE();
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
index 14a8e04..bd1e295 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h
@@ -23,7 +23,9 @@
 
 namespace art {
 
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+// is_marking is only used by the concurrent copying (CC) collector: if the GC is marking, the
+// allocation entrypoint is the marking one.
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking);
 
 // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is
 // starting it doesn't hold the mutator lock until after it has been added to the thread list.
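
As an illustration of the intended call pattern (matching the instrumentation change later in this diff, where `thread` is the thread whose entrypoints are being reset), a caller selects the marking entrypoints only while the concurrent copying collector is actively marking:

    // Only read-barrier (CC) configurations distinguish the two cases; other
    // collectors ignore is_marking.
    ResetQuickAllocEntryPoints(qpoints, kUseReadBarrier && thread->GetIsGcMarking());
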
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index df23f94..78dad94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -31,7 +31,7 @@
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
   // Alloc
-  ResetQuickAllocEntryPoints(qpoints);
+  ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true);
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index b1259e1..5dad43e 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -66,7 +66,7 @@
       // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything.
       (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
                                                        : Runtime::kSaveEverything);
-  mirror::String* result = ResolveStringFromCode(caller, string_idx);
+  mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr)) {
     // For AOT code, we need a write barrier for the class loader that holds
     // the GC roots in the .bss.
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 40186f8..2e4475f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -2068,26 +2068,30 @@
   size_t largest_continuous_free_pages = 0;
   WriterMutexLock wmu(self, bulk_free_lock_);
   MutexLock mu(self, lock_);
+  uint64_t total_free = 0;
   for (FreePageRun* fpr : free_page_runs_) {
     largest_continuous_free_pages = std::max(largest_continuous_free_pages,
                                              fpr->ByteSize(this));
+    total_free += fpr->ByteSize(this);
   }
+  size_t required_bytes = 0;
+  const char* new_buffer_msg = "";
   if (failed_alloc_bytes > kLargeSizeThreshold) {
     // Large allocation.
-    size_t required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
-    if (required_bytes > largest_continuous_free_pages) {
-      os << "; failed due to fragmentation (required continguous free "
-         << required_bytes << " bytes where largest contiguous free "
-         <<  largest_continuous_free_pages << " bytes)";
-    }
+    required_bytes = RoundUp(failed_alloc_bytes, kPageSize);
   } else {
     // Non-large allocation.
-    size_t required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
-    if (required_bytes > largest_continuous_free_pages) {
-      os << "; failed due to fragmentation (required continguous free "
-         << required_bytes << " bytes for a new buffer where largest contiguous free "
-         <<  largest_continuous_free_pages << " bytes)";
-    }
+    required_bytes = numOfPages[SizeToIndex(failed_alloc_bytes)] * kPageSize;
+    new_buffer_msg = " for a new buffer";
+  }
+  if (required_bytes > largest_continuous_free_pages) {
+    os << "; failed due to fragmentation ("
+       << "required contiguous free " << required_bytes << " bytes" << new_buffer_msg
+       << ", largest contiguous free " << largest_continuous_free_pages << " bytes"
+       << ", total free pages " << total_free << " bytes"
+       << ", space footprint " << footprint_ << " bytes"
+       << ", space max capacity " << max_capacity_ << " bytes"
+       << ")" << std::endl;
   }
 }
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8353b26..fbab73f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -812,13 +812,13 @@
   false_gray_stack_.clear();
 }
 
-
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Barrier* barrier = thread_list->EmptyCheckpointBarrier();
   barrier->Init(self, 0);
-  size_t barrier_count = thread_list->RunEmptyCheckpoint();
+  std::vector<uint32_t> runnable_thread_ids;  // Used in debug builds only.
+  size_t barrier_count = thread_list->RunEmptyCheckpoint(runnable_thread_ids);
   // If there are no threads to wait for, which implies that all the checkpoint functions have
   // finished, then there is no need to release the mutator lock.
   if (barrier_count == 0) {
@@ -828,7 +828,27 @@
   Locks::mutator_lock_->SharedUnlock(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-    barrier->Increment(self, barrier_count);
+    if (kIsDebugBuild) {
+      static constexpr uint64_t kEmptyCheckpointTimeoutMs = 600 * 1000;  // 10 minutes.
+      bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointTimeoutMs);
+      if (timed_out) {
+        Runtime* runtime = Runtime::Current();
+        std::ostringstream ss;
+        ss << "Empty checkpoint timeout\n";
+        ss << "Barrier count " << barrier->GetCount(self) << "\n";
+        ss << "Runnable thread IDs";
+        for (uint32_t tid : runnable_thread_ids) {
+          ss << " " << tid;
+        }
+        ss << "\n";
+        Locks::mutator_lock_->Dump(ss);
+        ss << "\n";
+        runtime->GetThreadList()->Dump(ss);
+        LOG(FATAL) << ss.str();
+      }
+    } else {
+      barrier->Increment(self, barrier_count);
+    }
   }
   Locks::mutator_lock_->SharedLock(self);
 }
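
The debug-only path above replaces a plain wait with a timed wait so that a wedged checkpoint produces a diagnosable abort rather than a silent hang. A minimal sketch of the same pattern in standard C++ (this is the shape of the idea, not the ART Barrier API):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    // Wait until `pending` reaches zero, or give up after `timeout`.
    // Returns true on timeout so the caller can dump thread state and abort.
    bool WaitForCheckpoints(std::mutex& m, std::condition_variable& cv,
                            int& pending, std::chrono::milliseconds timeout) {
      std::unique_lock<std::mutex> lock(m);
      return !cv.wait_for(lock, timeout, [&] { return pending == 0; });
    }
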
@@ -2145,14 +2165,18 @@
       to_ref->SetReadBarrierState(ReadBarrier::GrayState());
     }
 
+    // Do a fence to prevent the field CAS in ConcurrentCopying::Process from possibly reordering
+    // before the object copy.
+    QuasiAtomic::ThreadFenceRelease();
+
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
 
     // Try to atomically write the fwd ptr.
-    bool success = from_ref->CasLockWordWeakSequentiallyConsistent(old_lock_word, new_lock_word);
+    bool success = from_ref->CasLockWordWeakRelaxed(old_lock_word, new_lock_word);
     if (LIKELY(success)) {
       // The CAS succeeded.
-      objects_moved_.FetchAndAddSequentiallyConsistent(1);
-      bytes_moved_.FetchAndAddSequentiallyConsistent(region_space_alloc_size);
+      objects_moved_.FetchAndAddRelaxed(1);
+      bytes_moved_.FetchAndAddRelaxed(region_space_alloc_size);
       if (LIKELY(!fall_back_to_non_moving)) {
         DCHECK(region_space_->IsInToSpace(to_ref));
       } else {
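
The relaxed CAS above is safe only because of the standalone release fence: the fence orders the object copy before the lock-word store, so any thread that observes the forwarding address also observes the copied contents. A compact sketch of the idiom with std::atomic (assumed types, not the ART LockWord/QuasiAtomic API):

    #include <atomic>
    #include <cstdint>

    std::atomic<uintptr_t> lock_word;  // stands in for the object's lock word

    bool PublishForwardingAddress(uintptr_t old_word, uintptr_t fwd_word) {
      // Every store performed while copying the object must become visible
      // before the forwarding address does; the release fence enforces that,
      // which lets the CAS itself be relaxed.
      std::atomic_thread_fence(std::memory_order_release);
      uintptr_t expected = old_word;
      return lock_word.compare_exchange_weak(expected, fwd_word,
                                             std::memory_order_relaxed);
    }
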
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 97129e8..54f2210 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -247,7 +247,7 @@
   if (allocator_type != kAllocatorTypeTLAB &&
       allocator_type != kAllocatorTypeRegionTLAB &&
       allocator_type != kAllocatorTypeRosAlloc &&
-      UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
+      UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) {
     return nullptr;
   }
   mirror::Object* ret;
@@ -267,8 +267,9 @@
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind or asan, we should be using the instrumented path.
         size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
-                                                      max_bytes_tl_bulk_allocated))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+                                               max_bytes_tl_bulk_allocated,
+                                               kGrow))) {
           return nullptr;
         }
         ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
@@ -277,14 +278,18 @@
         DCHECK(!is_running_on_memory_tool_);
         size_t max_bytes_tl_bulk_allocated =
             rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size);
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type,
-                                                      max_bytes_tl_bulk_allocated))) {
+        if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type,
+                                               max_bytes_tl_bulk_allocated,
+                                               kGrow))) {
           return nullptr;
         }
         if (!kInstrumented) {
           DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size));
         }
-        ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+        ret = rosalloc_space_->AllocNonvirtual(self,
+                                               alloc_size,
+                                               bytes_allocated,
+                                               usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
@@ -292,22 +297,34 @@
     case kAllocatorTypeDlMalloc: {
       if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) {
         // If running on valgrind, we should be using the instrumented path.
-        ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+        ret = dlmalloc_space_->Alloc(self,
+                                     alloc_size,
+                                     bytes_allocated,
+                                     usable_size,
                                      bytes_tl_bulk_allocated);
       } else {
         DCHECK(!is_running_on_memory_tool_);
-        ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size,
+        ret = dlmalloc_space_->AllocNonvirtual(self,
+                                               alloc_size,
+                                               bytes_allocated,
+                                               usable_size,
                                                bytes_tl_bulk_allocated);
       }
       break;
     }
     case kAllocatorTypeNonMoving: {
-      ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+      ret = non_moving_space_->Alloc(self,
+                                     alloc_size,
+                                     bytes_allocated,
+                                     usable_size,
                                      bytes_tl_bulk_allocated);
       break;
     }
     case kAllocatorTypeLOS: {
-      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size,
+      ret = large_object_space_->Alloc(self,
+                                       alloc_size,
+                                       bytes_allocated,
+                                       usable_size,
                                        bytes_tl_bulk_allocated);
       // Note that the bump pointer spaces aren't necessarily next to
       // the other continuous spaces like the non-moving alloc space or
@@ -315,80 +332,38 @@
       DCHECK(ret == nullptr || large_object_space_->Contains(ret));
       break;
     }
-    case kAllocatorTypeTLAB: {
-      DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment);
-      if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
-        if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) {
-          return nullptr;
-        }
-        // Try allocating a new thread local buffer, if the allocaiton fails the space must be
-        // full so return null.
-        if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
-          return nullptr;
-        }
-        *bytes_tl_bulk_allocated = new_tlab_size;
-      } else {
-        *bytes_tl_bulk_allocated = 0;
-      }
-      // The allocation can't fail.
-      ret = self->AllocTlab(alloc_size);
-      DCHECK(ret != nullptr);
-      *bytes_allocated = alloc_size;
-      *usable_size = alloc_size;
-      break;
-    }
     case kAllocatorTypeRegion: {
       DCHECK(region_space_ != nullptr);
       alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment);
-      ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
+      ret = region_space_->AllocNonvirtual<false>(alloc_size,
+                                                  bytes_allocated,
+                                                  usable_size,
                                                   bytes_tl_bulk_allocated);
       break;
     }
+    case kAllocatorTypeTLAB:
+      FALLTHROUGH_INTENDED;
     case kAllocatorTypeRegionTLAB: {
-      DCHECK(region_space_ != nullptr);
-      DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment);
+      DCHECK_ALIGNED(alloc_size, kObjectAlignment);
+      static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment,
+                    "mismatched alignments");
+      static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment,
+                    "mismatched alignments");
       if (UNLIKELY(self->TlabSize() < alloc_size)) {
-        if (space::RegionSpace::kRegionSize >= alloc_size) {
-          // Non-large. Check OOME for a tlab.
-          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) {
-            // Try to allocate a tlab.
-            if (!region_space_->AllocNewTlab(self)) {
-              // Failed to allocate a tlab. Try non-tlab.
-              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                          bytes_tl_bulk_allocated);
-              return ret;
-            }
-            *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
-            // Fall-through.
-          } else {
-            // Check OOME for a non-tlab allocation.
-            if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) {
-              ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                          bytes_tl_bulk_allocated);
-              return ret;
-            } else {
-              // Neither tlab or non-tlab works. Give up.
-              return nullptr;
-            }
-          }
-        } else {
-          // Large. Check OOME.
-          if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
-            ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size,
-                                                        bytes_tl_bulk_allocated);
-            return ret;
-          } else {
-            return nullptr;
-          }
-        }
-      } else {
-        *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
+        // kAllocatorTypeTLAB may be the allocator for the region space TLAB if the GC is not
+        // marking; that is why the allocator type is not passed down.
+        return AllocWithNewTLAB(self,
+                                alloc_size,
+                                kGrow,
+                                bytes_allocated,
+                                usable_size,
+                                bytes_tl_bulk_allocated);
       }
       // The allocation can't fail.
       ret = self->AllocTlab(alloc_size);
       DCHECK(ret != nullptr);
       *bytes_allocated = alloc_size;
+      *bytes_tl_bulk_allocated = 0;  // Allocated in an existing buffer.
       *usable_size = alloc_size;
       break;
     }
@@ -408,15 +383,16 @@
   return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
 }
 
-template <bool kGrow>
-inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+                                            size_t alloc_size,
+                                            bool grow) {
   size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
     }
     if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
-      if (!kGrow) {
+      if (!grow) {
         return true;
       }
       // TODO: Grow for allocation is racy, fix it.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6a97edd..8ff5e5a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1326,7 +1326,9 @@
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
-      << " free bytes and " << PrettySize(GetFreeMemoryUntilOOME()) << " until OOM";
+      << " free bytes and " << PrettySize(GetFreeMemoryUntilOOME()) << " until OOM,"
+      << " max allowed footprint " << max_allowed_footprint_ << ", growth limit "
+      << growth_limit_;
   // If the allocation failed due to fragmentation, print out the largest continuous allocation.
   if (total_bytes_free >= byte_count) {
     space::AllocSpace* space = nullptr;
@@ -1819,7 +1821,7 @@
           break;
         }
         // Try to transition the heap if the allocation failure was due to the space being full.
-        if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) {
+        if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) {
           // If we aren't out of memory then the OOM was probably from the non moving space being
           // full. Attempt to disable compaction and turn the main space into a non moving space.
           DisableMovingGc();
@@ -4219,5 +4221,72 @@
   gc_pause_listener_.StoreRelaxed(nullptr);
 }
 
+mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
+                                       size_t alloc_size,
+                                       bool grow,
+                                       size_t* bytes_allocated,
+                                       size_t* usable_size,
+                                       size_t* bytes_tl_bulk_allocated) {
+  const AllocatorType allocator_type = GetCurrentAllocator();
+  if (allocator_type == kAllocatorTypeTLAB) {
+    DCHECK(bump_pointer_space_ != nullptr);
+    const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+    if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
+      return nullptr;
+    }
+    // Try allocating a new thread-local buffer; if the allocation fails, the space must be
+    // full, so return null.
+    if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) {
+      return nullptr;
+    }
+    *bytes_tl_bulk_allocated = new_tlab_size;
+  } else {
+    DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
+    DCHECK(region_space_ != nullptr);
+    if (space::RegionSpace::kRegionSize >= alloc_size) {
+      // Non-large. Check OOME for a tlab.
+      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
+                                            space::RegionSpace::kRegionSize,
+                                            grow))) {
+        // Try to allocate a tlab.
+        if (!region_space_->AllocNewTlab(self)) {
+          // Failed to allocate a tlab. Try non-tlab.
+          return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                       bytes_allocated,
+                                                       usable_size,
+                                                       bytes_tl_bulk_allocated);
+        }
+        *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+        // Fall-through to using the TLAB below.
+      } else {
+        // Check OOME for a non-tlab allocation.
+        if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
+          return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                       bytes_allocated,
+                                                       usable_size,
+                                                       bytes_tl_bulk_allocated);
+        }
+        // Neither tlab nor non-tlab works. Give up.
+        return nullptr;
+      }
+    } else {
+      // Large. Check OOME.
+      if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
+        return region_space_->AllocNonvirtual<false>(alloc_size,
+                                                     bytes_allocated,
+                                                     usable_size,
+                                                     bytes_tl_bulk_allocated);
+      }
+      return nullptr;
+    }
+  }
+  // Refilled TLAB, return.
+  mirror::Object* ret = self->AllocTlab(alloc_size);
+  DCHECK(ret != nullptr);
+  *bytes_allocated = alloc_size;
+  *usable_size = alloc_size;
+  return ret;
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0c671d2..3a8e29b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -854,6 +854,10 @@
         allocator_type != kAllocatorTypeRegionTLAB;
   }
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
+    if (kUseReadBarrier) {
+      // Read barrier configs may use the TLAB allocator but are always concurrent. TODO: clean this up.
+      return true;
+    }
     return
         allocator_type != kAllocatorTypeBumpPointer &&
         allocator_type != kAllocatorTypeTLAB;
@@ -923,11 +927,20 @@
                                               size_t* bytes_tl_bulk_allocated)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::Object* AllocWithNewTLAB(Thread* self,
+                                   size_t alloc_size,
+                                   bool grow,
+                                   size_t* bytes_allocated,
+                                   size_t* usable_size,
+                                   size_t* bytes_tl_bulk_allocated)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template <bool kGrow>
-  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
+                                               size_t alloc_size,
+                                               bool grow);
 
   // Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
   void RunFinalization(JNIEnv* env, uint64_t timeout);
diff --git a/runtime/handle.h b/runtime/handle.h
index 3db3be2..e4b6d29 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -61,6 +61,10 @@
     return down_cast<T*>(reference_->AsMirrorPtr());
   }
 
+  ALWAYS_INLINE bool IsNull() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Get() == nullptr;
+  }
+
   ALWAYS_INLINE jobject ToJObject() const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
       // Special case so that we work with null handles.
diff --git a/runtime/image.cc b/runtime/image.cc
index bd5ba93..52c9f4e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index d4c322e..870d1ae 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -630,7 +630,7 @@
 }
 
 static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
-  thread->ResetQuickAllocEntryPointsForThread();
+  thread->ResetQuickAllocEntryPointsForThread(kUseReadBarrier && thread->GetIsGcMarking());
 }
 
 void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 72dbe6a..22da07d 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -865,11 +865,6 @@
   // The invoke_method_idx here is the name of the signature polymorphic method that
   // was symbolically invoked in bytecode (say MethodHandle.invoke or MethodHandle.invokeExact)
   // and not the method that we'll dispatch to in the end.
-  //
-  // TODO(narayan) We'll have to check in the verifier that this is in fact a
-  // signature polymorphic method so that we disallow calls via invoke-polymorphic
-  // to non sig-poly methods. This would also have the side effect of verifying
-  // that vRegC really is a reference type.
   StackHandleScope<6> hs(self);
   Handle<mirror::MethodHandleImpl> method_handle(hs.NewHandle(
       ObjPtr<mirror::MethodHandleImpl>::DownCast(
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 9c26d24..c9a5b44 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -236,7 +236,7 @@
 // java.lang.String class is initialized.
 static inline ObjPtr<mirror::String> ResolveString(Thread* self,
                                                    ShadowFrame& shadow_frame,
-                                                   uint32_t string_idx)
+                                                   dex::StringIndex string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ObjPtr<mirror::Class> java_lang_string_class = mirror::String::GetJavaLangString();
   if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
@@ -251,11 +251,11 @@
   ArtMethod* method = shadow_frame.GetMethod();
   ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize,
+  DCHECK_LT(string_idx.index_ % mirror::DexCache::kDexCacheStringCacheSize,
             declaring_class->GetDexFile().NumStringIds());
   ObjPtr<mirror::String> string_ptr =
       mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
-                                         string_idx,
+                                         string_idx.index_,
                                          mirror::DexCache::kDexCacheStringCacheSize).Read();
   if (UNLIKELY(string_ptr == nullptr)) {
     StackHandleScope<1> hs(self);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 22c0fe0..52eacd5 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -373,7 +373,9 @@
         break;
       case Instruction::CONST_STRING: {
         PREAMBLE();
-        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_21c());
+        ObjPtr<mirror::String> s = ResolveString(self,
+                                                 shadow_frame,
+                                                 dex::StringIndex(inst->VRegB_21c()));
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
@@ -384,7 +386,9 @@
       }
       case Instruction::CONST_STRING_JUMBO: {
         PREAMBLE();
-        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_31c());
+        ObjPtr<mirror::String> s = ResolveString(self,
+                                                 shadow_frame,
+                                                 dex::StringIndex(inst->VRegB_31c()));
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 9fbbbd3..441c1a1 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,11 +31,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index ada0326..6ffbd3f 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -285,12 +285,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -301,11 +304,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index c791eb5..7125d5a 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -292,3 +292,41 @@
 .macro REFRESH_IBASE
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
+
+/*
+ * Save two registers to the stack.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Restore two registers from the stack.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Increase frame size and save two registers to the bottom of the stack.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Restore two registers from the bottom of the stack and decrease frame size.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index fbfed40..c8c1563 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -291,7 +291,7 @@
                                    ShadowFrame* shadow_frame,
                                    Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  ObjPtr<mirror::String> s = ResolveString(self, *shadow_frame, index);
+  ObjPtr<mirror::String> s = ResolveString(self, *shadow_frame, dex::StringIndex(index));
   if (UNLIKELY(s == nullptr)) {
     return true;
   }
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 42f8c1b..34d99a8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -300,6 +300,44 @@
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
 
+/*
+ * Save two registers to the stack.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Restore two registers from the stack.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Increase frame size and save two registers to the bottom of the stack.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Restore two registers from the bottom of the stack and decrease frame size.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
+
 /* File: arm64/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -334,11 +372,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
@@ -7226,12 +7264,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -7242,11 +7283,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2ae989a..93f50ad 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -23,6 +23,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
+#include "cha.h"
 #include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
@@ -217,7 +218,9 @@
                                   const uint8_t* code,
                                   size_t code_size,
                                   bool osr,
-                                  Handle<mirror::ObjectArray<mirror::Object>> roots) {
+                                  Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                  bool has_should_deoptimize_flag,
+                                  const ArenaSet<ArtMethod*>& cha_single_implementation_list) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        stack_map,
@@ -228,7 +231,9 @@
                                        code,
                                        code_size,
                                        osr,
-                                       roots);
+                                       roots,
+                                       has_should_deoptimize_flag,
+                                       cha_single_implementation_list);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
@@ -242,7 +247,9 @@
                                 code,
                                 code_size,
                                 osr,
-                                roots);
+                                roots,
+                                has_should_deoptimize_flag,
+                                cha_single_implementation_list);
   }
   return result;
 }
@@ -277,6 +284,10 @@
   reinterpret_cast<uint32_t*>(roots_data)[length] = length;
 }
 
+static const uint8_t* FromStackMapToRoots(const uint8_t* stack_map_data) {
+  return stack_map_data - ComputeRootTableSize(GetNumberOfRoots(stack_map_data));
+}
+
 static void FillRootTable(uint8_t* roots_data, Handle<mirror::ObjectArray<mirror::Object>> roots)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   GcRoot<mirror::Object>* gc_roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data);
@@ -293,7 +304,6 @@
     }
     gc_roots[i] = GcRoot<mirror::Object>(object);
   }
-  FillRootTableLength(roots_data, length);
 }
 
 static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) {
@@ -359,7 +369,7 @@
   }
 }
 
-void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+void JitCodeCache::FreeCode(const void* code_ptr) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
@@ -368,41 +378,69 @@
   FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
+void JitCodeCache::FreeAllMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  {
+    MutexLock mu(Thread::Current(), *Locks::cha_lock_);
+    Runtime::Current()->GetClassHierarchyAnalysis()
+        ->RemoveDependentsWithMethodHeaders(method_headers);
+  }
+
+  // We need to remove the entries in method_headers from the CHA dependencies
+  // first, since once we do FreeCode() below the memory can be reused, making
+  // it possible for the same method_header to start representing different
+  // compiled code.
+  MutexLock mu(Thread::Current(), lock_);
+  ScopedCodeCacheWrite scc(code_map_.get());
+  for (const OatQuickMethodHeader* method_header : method_headers) {
+    FreeCode(method_header->GetCode());
+  }
+}
+
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  MutexLock mu(self, lock_);
-  // We do not check if a code cache GC is in progress, as this method comes
-  // with the classlinker_classes_lock_ held, and suspending ourselves could
-  // lead to a deadlock.
+  // We use a set to first collect all method_headers whose code needs to be
+  // removed. The underlying code is freed only after the CHA dependencies for
+  // the entries in this set have been removed, and it's more efficient to
+  // iterate through the CHA dependency map just once with an unordered_set.
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-      if (alloc.ContainsUnsafe(it->second)) {
-        FreeCode(it->first, it->second);
-        it = method_code_map_.erase(it);
+    MutexLock mu(self, lock_);
+    // We do not check if a code cache GC is in progress, as this method comes
+    // with the classlinker_classes_lock_ held, and suspending ourselves could
+    // lead to a deadlock.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        if (alloc.ContainsUnsafe(it->second)) {
+          method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+          it = method_code_map_.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+    for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->first)) {
+        // Note that the code has already been pushed to method_headers in the loop
+        // above and is going to be freed in FreeCode() below.
+        it = osr_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+    for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+      ProfilingInfo* info = *it;
+      if (alloc.ContainsUnsafe(info->GetMethod())) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+        FreeData(reinterpret_cast<uint8_t*>(info));
+        it = profiling_infos_.erase(it);
       } else {
         ++it;
       }
     }
   }
-  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
-    if (alloc.ContainsUnsafe(it->first)) {
-      // Note that the code has already been removed in the loop above.
-      it = osr_code_map_.erase(it);
-    } else {
-      ++it;
-    }
-  }
-  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
-    ProfilingInfo* info = *it;
-    if (alloc.ContainsUnsafe(info->GetMethod())) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
-      FreeData(reinterpret_cast<uint8_t*>(info));
-      it = profiling_infos_.erase(it);
-    } else {
-      ++it;
-    }
-  }
+  FreeAllMethodHeaders(method_headers);
 }
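
The restructuring above is an instance of a general collect-then-free pattern: gather the victims under the map lock, drop cross-cutting dependencies (here, CHA) first, and only then release the memory, so a recycled header can never alias a live dependency. Schematically, with hypothetical names rather than the JitCodeCache API:

    #include <map>
    #include <mutex>
    #include <unordered_set>

    struct Header {};                      // stands in for OatQuickMethodHeader
    std::mutex map_lock;                   // stands in for JitCodeCache::lock_
    std::map<Header*, void*> code_map;     // code pointer -> method

    bool ShouldRemove(void* method);                               // assumed predicate
    void RemoveDependencies(const std::unordered_set<Header*>&);   // CHA step
    void FreeAll(const std::unordered_set<Header*>&);              // reuses memory

    void RemoveVictims() {
      std::unordered_set<Header*> victims;
      {
        std::lock_guard<std::mutex> guard(map_lock);
        for (auto it = code_map.begin(); it != code_map.end();) {
          if (ShouldRemove(it->second)) {
            victims.insert(it->first);
            it = code_map.erase(it);
          } else {
            ++it;
          }
        }
      }
      // Outside map_lock: dependencies first, memory second, so a freed
      // header cannot be reused while CHA still points at it.
      RemoveDependencies(victims);
      FreeAll(victims);
    }
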
 
 bool JitCodeCache::IsWeakAccessEnabled(Thread* self) const {
@@ -464,13 +502,15 @@
                                           const uint8_t* code,
                                           size_t code_size,
                                           bool osr,
-                                          Handle<mirror::ObjectArray<mirror::Object>> roots) {
+                                          Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                          bool has_should_deoptimize_flag,
+                                          const ArenaSet<ArtMethod*>&
+                                              cha_single_implementation_list) {
   DCHECK(stack_map != nullptr);
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   size_t total_size = header_size + code_size;
-  const uint32_t num_roots = roots->GetLength();
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
@@ -483,9 +523,6 @@
       ScopedCodeCacheWrite scc(code_map_.get());
       memory = AllocateCode(total_size);
       if (memory == nullptr) {
-        // Fill root table length so that ClearData works correctly in case of failure. Otherwise
-        // the length will be 0 and cause incorrect DCHECK failure.
-        FillRootTableLength(roots_data, num_roots);
         return nullptr;
       }
       code_ptr = memory + header_size;
@@ -506,15 +543,48 @@
       // https://patchwork.kernel.org/patch/9047921/
       FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
                             reinterpret_cast<char*>(code_ptr + code_size));
+      DCHECK(!Runtime::Current()->IsAotCompiler());
+      if (has_should_deoptimize_flag) {
+        method_header->SetHasShouldDeoptimizeFlag();
+      }
     }
 
     number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
+    // Need cha_lock_ for checking all single-implementation flags and register
+    // dependencies.
+    MutexLock cha_mu(self, *Locks::cha_lock_);
+    bool single_impl_still_valid = true;
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      if (!single_impl->HasSingleImplementation()) {
+        // We simply discard the compiled code. Clear the
+        // counter so that it may be recompiled later. Hopefully the
+        // class hierarchy will be more stable when compilation is retried.
+        single_impl_still_valid = false;
+        method->ClearCounter();
+        break;
+      }
+    }
+
+    // Discard the code if any single-implementation assumptions are now invalid.
+    if (!single_impl_still_valid) {
+      VLOG(jit) << "JIT discarded jitted code due to invalid single-implementation assumptions.";
+      return nullptr;
+    }
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      Runtime::Current()->GetClassHierarchyAnalysis()->AddDependency(
+          single_impl, method, method_header);
+    }
+
+    // The following also needs to be guarded by cha_lock_. Otherwise it's
+    // possible that some class linking invalidates the compiled code while,
+    // below, we still publish it as the valid code for the method.
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
     // Fill the root table before updating the entry point.
+    DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
     FillRootTable(roots_data, roots);
     if (osr) {
       number_of_osr_compilations_++;
@@ -535,7 +605,8 @@
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
         << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
-        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() +
+                                         method_header->GetCodeSize());
     histogram_code_memory_use_.AddValue(code_size);
     if (code_size > kCodeSizeLogThreshold) {
       LOG(INFO) << "JIT allocated "
@@ -566,10 +637,6 @@
   return used_memory_for_data_;
 }
 
-static const uint8_t* FromStackMapToRoots(const uint8_t* stack_map_data) {
-  return stack_map_data - ComputeRootTableSize(GetNumberOfRoots(stack_map_data));
-}
-
 void JitCodeCache::ClearData(Thread* self,
                              uint8_t* stack_map_data,
                              uint8_t* roots_data) {
@@ -612,8 +679,14 @@
               << " for stack maps of "
               << ArtMethod::PrettyMethod(method);
   }
-  *roots_data = result;
-  *stack_map_data = result + table_size;
+  if (result != nullptr) {
+    *roots_data = result;
+    *stack_map_data = result + table_size;
+    FillRootTableLength(*roots_data, number_of_roots);
+  } else {
+    *roots_data = nullptr;
+    *stack_map_data = nullptr;
+  }
 }
 
 class MarkCodeVisitor FINAL : public StackVisitor {
@@ -836,20 +909,23 @@
 
 void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
   ScopedTrace trace(__FUNCTION__);
-  MutexLock mu(self, lock_);
-  ScopedCodeCacheWrite scc(code_map_.get());
-  // Iterate over all compiled code and remove entries that are not marked.
-  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-    const void* code_ptr = it->first;
-    ArtMethod* method = it->second;
-    uintptr_t allocation = FromCodeToAllocation(code_ptr);
-    if (GetLiveBitmap()->Test(allocation)) {
-      ++it;
-    } else {
-      FreeCode(code_ptr, method);
-      it = method_code_map_.erase(it);
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
+  {
+    MutexLock mu(self, lock_);
+    ScopedCodeCacheWrite scc(code_map_.get());
+    // Iterate over all compiled code and remove entries that are not marked.
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      const void* code_ptr = it->first;
+      uintptr_t allocation = FromCodeToAllocation(code_ptr);
+      if (GetLiveBitmap()->Test(allocation)) {
+        ++it;
+      } else {
+        method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+        it = method_code_map_.erase(it);
+      }
     }
   }
+  FreeAllMethodHeaders(method_headers);
 }
 
 void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
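
Note on the CommitCode() hunk above: the assumption check and the dependency
registration happen inside a single cha_lock_ critical section, so class
linking cannot invalidate an assumption between the two steps. A minimal
standalone sketch of that check-then-register shape (standard C++ only;
Method and Registry are illustrative stand-ins, not ART types):

    #include <mutex>
    #include <set>
    #include <utility>
    #include <vector>

    struct Method { bool has_single_implementation = true; };

    class Registry {
     public:
      // Returns false (registering nothing) if any assumption is already stale.
      bool CheckAndRegister(const std::set<Method*>& assumptions, void* code) {
        std::lock_guard<std::mutex> lock(mu_);  // one lock spans check and register
        for (Method* m : assumptions) {
          if (!m->has_single_implementation) {
            return false;  // caller discards the compiled code
          }
        }
        for (Method* m : assumptions) {
          dependencies_.emplace_back(m, code);  // later invalidation finds this
        }
        return true;
      }

     private:
      std::mutex mu_;
      std::vector<std::pair<Method*, void*>> dependencies_;
    };
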
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index be2cec5..30e2efb 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -20,6 +20,7 @@
 #include "instrumentation.h"
 
 #include "atomic.h"
+#include "base/arena_containers.h"
 #include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -91,6 +92,11 @@
       REQUIRES(!lock_);
 
   // Allocate and write code and its metadata to the code cache.
+  // `cha_single_implementation_list` needs to be registered via CHA (if it's
+  // still valid), since the compiled code still needs to be invalidated if the
+  // single-implementation assumptions are violated later. This needs to be done
+  // even if `has_should_deoptimize_flag` is false, which can happen due to CHA
+  // guard elimination.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
                       uint8_t* stack_map,
@@ -101,7 +107,9 @@
                       const uint8_t* code,
                       size_t code_size,
                       bool osr,
-                      Handle<mirror::ObjectArray<mirror::Object>> roots)
+                      Handle<mirror::ObjectArray<mirror::Object>> roots,
+                      bool has_should_deoptimize_flag,
+                      const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -230,7 +238,9 @@
                               const uint8_t* code,
                               size_t code_size,
                               bool osr,
-                              Handle<mirror::ObjectArray<mirror::Object>> roots)
+                              Handle<mirror::ObjectArray<mirror::Object>> roots,
+                              bool has_should_deoptimize_flag,
+                              const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -245,8 +255,13 @@
   bool WaitForPotentialCollectionToComplete(Thread* self)
       REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
 
-  // Free in the mspace allocations taken by 'method'.
-  void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+  // Remove CHA dependents and underlying allocations for entries in `method_headers`.
+  void FreeAllMethodHeaders(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(!lock_)
+      REQUIRES(!Locks::cha_lock_);
+
+  // Free in the mspace allocations for `code_ptr`.
+  void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSizeLocked() REQUIRES(lock_);
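
Note: FreeAllMethodHeaders is declared REQUIRES(!lock_) and
REQUIRES(!Locks::cha_lock_), which forces the two-phase shape used in
RemoveUnmarkedCode above: unlink entries under the cache lock, then free them
(and drop their CHA dependencies) outside it, so lock_ and cha_lock_ are never
nested in an inconsistent order. A standalone sketch of the shape
(illustrative types, not the ART API):

    #include <map>
    #include <mutex>
    #include <unordered_set>

    struct Header {};

    class Cache {
     public:
      void RemoveUnmarked() {
        std::unordered_set<Header*> doomed;
        {
          std::lock_guard<std::mutex> lock(mu_);  // phase 1: unlink under the lock
          for (auto it = code_map_.begin(); it != code_map_.end();) {
            if (!IsMarked(it->first)) {
              doomed.insert(it->second);
              it = code_map_.erase(it);
            } else {
              ++it;
            }
          }
        }
        // Phase 2: free outside the lock; taking other locks here is now safe.
        for (Header* h : doomed) { delete h; }
      }

     private:
      bool IsMarked(const void*) const { return false; }  // stand-in for the live bitmap
      std::mutex mu_;
      std::map<const void*, Header*> code_map_;
    };
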
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 4136488..53d0eea 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -180,6 +180,7 @@
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
+  friend class Dex2oatLayoutTest;
 
   DexFileToProfileInfoMap info_;
 };
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 52a0f23..398bfbc 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -29,7 +29,7 @@
 class Object;
 }  // namespace mirror
 
-union PACKED(4) JValue {
+union PACKED(alignof(mirror::Object*)) JValue {
   // We default initialize JValue instances to all-zeros.
   JValue() : j(0) {}
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b11dad8..7d7c1d7 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -424,6 +424,29 @@
   }
 }
 
+template<bool kUnchecked>
+void PointerArray::Memcpy(int32_t dst_pos,
+                          ObjPtr<PointerArray> src,
+                          int32_t src_pos,
+                          int32_t count,
+                          PointerSize ptr_size) {
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  DCHECK(!src.IsNull());
+  if (ptr_size == PointerSize::k64) {
+    LongArray* l_this = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this))
+                                    : AsLongArray());
+    LongArray* l_src = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(src.Ptr()))
+                                   : src->AsLongArray());
+    l_this->Memcpy(dst_pos, l_src, src_pos, count);
+  } else {
+    IntArray* i_this = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this))
+                                   : AsIntArray());
+    IntArray* i_src = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(src.Ptr()))
+                                  : src->AsIntArray());
+    i_this->Memcpy(dst_pos, i_src, src_pos, count);
+  }
+}
+
 template<typename T>
 inline void PrimitiveArray<T>::SetArrayClass(ObjPtr<Class> array_class) {
   CHECK(array_class_.IsNull());
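
Note: PointerArray::Memcpy dispatches on PointerSize so that every copy is
done in whole 8-byte (LongArray) or 4-byte (IntArray) elements; a racing
reader can therefore never observe half of a pointer. A standalone model of
the element-sized-copy idea (illustrative; it assumes aligned word stores are
indivisible, as on typical hardware):

    #include <cstddef>
    #include <cstdint>

    // A byte-wise memcpy could be observed mid-element by a racing reader;
    // copying in 4- or 8-byte units keeps each slot update indivisible.
    void CopyPointerSlots(void* dst, const void* src, size_t count, size_t ptr_size) {
      if (ptr_size == 8u) {
        auto* d = static_cast<uint64_t*>(dst);
        auto* s = static_cast<const uint64_t*>(src);
        for (size_t i = 0; i < count; ++i) d[i] = s[i];
      } else {
        auto* d = static_cast<uint32_t*>(dst);
        auto* s = static_cast<const uint32_t*>(src);
        for (size_t i = 0; i < count; ++i) d[i] = s[i];
      }
    }

    int main() {
      uint64_t src[2] = {1, 2};
      uint64_t dst[2] = {0, 0};
      CopyPointerSlots(dst, src, 2, 8u);
    }
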
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 994e9b2..19d300e 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -208,6 +208,17 @@
             typename Visitor>
   void Fixup(mirror::PointerArray* dest, PointerSize pointer_size, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Works like memcpy(), except we guarantee not to allow tearing of array values (i.e. we never
+  // use copies smaller than the element size). Arguments are assumed to be within the bounds of
+  // the array and the arrays to be non-null. Cannot be called in an active transaction.
+  template<bool kUnchecked = false>
+  void Memcpy(int32_t dst_pos,
+              ObjPtr<PointerArray> src,
+              int32_t src_pos,
+              int32_t count,
+              PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
 }  // namespace mirror
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5def65e..5fdf8f3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -238,7 +238,7 @@
 template<VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption>
 inline PointerArray* Class::GetVTable() {
-  DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
   return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
index cc208e4..7c6a710 100644
--- a/runtime/mirror/class_ext.cc
+++ b/runtime/mirror/class_ext.cc
@@ -34,6 +34,72 @@
 
 GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
 
+void ClassExt::SetObsoleteArrays(ObjPtr<PointerArray> methods,
+                                 ObjPtr<ObjectArray<DexCache>> dex_caches) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  CHECK_EQ(methods.IsNull(), dex_caches.IsNull());
+  auto obsolete_dex_cache_off = OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_);
+  auto obsolete_methods_off = OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_);
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  SetFieldObject<false>(obsolete_dex_cache_off, dex_caches.Ptr());
+  SetFieldObject<false>(obsolete_methods_off, methods.Ptr());
+}
+
+// We really need to be careful how we update this. If we ever in the future make it possible
+// for these arrays to be written without all threads being suspended, we have a race condition!
+// This race could cause obsolete methods to be missed.
+bool ClassExt::ExtendObsoleteArrays(Thread* self, uint32_t increase) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  StackHandleScope<5> hs(self);
+  Handle<ClassExt> h_this(hs.NewHandle(this));
+  Handle<PointerArray> old_methods(hs.NewHandle(h_this->GetObsoleteMethods()));
+  Handle<ObjectArray<DexCache>> old_dex_caches(hs.NewHandle(h_this->GetObsoleteDexCaches()));
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  size_t new_len;
+  if (old_methods.Get() == nullptr) {
+    CHECK(old_dex_caches.Get() == nullptr);
+    new_len = increase;
+  } else {
+    CHECK_EQ(old_methods->GetLength(), old_dex_caches->GetLength());
+    new_len = increase + old_methods->GetLength();
+  }
+  Handle<PointerArray> new_methods(hs.NewHandle<PointerArray>(
+      cl->AllocPointerArray(self, new_len)));
+  if (new_methods.IsNull()) {
+    // Fail.
+    self->AssertPendingOOMException();
+    return false;
+  }
+  Handle<ObjectArray<DexCache>> new_dex_caches(hs.NewHandle<ObjectArray<DexCache>>(
+      ObjectArray<DexCache>::Alloc(self,
+                                   cl->FindClass(self,
+                                                 "[Ljava/lang/DexCache;",
+                                                 ScopedNullHandle<ClassLoader>()),
+                                   new_len)));
+  if (new_dex_caches.IsNull()) {
+    // Fail.
+    self->AssertPendingOOMException();
+    return false;
+  }
+
+  if (!old_methods.IsNull()) {
+    // Copy the old contents.
+    new_methods->Memcpy(0,
+                        old_methods.Get(),
+                        0,
+                        old_methods->GetLength(),
+                        cl->GetImagePointerSize());
+    new_dex_caches->AsObjectArray<Object>()->AssignableCheckingMemcpy<false>(
+        0, old_dex_caches->AsObjectArray<Object>(), 0, old_dex_caches->GetLength(), false);
+  }
+  // Set the fields.
+  h_this->SetObsoleteArrays(new_methods.Get(), new_dex_caches.Get());
+
+  return true;
+}
+
 ClassExt* ClassExt::Alloc(Thread* self) {
   DCHECK(dalvik_system_ClassExt_.Read() != nullptr);
   return down_cast<ClassExt*>(dalvik_system_ClassExt_.Read()->AllocObject(self).Ptr());
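
Note: ExtendObsoleteArrays allocates both larger arrays, copies the old
contents, and only then publishes the pair via SetObsoleteArrays, all under
the class lock, so the two arrays are never observed at different lengths. A
reduced standalone sketch of that grow-and-publish pattern (std::vector
stand-ins, not ART types; the caller is assumed to hold the class lock, as
the DCHECK above requires):

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct ObsoleteState {
      std::vector<void*> methods;
      std::vector<void*> caches;
    };

    bool Extend(ObsoleteState& state, size_t increase) {
      size_t new_len = state.methods.size() + increase;
      std::vector<void*> new_methods(new_len, nullptr);
      std::vector<void*> new_caches(new_len, nullptr);
      // Copy the old contents; the new tail stays null until it is filled in.
      std::copy(state.methods.begin(), state.methods.end(), new_methods.begin());
      std::copy(state.caches.begin(), state.caches.end(), new_caches.begin());
      state.methods = std::move(new_methods);  // publish both together
      state.caches = std::move(new_caches);
      return true;
    }
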
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index 35eaae1..9104631 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -19,8 +19,11 @@
 
 #include "class-inl.h"
 
+#include "array.h"
+#include "dex_cache.h"
 #include "gc_root.h"
 #include "object.h"
+#include "object_array.h"
 #include "object_callbacks.h"
 #include "string.h"
 
@@ -49,6 +52,22 @@
     return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_));
   }
 
+  ObjectArray<DexCache>* GetObsoleteDexCaches() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<DexCache>>(
+        OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_));
+  }
+
+  PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
+  }
+
+  void SetObsoleteArrays(ObjPtr<PointerArray> methods, ObjPtr<ObjectArray<DexCache>> dex_caches)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Extend the obsolete arrays by the given amount.
+  bool ExtendObsoleteArrays(Thread* self, uint32_t increase)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static void SetClass(ObjPtr<Class> dalvik_system_ClassExt);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -57,6 +76,13 @@
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<ObjectArray<DexCache>> obsolete_dex_caches_;
+
+  HeapReference<PointerArray> obsolete_methods_;
+
+  HeapReference<DexCache> original_dex_cache_;
+
+  // The saved verification error of this class.
   HeapReference<Object> verify_error_;
 
   static GcRoot<Class> dalvik_system_ClassExt_;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index d903f71..be8815a 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -40,13 +40,14 @@
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
 
-inline mirror::String* DexCache::GetResolvedString(uint32_t string_idx) {
-  DCHECK_LT(string_idx, GetDexFile()->NumStringIds());
-  return StringDexCachePair::Lookup(GetStrings(), string_idx, NumStrings()).Read();
+inline mirror::String* DexCache::GetResolvedString(dex::StringIndex string_idx) {
+  DCHECK_LT(string_idx.index_, GetDexFile()->NumStringIds());
+  return StringDexCachePair::Lookup(GetStrings(), string_idx.index_, NumStrings()).Read();
 }
 
-inline void DexCache::SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) {
-  StringDexCachePair::Assign(GetStrings(), string_idx, resolved.Ptr(), NumStrings());
+inline void DexCache::SetResolvedString(dex::StringIndex string_idx,
+                                        ObjPtr<mirror::String> resolved) {
+  StringDexCachePair::Assign(GetStrings(), string_idx.index_, resolved.Ptr(), NumStrings());
   Runtime* const runtime = Runtime::Current();
   if (UNLIKELY(runtime->IsActiveTransaction())) {
     DCHECK(runtime->IsAotCompiler());
@@ -56,12 +57,12 @@
   runtime->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
 
-inline void DexCache::ClearString(uint32_t string_idx) {
-  const uint32_t slot_idx = string_idx % NumStrings();
+inline void DexCache::ClearString(dex::StringIndex string_idx) {
+  const uint32_t slot_idx = string_idx.index_ % NumStrings();
   DCHECK(Runtime::Current()->IsAotCompiler());
   StringDexCacheType* slot = &GetStrings()[slot_idx];
   // This is racy but should only be called from the transactional interpreter.
-  if (slot->load(std::memory_order_relaxed).index == string_idx) {
+  if (slot->load(std::memory_order_relaxed).index == string_idx.index_) {
     StringDexCachePair cleared(
         nullptr,
         StringDexCachePair::InvalidIndexForSlot(slot_idx));
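
Note: these hunks are part of the migration from raw uint32_t string indices
to dex::StringIndex; the wrapper has no runtime cost but makes it a compile
error to pass a string index where, say, a type index is expected. A minimal
sketch of such a wrapper (illustrative only; the real dex::StringIndex is
declared elsewhere in this tree):

    #include <cstdint>

    namespace dex {

    // Same size as uint32_t, but only explicitly constructible from one, so
    // different index kinds cannot be silently interchanged.
    class StringIndex {
     public:
      uint32_t index_;

      constexpr StringIndex() : index_(0u) {}
      explicit constexpr StringIndex(uint32_t idx) : index_(idx) {}

      bool operator==(const StringIndex& other) const { return index_ == other.index_; }
      bool operator<(const StringIndex& other) const { return index_ < other.index_; }
    };

    }  // namespace dex
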
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index a32d51f..741cf3b 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -22,15 +22,123 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "globals.h"
+#include "linear_alloc.h"
 #include "object.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
 #include "runtime.h"
 #include "string.h"
+#include "thread.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 namespace mirror {
 
+void DexCache::InitializeDexCache(Thread* self,
+                                  ObjPtr<mirror::DexCache> dex_cache,
+                                  ObjPtr<mirror::String> location,
+                                  const DexFile* dex_file,
+                                  LinearAlloc* linear_alloc,
+                                  PointerSize image_pointer_size) {
+  DCHECK(dex_file != nullptr);
+  ScopedAssertNoThreadSuspension sants(__FUNCTION__);
+  DexCacheArraysLayout layout(image_pointer_size, dex_file);
+  uint8_t* raw_arrays = nullptr;
+
+  const OatDexFile* const oat_dex = dex_file->GetOatDexFile();
+  if (oat_dex != nullptr && oat_dex->GetDexCacheArrays() != nullptr) {
+    raw_arrays = oat_dex->GetDexCacheArrays();
+  } else if (dex_file->NumStringIds() != 0u ||
+             dex_file->NumTypeIds() != 0u ||
+             dex_file->NumMethodIds() != 0u ||
+             dex_file->NumFieldIds() != 0u) {
+    // Zero-initialized.
+    raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
+  }
+
+  mirror::StringDexCacheType* strings = (dex_file->NumStringIds() == 0u) ? nullptr :
+      reinterpret_cast<mirror::StringDexCacheType*>(raw_arrays + layout.StringsOffset());
+  GcRoot<mirror::Class>* types = (dex_file->NumTypeIds() == 0u) ? nullptr :
+      reinterpret_cast<GcRoot<mirror::Class>*>(raw_arrays + layout.TypesOffset());
+  ArtMethod** methods = (dex_file->NumMethodIds() == 0u) ? nullptr :
+      reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
+  ArtField** fields = (dex_file->NumFieldIds() == 0u) ? nullptr :
+      reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+
+  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+  if (dex_file->NumStringIds() < num_strings) {
+    num_strings = dex_file->NumStringIds();
+  }
+
+  // Note that we allocate the method type dex caches regardless of this flag,
+  // and we make sure here that they're not used by the runtime. This is in the
+  // interest of simplicity and to avoid extensive compiler and layout class changes.
+  //
+  // If this needs to be mitigated in a production system running this code,
+  // DexCache::kDexCacheMethodTypeCacheSize can be set to zero.
+  mirror::MethodTypeDexCacheType* method_types = nullptr;
+  size_t num_method_types = 0;
+
+  if (dex_file->NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
+    num_method_types = dex_file->NumProtoIds();
+  } else {
+    num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
+  }
+
+  if (num_method_types > 0) {
+    method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
+        raw_arrays + layout.MethodTypesOffset());
+  }
+
+  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
+                 "Expected raw_arrays to align to StringDexCacheType.";
+  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
+                 "Expected StringsOffset() to align to StringDexCacheType.";
+  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
+                 "Expected strings to align to StringDexCacheType.";
+  static_assert(alignof(mirror::StringDexCacheType) == 8u,
+                "Expected StringDexCacheType to have align of 8.");
+  if (kIsDebugBuild) {
+    // Sanity check to make sure all the dex cache arrays are empty. b/28992179
+    for (size_t i = 0; i < num_strings; ++i) {
+      CHECK_EQ(strings[i].load(std::memory_order_relaxed).index, 0u);
+      CHECK(strings[i].load(std::memory_order_relaxed).object.IsNull());
+    }
+    for (size_t i = 0; i < dex_file->NumTypeIds(); ++i) {
+      CHECK(types[i].IsNull());
+    }
+    for (size_t i = 0; i < dex_file->NumMethodIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size) == nullptr);
+    }
+    for (size_t i = 0; i < dex_file->NumFieldIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size) == nullptr);
+    }
+    for (size_t i = 0; i < num_method_types; ++i) {
+      CHECK_EQ(method_types[i].load(std::memory_order_relaxed).index, 0u);
+      CHECK(method_types[i].load(std::memory_order_relaxed).object.IsNull());
+    }
+  }
+  if (strings != nullptr) {
+    mirror::StringDexCachePair::Initialize(strings);
+  }
+  if (method_types != nullptr) {
+    mirror::MethodTypeDexCachePair::Initialize(method_types);
+  }
+  dex_cache->Init(dex_file,
+                  location,
+                  strings,
+                  num_strings,
+                  types,
+                  dex_file->NumTypeIds(),
+                  methods,
+                  dex_file->NumMethodIds(),
+                  fields,
+                  dex_file->NumFieldIds(),
+                  method_types,
+                  num_method_types,
+                  image_pointer_size);
+}
+
 void DexCache::Init(const DexFile* dex_file,
                     ObjPtr<String> location,
                     StringDexCacheType* strings,
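
Note: InitializeDexCache carves the strings/types/methods/fields/method-types
arrays out of one zero-initialized LinearAlloc block at offsets computed by
DexCacheArraysLayout, which is also what lets an OAT file hand over a
pre-baked block instead. A standalone sketch of the carving step (the layout
values here are illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    struct Layout {
      size_t strings_offset;
      size_t types_offset;
      size_t total_size;
    };

    int main() {
      // One zero-initialized block; several typed views carved out of it at
      // precomputed, suitably aligned offsets.
      Layout layout{0u, 256u, 4096u};
      uint8_t* raw = static_cast<uint8_t*>(std::calloc(layout.total_size, 1u));
      auto* strings = reinterpret_cast<uint64_t*>(raw + layout.strings_offset);
      auto* types = reinterpret_cast<uint32_t*>(raw + layout.types_offset);
      strings[0] = 0u;  // each view indexes only its own region of the block
      types[0] = 0u;
      std::free(raw);
    }
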
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 7d82d3a..ec265e5 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -31,6 +31,8 @@
 class DexFile;
 class ImageWriter;
 union JValue;
+class LinearAlloc;
+class Thread;
 
 namespace mirror {
 
@@ -137,19 +139,14 @@
     return sizeof(DexCache);
   }
 
-  void Init(const DexFile* dex_file,
-            ObjPtr<String> location,
-            StringDexCacheType* strings,
-            uint32_t num_strings,
-            GcRoot<Class>* resolved_types,
-            uint32_t num_resolved_types,
-            ArtMethod** resolved_methods,
-            uint32_t num_resolved_methods,
-            ArtField** resolved_fields,
-            uint32_t num_resolved_fields,
-            MethodTypeDexCacheType* resolved_methodtypes,
-            uint32_t num_resolved_methodtypes,
-            PointerSize pointer_size) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void InitializeDexCache(Thread* self,
+                                 ObjPtr<mirror::DexCache> dex_cache,
+                                 ObjPtr<mirror::String> location,
+                                 const DexFile* dex_file,
+                                 LinearAlloc* linear_alloc,
+                                 PointerSize image_pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::dex_lock_);
 
   void Fixup(ArtMethod* trampoline, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -214,15 +211,15 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, num_resolved_method_types_);
   }
 
-  mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
+  mirror::String* GetResolvedString(dex::StringIndex string_idx) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) ALWAYS_INLINE
+  void SetResolvedString(dex::StringIndex string_idx, ObjPtr<mirror::String> resolved) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Clear a string for a string_idx, used to undo string intern transactions to make sure
   // the string isn't kept live.
-  void ClearString(uint32_t string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ClearString(dex::StringIndex string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
   Class* GetResolvedType(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -339,6 +336,21 @@
   static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, PointerSize ptr_size);
 
  private:
+  void Init(const DexFile* dex_file,
+            ObjPtr<String> location,
+            StringDexCacheType* strings,
+            uint32_t num_strings,
+            GcRoot<Class>* resolved_types,
+            uint32_t num_resolved_types,
+            ArtMethod** resolved_methods,
+            uint32_t num_resolved_methods,
+            ArtField** resolved_fields,
+            uint32_t num_resolved_fields,
+            MethodTypeDexCacheType* resolved_methodtypes,
+            uint32_t num_resolved_methodtypes,
+            PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Visit instance fields of the dex cache as well as its associated arrays.
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
diff --git a/runtime/mirror/method_handle_impl.cc b/runtime/mirror/method_handle_impl.cc
index fdfaaa8..4f1c448 100644
--- a/runtime/mirror/method_handle_impl.cc
+++ b/runtime/mirror/method_handle_impl.cc
@@ -22,6 +22,12 @@
 namespace art {
 namespace mirror {
 
+mirror::Class* MethodHandle::StaticClass() {
+  mirror::Class* klass = MethodHandleImpl::StaticClass()->GetSuperClass();
+  DCHECK(klass->DescriptorEquals("Ljava/lang/invoke/MethodHandle;"));
+  return klass;
+}
+
 GcRoot<mirror::Class> MethodHandleImpl::static_class_;
 
 void MethodHandleImpl::SetClass(Class* klass) {
diff --git a/runtime/mirror/method_handle_impl.h b/runtime/mirror/method_handle_impl.h
index 9054216..5ea82b5 100644
--- a/runtime/mirror/method_handle_impl.h
+++ b/runtime/mirror/method_handle_impl.h
@@ -57,6 +57,8 @@
     return static_cast<MethodHandleKind>(handle_kind);
   }
 
+  static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   HeapReference<mirror::MethodType> nominal_type_;
   HeapReference<mirror::MethodType> method_type_;
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index dd32df6..ae6b31d 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -67,6 +67,15 @@
 
 // Set by the verifier for a method that could not be verified to follow structured locking.
 static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
+// Set to indicate that the ArtMethod is obsolete and has a different DexCache from its declaring
+// class.
+// TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
+static constexpr uint32_t kAccObsoleteMethod =        0x04000000;  // method (runtime)
+
+// Set by the class linker for a method that has only one implementation for a
+// virtual call.
+static constexpr uint32_t kAccSingleImplementation =  0x08000000;  // method (runtime)
+
 static constexpr uint32_t kAccIntrinsic  =            0x80000000;  // method (runtime)
 
 // Special runtime-only flags.
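
Note: both new flags occupy previously unused runtime-only bits of the 32-bit
access-flags word and are tested with plain mask operations. A standalone
sketch (the flag values are copied from above; the helper names are
illustrative, not ART's ArtMethod API):

    #include <cstdint>

    constexpr uint32_t kAccObsoleteMethod       = 0x04000000u;
    constexpr uint32_t kAccSingleImplementation = 0x08000000u;

    inline bool IsObsolete(uint32_t access_flags) {
      return (access_flags & kAccObsoleteMethod) != 0u;
    }

    inline bool HasSingleImplementation(uint32_t access_flags) {
      return (access_flags & kAccSingleImplementation) != 0u;
    }

    inline uint32_t SetSingleImplementation(uint32_t access_flags) {
      return access_flags | kAccSingleImplementation;
    }
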
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index adf35b6..67b2e1c 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -177,8 +177,22 @@
 }
 
 static void VMDebug_printLoadedClasses(JNIEnv* env, jclass, jint flags) {
+  class DumpClassVisitor : public ClassVisitor {
+   public:
+    explicit DumpClassVisitor(int dump_flags) : flags_(dump_flags) {}
+
+    bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+      klass->DumpClass(LOG_STREAM(ERROR), flags_);
+      return true;
+    }
+
+   private:
+    const int flags_;
+  };
+  DumpClassVisitor visitor(flags);
+
   ScopedFastNativeObjectAccess soa(env);
-  return Runtime::Current()->GetClassLinker()->DumpAllClasses(flags);
+  return Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
 }
 
 static jint VMDebug_getLoadedClassCount(JNIEnv* env, jclass) {
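
Note: replacing DumpAllClasses with VisitClasses moves the per-class action
into a caller-supplied visitor, so any stateful visitor fits the same slot. A
reduced standalone sketch of another visitor in the same shape (Class and
ClassVisitor here are simplified stand-ins for the ART types):

    #include <cstddef>

    struct Class {};

    // Mirrors the shape of art::ClassVisitor: return true to keep visiting.
    class ClassVisitor {
     public:
      virtual ~ClassVisitor() {}
      virtual bool operator()(Class* klass) = 0;
    };

    class CountClassesVisitor : public ClassVisitor {
     public:
      bool operator()(Class* /*klass*/) override {
        ++count_;
        return true;
      }
      size_t count_ = 0;
    };

    int main() {
      CountClassesVisitor visitor;
      Class c;
      visitor(&c);
      return visitor.count_ == 1u ? 0 : 1;
    }
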
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 48feb11..3058df4 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -287,7 +287,7 @@
 
 // Based on ClassLinker::ResolveString.
 static void PreloadDexCachesResolveString(
-    Handle<mirror::DexCache> dex_cache, uint32_t string_idx, StringTable& strings)
+    Handle<mirror::DexCache> dex_cache, dex::StringIndex string_idx, StringTable& strings)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ObjPtr<mirror::String>  string = dex_cache->GetResolvedString(string_idx);
   if (string != nullptr) {
@@ -450,7 +450,7 @@
       continue;
     }
     for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
-      ObjPtr<mirror::String> string = dex_cache->GetResolvedString(j);
+      ObjPtr<mirror::String> string = dex_cache->GetResolvedString(dex::StringIndex(j));
       if (string != nullptr) {
         filled->num_strings++;
       }
@@ -514,7 +514,7 @@
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
-        PreloadDexCachesResolveString(dex_cache, j, strings);
+        PreloadDexCachesResolveString(dex_cache, dex::StringIndex(j), strings);
       }
     }
 
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index f6de593..f1c350f 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -61,7 +61,8 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
+  return soa.AddLocalReference<jobject>(
+      dex_cache->GetResolvedString(dex::StringIndex(string_index)));
 }
 
 static void DexCache_setResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index,
@@ -77,7 +78,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String>(string));
+  dex_cache->SetResolvedString(dex::StringIndex(string_index), soa.Decode<mirror::String>(string));
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 2376889..5565565 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -272,7 +272,7 @@
   if (code == 0) {
     return pc == 0;
   }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
   return code <= pc && pc <= (code + code_size);
 }
 
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index d7d0c4f..721fab9 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -44,7 +44,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->code_size_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetCodeSizeAddr()) - begin_;
 }
 
 inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
@@ -52,7 +52,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FrameSizeInBytes();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FrameSizeInBytes();
 }
 
 inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
@@ -60,7 +60,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.CoreSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().CoreSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
@@ -68,7 +68,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FpSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
@@ -81,7 +81,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetVmapTableOffsetAddr()) - begin_;
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
@@ -89,7 +89,7 @@
   if (code == nullptr) {
     return nullptr;
   }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetVmapTableOffset();
   if (UNLIKELY(offset == 0u)) {
     return nullptr;
   }
@@ -101,7 +101,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
 }
 
 inline uint32_t OatFile::OatMethod::GetCodeOffset() const {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 4d1e1ea..6a62a16 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -314,7 +314,7 @@
   const OatFile::OatDexFile* oat_dex_file = file.GetOatDexFile(
       dex_location_.c_str(), dex_checksum_pointer, &error_msg);
   if (oat_dex_file == nullptr) {
-    VLOG(oat) << error_msg;
+    LOG(ERROR) << error_msg;
     return kOatDexOutOfDate;
   }
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 4afca7d..3cdde5a 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -58,7 +58,7 @@
   }
 
   bool IsOptimized() const {
-    return code_size_ != 0 && vmap_table_offset_ != 0;
+    return GetCodeSize() != 0 && vmap_table_offset_ != 0;
   }
 
   const void* GetOptimizedCodeInfoPtr() const {
@@ -81,7 +81,23 @@
   }
 
   uint32_t GetCodeSize() const {
-    return code_size_;
+    return code_size_ & kCodeSizeMask;
+  }
+
+  const uint32_t* GetCodeSizeAddr() const {
+    return &code_size_;
+  }
+
+  uint32_t GetVmapTableOffset() const {
+    return vmap_table_offset_;
+  }
+
+  void SetVmapTableOffset(uint32_t offset) {
+    vmap_table_offset_ = offset;
+  }
+
+  const uint32_t* GetVmapTableOffsetAddr() const {
+    return &vmap_table_offset_;
   }
 
   const uint8_t* GetVmapTable() const {
@@ -96,7 +112,7 @@
       // On Thumb-2, the pc is offset by one.
       code_start++;
     }
-    return code_start <= pc && pc <= (code_start + code_size_);
+    return code_start <= pc && pc <= (code_start + GetCodeSize());
   }
 
   const uint8_t* GetEntryPoint() const {
@@ -130,11 +146,25 @@
 
   uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
 
+  void SetHasShouldDeoptimizeFlag() {
+    DCHECK_EQ(code_size_ & kShouldDeoptimizeMask, 0u);
+    code_size_ |= kShouldDeoptimizeMask;
+  }
+
+  bool HasShouldDeoptimizeFlag() const {
+    return (code_size_ & kShouldDeoptimizeMask) != 0;
+  }
+
+ private:
+  static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000;
+  static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
+
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
-  // The code size in bytes.
+  // The code size in bytes. The highest bit signifies whether the compiled
+  // code associated with this method header has the should_deoptimize flag.
   uint32_t code_size_;
   // The actual code.
   uint8_t code_[0];
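
Note: the top bit of code_size_ now carries the should-deoptimize flag, so
every reader must mask with kCodeSizeMask via GetCodeSize() (which is exactly
what the accessor migration in the earlier hunks enforces). A standalone model
of the packing and its invariants:

    #include <cassert>
    #include <cstdint>

    class PackedCodeSize {
     public:
      static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000u;
      static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;

      explicit PackedCodeSize(uint32_t size) : bits_(size) {
        assert(size <= kCodeSizeMask);  // size must not collide with the flag bit
      }

      uint32_t Size() const { return bits_ & kCodeSizeMask; }  // always mask on read
      bool ShouldDeoptimize() const { return (bits_ & kShouldDeoptimizeMask) != 0u; }
      void SetShouldDeoptimize() { bits_ |= kShouldDeoptimizeMask; }  // size untouched

     private:
      uint32_t bits_;
    };

    int main() {
      PackedCodeSize s(1024u);
      s.SetShouldDeoptimize();
      assert(s.Size() == 1024u && s.ShouldDeoptimize());
    }
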
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
index b8754a4..39ab52f 100644
--- a/runtime/object_lock.cc
+++ b/runtime/object_lock.cc
@@ -17,6 +17,7 @@
 #include "object_lock.h"
 
 #include "mirror/object-inl.h"
+#include "mirror/class_ext.h"
 #include "monitor.h"
 
 namespace art {
@@ -61,6 +62,7 @@
 }
 
 template class ObjectLock<mirror::Class>;
+template class ObjectLock<mirror::ClassExt>;
 template class ObjectLock<mirror::Object>;
 template class ObjectTryLock<mirror::Class>;
 template class ObjectTryLock<mirror::Object>;
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index b323aef..0f9fbb2 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -24,6 +24,7 @@
            "ti_heap.cc",
            "ti_method.cc",
            "ti_stack.cc",
+           "ti_redefine.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
     shared_libs: [
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 6480843..1ad3f08 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -40,15 +40,16 @@
 #include "base/mutex.h"
 #include "events-inl.h"
 #include "jni_env_ext-inl.h"
-#include "object_tagging.h"
 #include "obj_ptr-inl.h"
+#include "object_tagging.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread_list.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "ti_class.h"
 #include "ti_heap.h"
 #include "ti_method.h"
+#include "ti_redefine.h"
 #include "ti_stack.h"
 #include "transform.h"
 
@@ -1141,6 +1142,34 @@
     return RetransformClassesWithHook(reinterpret_cast<ArtJvmTiEnv*>(env), classes, hook);
   }
 
+  static jvmtiError RedefineClassDirect(ArtJvmTiEnv* env,
+                                        jclass klass,
+                                        jint dex_size,
+                                        unsigned char* dex_file) {
+    if (!IsValidEnv(env)) {
+      return ERR(INVALID_ENVIRONMENT);
+    }
+    jvmtiError ret = OK;
+    std::string location;
+    if ((ret = GetClassLocation(env, klass, &location)) != OK) {
+      // TODO Do something more here? Maybe give log statements?
+      return ret;
+    }
+    std::string error;
+    ret = Redefiner::RedefineClass(env,
+                                    art::Runtime::Current(),
+                                    art::Thread::Current(),
+                                    klass,
+                                    location,
+                                    dex_size,
+                                    reinterpret_cast<uint8_t*>(dex_file),
+                                    &error);
+    if (ret != OK) {
+      LOG(ERROR) << "FAILURE TO REDEFINE " << error;
+    }
+    return ret;
+  }
+
   // TODO This will be called by the event handler for the art::ti Event Load Event
   static jvmtiError RetransformClassesWithHook(ArtJvmTiEnv* env,
                                                const std::vector<jclass>& classes,
@@ -1148,6 +1177,8 @@
     if (!IsValidEnv(env)) {
       return ERR(INVALID_ENVIRONMENT);
     }
+    jvmtiError res = OK;
+    std::string error;
     for (jclass klass : classes) {
       JNIEnv* jni_env = nullptr;
       jobject loader = nullptr;
@@ -1183,11 +1214,22 @@
            /*out*/&new_dex_data);
       // Check if anything actually changed.
       if ((new_data_len != 0 || new_dex_data != nullptr) && new_dex_data != dex_data) {
-        MoveTransformedFileIntoRuntime(klass, std::move(location), new_data_len, new_dex_data);
+        res = Redefiner::RedefineClass(env,
+                                       art::Runtime::Current(),
+                                       art::Thread::Current(),
+                                       klass,
+                                       location,
+                                       new_data_len,
+                                       new_dex_data,
+                                       &error);
         env->Deallocate(new_dex_data);
       }
       // Deallocate the old dex data.
       env->Deallocate(dex_data);
+      if (res != OK) {
+        LOG(ERROR) << "FAILURE TO REDEFINE " << error;
+        return res;
+      }
     }
     return OK;
   }
@@ -1238,7 +1280,10 @@
   reinterpret_cast<void*>(JvmtiFunctions::RetransformClassWithHook),
   // nullptr,  // reserved1
   JvmtiFunctions::SetEventNotificationMode,
-  nullptr,  // reserved3
+  // SPECIAL FUNCTION: RedefineClassDirect occupies what is normally the reserved3 slot.
+  // TODO Remove once we have events working.
+  reinterpret_cast<void*>(JvmtiFunctions::RedefineClassDirect),
+  // nullptr,  // reserved3
   JvmtiFunctions::GetAllThreads,
   JvmtiFunctions::SuspendThread,
   JvmtiFunctions::ResumeThread,
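
Note: parking RedefineClassDirect in the reserved3 slot means a caller has to
pull the pointer out of the function table and cast it back to the intended
signature; nothing type-checks the call. A standalone illustration of that
retrieval (the table and signature here are simplified stand-ins, not the
real jvmtiInterface struct):

    #include <cassert>

    struct FunctionTable {
      void* reserved3;  // normally nullptr; here it smuggles an extension function
    };

    using RedefineFn = int (*)(void* env, void* klass, int dex_size, unsigned char* dex);

    int FakeRedefine(void*, void*, int, unsigned char*) { return 0; }

    int main() {
      // The cast through void* is how a reserved slot carries a vendor extension;
      // the caller must know the real signature out of band.
      FunctionTable table{reinterpret_cast<void*>(&FakeRedefine)};
      auto fn = reinterpret_cast<RedefineFn>(table.reserved3);
      assert(fn(nullptr, nullptr, 0, nullptr) == 0);
    }
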
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
new file mode 100644
index 0000000..d0349b9
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -0,0 +1,508 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_redefine.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "events-inl.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/class_ext.h"
+#include "mirror/object.h"
+#include "object_lock.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+// Moves dex data to an anonymous, read-only mmap'd region.
+std::unique_ptr<art::MemMap> Redefiner::MoveDataToMemMap(const std::string& original_location,
+                                                         jint data_len,
+                                                         unsigned char* dex_data,
+                                                         std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
+      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
+      nullptr,
+      data_len,
+      PROT_READ|PROT_WRITE,
+      /*low_4gb*/false,
+      /*reuse*/false,
+      error_msg));
+  if (map == nullptr) {
+    return map;
+  }
+  memcpy(map->Begin(), dex_data, data_len);
+  // Make the dex file's mmap read-only. This matches how other DexFiles are mmapped and prevents
+  // programs from corrupting it.
+  map->Protect(PROT_READ);
+  return map;
+}
+
+jvmtiError Redefiner::RedefineClass(ArtJvmTiEnv* env,
+                                    art::Runtime* runtime,
+                                    art::Thread* self,
+                                    jclass klass,
+                                    const std::string& original_dex_location,
+                                    jint data_len,
+                                    unsigned char* dex_data,
+                                    std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_dex_location,
+                                                    data_len,
+                                                    dex_data,
+                                                    error_msg));
+  std::ostringstream os;
+  char* generic_ptr_unused = nullptr;
+  char* signature_ptr = nullptr;
+  if (env->GetClassSignature(klass, &signature_ptr, &generic_ptr_unused) != OK) {
+    signature_ptr = const_cast<char*>("<UNKNOWN CLASS>");
+  }
+  if (map.get() == nullptr) {
+    os << "Failed to create anonymous mmap for modified dex file of class " << signature_ptr
+       << "in dex file " << original_dex_location << " because: " << *error_msg;
+    *error_msg = os.str();
+    return ERR(OUT_OF_MEMORY);
+  }
+  if (map->Size() < sizeof(art::DexFile::Header)) {
+    *error_msg = "Could not read dex file header because dex_data was too short";
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  uint32_t checksum = reinterpret_cast<const art::DexFile::Header*>(map->Begin())->checksum_;
+  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map->GetName(),
+                                                                  checksum,
+                                                                  std::move(map),
+                                                                  /*verify*/true,
+                                                                  /*verify_checksum*/true,
+                                                                  error_msg));
+  if (dex_file.get() == nullptr) {
+    os << "Unable to load modified dex file for " << signature_ptr << ": " << *error_msg;
+    *error_msg = os.str();
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  // Get shared mutator lock.
+  art::ScopedObjectAccess soa(self);
+  art::StackHandleScope<1> hs(self);
+  Redefiner r(runtime, self, klass, signature_ptr, dex_file, error_msg);
+  // Lock around this class to avoid races.
+  art::ObjectLock<art::mirror::Class> lock(self, hs.NewHandle(r.GetMirrorClass()));
+  return r.Run();
+}
+
+// TODO *MAJOR* This should return the actual source java.lang.DexFile object for the klass.
+// TODO Make mirror of DexFile and associated types to make this less hellish.
+// TODO Make mirror of BaseDexClassLoader and associated types to make this less hellish.
+art::mirror::Object* Redefiner::FindSourceDexFileObject(
+    art::Handle<art::mirror::ClassLoader> loader) {
+  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
+  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
+  const char* dex_file_name = "Ldalvik/system/DexFile;";
+  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
+  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
+
+  CHECK(!self_->IsExceptionPending());
+  art::StackHandleScope<11> hs(self_);
+  art::ClassLinker* class_linker = runtime_->GetClassLinker();
+
+  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
+      nullptr));
+  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
+      self_, dex_class_loader_name, null_loader)));
+
+  // Get all the ArtFields so we can look in the BaseDexClassLoader
+  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
+      "pathList", dex_path_list_name);
+  CHECK(path_list_field != nullptr);
+
+  art::ArtField* dex_path_list_element_field =
+      class_linker->FindClass(self_, dex_path_list_name, null_loader)
+        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
+  CHECK(dex_path_list_element_field != nullptr);
+
+  art::ArtField* element_dex_file_field =
+      class_linker->FindClass(self_, dex_path_list_element_name, null_loader)
+        ->FindDeclaredInstanceField("dexFile", dex_file_name);
+  CHECK(element_dex_file_field != nullptr);
+
+  // Check if loader is a BaseDexClassLoader
+  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(loader->GetClass()));
+  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
+    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
+               << "supported class loader type!";
+    return nullptr;
+  }
+  // Start navigating the fields of the loader (now known to be a BaseDexClassLoader derivative)
+  art::Handle<art::mirror::Object> path_list(
+      hs.NewHandle(path_list_field->GetObject(loader.Get())));
+  CHECK(path_list.Get() != nullptr);
+  CHECK(!self_->IsExceptionPending());
+  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
+      dex_path_list_element_field->GetObject(path_list.Get())->
+      AsObjectArray<art::mirror::Object>()));
+  CHECK(!self_->IsExceptionPending());
+  CHECK(dex_elements_list.Get() != nullptr);
+  size_t num_elements = dex_elements_list->GetLength();
+  art::MutableHandle<art::mirror::Object> current_element(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::Object> first_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  // Iterate over the DexPathList$Element to find the right one
+  // TODO Or not ATM just return the first one.
+  for (size_t i = 0; i < num_elements; i++) {
+    current_element.Assign(dex_elements_list->Get(i));
+    CHECK(current_element.Get() != nullptr);
+    CHECK(!self_->IsExceptionPending());
+    CHECK(dex_elements_list.Get() != nullptr);
+    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self_,
+                                                                  dex_path_list_element_name,
+                                                                  null_loader));
+    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
+    // comes from, but that is more annoying because we would need to find this class. It is not
+    // necessary for proper function since we just need to be in front of the class's old dex file
+    // in the path.
+    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
+    if (first_dex_file.Get() != nullptr) {
+      return first_dex_file.Get();
+    }
+  }
+  return nullptr;
+}
+
+art::mirror::Class* Redefiner::GetMirrorClass() {
+  return self_->DecodeJObject(klass_)->AsClass();
+}
+
+art::mirror::ClassLoader* Redefiner::GetClassLoader() {
+  return GetMirrorClass()->GetClassLoader();
+}
+
+art::mirror::DexCache* Redefiner::CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader) {
+  return runtime_->GetClassLinker()->RegisterDexFile(*dex_file_, loader.Get());
+}
+
+// TODO Really wishing I had that mirror of java.lang.DexFile now.
+art::mirror::LongArray* Redefiner::AllocateDexFileCookie(
+    art::Handle<art::mirror::Object> java_dex_file_obj) {
+  art::StackHandleScope<2> hs(self_);
+  // mCookie is nulled out if the DexFile has been closed but mInternalCookie sticks around until
+  // the object is finalized. Since they always point to the same array if mCookie is not null, we
+  // just use the mInternalCookie field. We will update one or both of these fields later.
+  // TODO Should I get the class from the classloader or directly?
+  art::ArtField* internal_cookie_field = java_dex_file_obj->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  // TODO Add check that mCookie is either null or same as mInternalCookie
+  CHECK(internal_cookie_field != nullptr);
+  art::Handle<art::mirror::LongArray> cookie(
+      hs.NewHandle(internal_cookie_field->GetObject(java_dex_file_obj.Get())->AsLongArray()));
+  // TODO Maybe make these non-fatal.
+  CHECK(cookie.Get() != nullptr);
+  CHECK_GE(cookie->GetLength(), 1);
+  art::Handle<art::mirror::LongArray> new_cookie(
+      hs.NewHandle(art::mirror::LongArray::Alloc(self_, cookie->GetLength() + 1)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    return nullptr;
+  }
+  // Copy the oat-dex field at the start.
+  // TODO Should I clear this field?
+  // TODO This is a really crappy thing here with the first element being different.
+  new_cookie->SetWithoutChecks<false>(0, cookie->GetWithoutChecks(0));
+  new_cookie->SetWithoutChecks<false>(
+      1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex_file_.get())));
+  new_cookie->Memcpy(2, cookie.Get(), 1, cookie->GetLength() - 1);
+  return new_cookie.Get();
+}
+
+void Redefiner::RecordFailure(jvmtiError result, const std::string& error_msg) {
+  *error_msg_ = art::StringPrintf("Unable to perform redefinition of '%s': %s",
+                                  class_sig_,
+                                  error_msg.c_str());
+  result_ = result;
+}
+
+bool Redefiner::FinishRemainingAllocations(
+    /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+    /*out*/art::MutableHandle<art::mirror::Object>* java_dex_file_obj,
+    /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+    /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache) {
+  art::StackHandleScope<4> hs(self_);
+  // This shouldn't allocate
+  art::Handle<art::mirror::ClassLoader> loader(hs.NewHandle(GetClassLoader()));
+  if (loader.Get() == nullptr) {
+    // TODO Better error msg.
+    RecordFailure(ERR(INTERNAL), "Unable to find class loader!");
+    return false;
+  }
+  art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(FindSourceDexFileObject(loader)));
+  if (dex_file_obj.Get() == nullptr) {
+    // TODO Better error msg.
+    RecordFailure(ERR(INTERNAL), "Unable to find class loader!");
+    return false;
+  }
+  art::Handle<art::mirror::LongArray> new_cookie(hs.NewHandle(AllocateDexFileCookie(dex_file_obj)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate dex file array for class loader");
+    return false;
+  }
+  art::Handle<art::mirror::DexCache> dex_cache(hs.NewHandle(CreateNewDexCache(loader)));
+  if (dex_cache.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate DexCache");
+    return false;
+  }
+  source_class_loader->Assign(loader.Get());
+  java_dex_file_obj->Assign(dex_file_obj.Get());
+  new_dex_file_cookie->Assign(new_cookie.Get());
+  new_dex_cache->Assign(dex_cache.Get());
+  return true;
+}
+
+jvmtiError Redefiner::Run() {
+  art::StackHandleScope<5> hs(self_);
+  // TODO We might want to have a global lock (or one based on the class being redefined at least)
+  // in order to make cleanup easier. Not a huge deal though.
+  //
+  // First we just allocate the ClassExt and its fields that we need. These can be updated
+  // atomically without any issues (since we allocate the map arrays as empty) so we don't bother
+  // doing a try loop. For the other allocations we need to ensure that nothing has changed in
+  // the time between allocating them and pausing all threads before we can update them, so we
+  // need to do a try loop.
+  if (!EnsureRedefinitionIsValid() || !EnsureClassAllocationsFinished()) {
+    return result_;
+  }
+  art::MutableHandle<art::mirror::ClassLoader> source_class_loader(
+      hs.NewHandle<art::mirror::ClassLoader>(nullptr));
+  art::MutableHandle<art::mirror::Object> java_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::LongArray> new_dex_file_cookie(
+      hs.NewHandle<art::mirror::LongArray>(nullptr));
+  art::MutableHandle<art::mirror::DexCache> new_dex_cache(
+      hs.NewHandle<art::mirror::DexCache>(nullptr));
+  if (!FinishRemainingAllocations(&source_class_loader,
+                                  &java_dex_file,
+                                  &new_dex_file_cookie,
+                                  &new_dex_cache)) {
+    // TODO Null out the ClassExt fields we allocated (if possible; we might be racing with
+    // another redefineclass call which made it even bigger). The leak shouldn't be huge (2x an
+    // array of size declared_methods_.length) but it would be good to get rid of.
+    // new_dex_file_cookie & new_dex_cache should be cleaned up by the GC.
+    return result_;
+  }
+  // Get the mirror class now that we aren't allocating anymore.
+  art::Handle<art::mirror::Class> art_class(hs.NewHandle(GetMirrorClass()));
+  // Enable assertion that this thread isn't interrupted during this installation.
+  // After this we will need to do real cleanup in case of failure. Prior to this we could simply
+  // return and would let everything get cleaned up or harmlessly leaked.
+  // Do the transition to final suspension.
+  // TODO We might want to give this its own suspended state!
+  // TODO This isn't right. Ideally we need to change state without any chance of suspension!
+  self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
+  runtime_->GetThreadList()->SuspendAll(
+      "Final installation of redefined Class!", /*long_suspend*/true);
+  // TODO Might want to move this into a different type.
+  // Now we reach the part where we must do active cleanup if something fails.
+  // TODO We should really Retry if this fails instead of simply aborting.
+  // Setting the new DexFileCookie returns the original so we can restore it if redefinition
+  // fails.
+  art::ObjPtr<art::mirror::LongArray> original_dex_file_cookie(nullptr);
+  if (!UpdateJavaDexFile(java_dex_file.Get(),
+                         new_dex_file_cookie.Get(),
+                         &original_dex_file_cookie)) {
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  if (!UpdateClass(art_class.Get(), new_dex_cache.Get())) {
+    // TODO Should have some form of scope to do this.
+    RestoreJavaDexFile(java_dex_file.Get(), original_dex_file_cookie);
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  // Update the ClassObjects. Keep the old DexCache (and other stuff) around so we can restore
+  // functions/fields.
+  // Verify the new Class.
+  //   On failure, undo the updates to the class.
+  // Do stack walks and allocate obsolete methods.
+  // Shrink the obsolete method maps if possible?
+  // TODO Find the appropriate class loader. Allocate new dex files array. Pause all java threads.
+  // Replace dex files array. Do stack scan + allocate obsoletes. Remove array if possible.
+  // TODO We might want to ensure that all threads are stopped for this!
+  // AddDexToClassPath();
+  // Release suspendAll
+  // TODO Put this into a scoped thing.
+  runtime_->GetThreadList()->ResumeAll();
+  // Get back shared mutator lock as expected for return.
+  self_->TransitionFromSuspendedToRunnable();
+  // TODO Do this at a more reasonable place.
+  dex_file_.release();
+  return OK;
+}
+
+void Redefiner::RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                   art::ObjPtr<art::mirror::LongArray> orig_cookie) {
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");
+  art::ObjPtr<art::mirror::LongArray> new_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());
+  internal_cookie_field->SetObject<false>(java_dex_file, orig_cookie);
+  if (!new_cookie.IsNull()) {
+    cookie_field->SetObject<false>(java_dex_file, orig_cookie);
+  }
+}
+
+// Performs the updates to the class that will allow us to verify it.
+bool Redefiner::UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                            art::ObjPtr<art::mirror::DexCache> new_dex_cache) {
+  art::ClassLinker* linker = runtime_->GetClassLinker();
+  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
+  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
+      *dex_file_, class_sig_, art::ComputeModifiedUtf8Hash(class_sig_));
+  if (class_def == nullptr) {
+    RecordFailure(ERR(INVALID_CLASS_FORMAT), "Unable to find ClassDef!");
+    return false;
+  }
+  const art::DexFile::TypeId& declaring_class_id = dex_file_->GetTypeId(class_def->class_idx_);
+  const art::DexFile& old_dex_file = mclass->GetDexFile();
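+  // For each method, translate its name, return type and parameter types from the old dex file
+  // into the corresponding ids in the new dex file, then look up the matching ProtoId and
+  // MethodId there so the method can be repointed at the new file.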
+  for (art::ArtMethod& method : mclass->GetMethods(image_pointer_size)) {
+    const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(method.GetName());
+    art::dex::TypeIndex method_return_idx =
+        dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(method.GetReturnTypeDescriptor()));
+    const auto* old_type_list = method.GetParameterTypeList();
+    std::vector<art::dex::TypeIndex> new_type_list;
+    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
+      new_type_list.push_back(
+          dex_file_->GetIndexForTypeId(
+              *dex_file_->FindTypeId(
+                  old_dex_file.GetTypeDescriptor(
+                      old_dex_file.GetTypeId(
+                          old_type_list->GetTypeItem(i).type_idx_)))));
+    }
+    const art::DexFile::ProtoId* proto_id = dex_file_->FindProtoId(method_return_idx,
+                                                                   new_type_list);
+    CHECK(proto_id != nullptr || old_type_list == nullptr);
+    // TODO Return false, cleanup.
+    const art::DexFile::MethodId* method_id = dex_file_->FindMethodId(declaring_class_id,
+                                                                      *new_name_id,
+                                                                      *proto_id);
+    CHECK(method_id != nullptr);
+    // TODO Return false, cleanup.
+    uint32_t dex_method_idx = dex_file_->GetIndexForMethodId(*method_id);
+    method.SetDexMethodIndex(dex_method_idx);
+    linker->SetEntryPointsToInterpreter(&method);
+    method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(*class_def, dex_method_idx));
+    method.SetDexCacheResolvedMethods(new_dex_cache->GetResolvedMethods(), image_pointer_size);
+    method.SetDexCacheResolvedTypes(new_dex_cache->GetResolvedTypes(), image_pointer_size);
+  }
+  // Update the class fields.
+  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
+  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
+  mclass->SetDexCache(new_dex_cache.Ptr());
+  mclass->SetDexCacheStrings(new_dex_cache->GetStrings());
+  mclass->SetDexClassDefIndex(dex_file_->GetIndexForClassDef(*class_def));
+  mclass->SetDexTypeIndex(dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(class_sig_)));
+  return true;
+}
+
+bool Redefiner::UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                  art::ObjPtr<art::mirror::LongArray> new_cookie,
+                                  /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie) {
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");
+  CHECK(internal_cookie_field != nullptr);
+  CHECK(cookie_field != nullptr);
+  art::ObjPtr<art::mirror::LongArray> orig_internal_cookie(
+      internal_cookie_field->GetObject(java_dex_file)->AsLongArray());
+  art::ObjPtr<art::mirror::LongArray> orig_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());
+  internal_cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  *original_cookie = orig_internal_cookie;
+  if (!orig_cookie.IsNull()) {
+    cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  }
+  return true;
+}
+
+// This function does all (java) allocations we need to do for the Class being redefined.
+// TODO Change this name maybe?
+bool Redefiner::EnsureClassAllocationsFinished() {
+  art::StackHandleScope<2> hs(self_);
+  art::Handle<art::mirror::Class> klass(hs.NewHandle(self_->DecodeJObject(klass_)->AsClass()));
+  if (klass.Get() == nullptr) {
+    RecordFailure(ERR(INVALID_CLASS), "Unable to decode class argument!");
+    return false;
+  }
+  // Allocate the classExt
+  art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->EnsureExtDataPresent(self_)));
+  if (ext.Get() == nullptr) {
+    // No memory. Clear exception (it's not useful) and return error.
+    // TODO This doesn't need to be fatal. We could just not support obsolete methods after hitting
+    // this case.
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Could not allocate ClassExt");
+    return false;
+  }
+  // Allocate the 2 arrays that make up the obsolete methods map. Since the contents of the arrays
+  // are only modified when all threads (other than the modifying one) are suspended we don't need
+  // to worry about missing the unsynchronized writes to the array. We do synchronize when setting
+  // it however, since that can happen at any time.
+  // TODO Clear these after we walk the stacks in order to free them in the (likely?) event there
+  // are no obsolete methods.
+  {
+    art::ObjectLock<art::mirror::ClassExt> lock(self_, ext);
+    if (!ext->ExtendObsoleteArrays(
+          self_, klass->GetDeclaredMethodsSlice(art::kRuntimePointerSize).size())) {
+      // OOM. Clear exception and return error.
+      self_->AssertPendingOOMException();
+      self_->ClearException();
+      RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate/extend obsolete methods map");
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace openjdkjvmti
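
Note: the suspend/resume pair in Redefiner::Run() is repeated on every exit path, and the TODOs
above ask for "a scoped thing". A minimal sketch of such an RAII guard, using only calls already
present in Run() (the class name is hypothetical, not part of this change):

    class ScopedRedefineSuspension {
     public:
      ScopedRedefineSuspension(art::Thread* self, art::Runtime* runtime, const char* cause)
          : self_(self), runtime_(runtime) {
        // Mirror the transition done before SuspendAll in Run().
        self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
        runtime_->GetThreadList()->SuspendAll(cause, /*long_suspend*/true);
      }
      ~ScopedRedefineSuspension() {
        // Mirror the resume sequence used on each exit path of Run().
        runtime_->GetThreadList()->ResumeAll();
        self_->TransitionFromSuspendedToRunnable();
      }
     private:
      art::Thread* const self_;
      art::Runtime* const runtime_;
    };

With this, each early return taken after the suspension point would release it automatically
instead of repeating ResumeAll() and TransitionFromSuspendedToRunnable() by hand.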
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
new file mode 100644
index 0000000..c819acd
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -0,0 +1,169 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+
+#include <string>
+
+#include <jni.h>
+
+#include "art_jvmti.h"
+#include "art_method.h"
+#include "class_linker.h"
+#include "dex_file.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti.h"
+#include "linear_alloc.h"
+#include "mem_map.h"
+#include "mirror/array-inl.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
+#include "mirror/class_loader-inl.h"
+#include "mirror/string-inl.h"
+#include "oat_file.h"
+#include "obj_ptr.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread_list.h"
+#include "transform.h"
+#include "utf.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace openjdkjvmti {
+
+// Class that can redefine a single class's methods.
+class Redefiner {
+ public:
+  // Redefine the given class with the given dex data. Note that this function does not take
+  // ownership of the dex_data pointer: the runtime makes its own copy of the data, so the caller
+  // remains responsible for freeing it and may do so as soon as this call returns.
+  static jvmtiError RedefineClass(ArtJvmTiEnv* env,
+                                  art::Runtime* runtime,
+                                  art::Thread* self,
+                                  jclass klass,
+                                  const std::string& original_dex_location,
+                                  jint data_len,
+                                  unsigned char* dex_data,
+                                  std::string* error_msg);
+
+ private:
+  jvmtiError result_;
+  art::Runtime* runtime_;
+  art::Thread* self_;
+  // Kept as a jclass since we have weird run-state changes that make keeping it around as a
+  // mirror::Class difficult and confusing.
+  jclass klass_;
+  std::unique_ptr<const art::DexFile> dex_file_;
+  std::string* error_msg_;
+  char* class_sig_;
+
+  // TODO Maybe change jclass to a mirror::Class
+  Redefiner(art::Runtime* runtime,
+            art::Thread* self,
+            jclass klass,
+            char* class_sig,
+            std::unique_ptr<const art::DexFile>& redefined_dex_file,
+            std::string* error_msg)
+      : result_(ERR(INTERNAL)),
+        runtime_(runtime),
+        self_(self),
+        klass_(klass),
+        dex_file_(std::move(redefined_dex_file)),
+        error_msg_(error_msg),
+        class_sig_(class_sig) { }
+
+  static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
+                                                       jint data_len,
+                                                       unsigned char* dex_data,
+                                                       std::string* error_msg);
+
+  // TODO Put on all the lock qualifiers.
+  jvmtiError Run() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  bool FinishRemainingAllocations(
+        /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+        /*out*/art::MutableHandle<art::mirror::Object>* java_dex_file_obj,
+        /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+        /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Preallocates all needed allocations in klass so that we can pause execution safely.
+  // TODO We should be able to free the arrays if they end up not being used. Investigate doing this
+  // in the future. For now we will just take the memory hit.
+  bool EnsureClassAllocationsFinished() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // This finds the dalvik.system.DexFile we will add the native DexFile to as part of the
+  // classpath.
+  // TODO Make sure the DexFile object returned is the one that the klass_ actually comes from.
+  art::mirror::Object* FindSourceDexFileObject(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::Class* GetMirrorClass() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Allocates and fills the new DexFileCookie
+  art::mirror::LongArray* AllocateDexFileCookie(art::Handle<art::mirror::Object> java_dex_file_obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
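+  // The cookie is a long[] in which element 0 is the OatDexFile element (possibly null) and the
+  // remaining elements are native art::DexFile pointers; the new dex file is placed at index 1 so
+  // that it sits in front of the class's old dex file in the search path.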
+
+  art::mirror::DexCache* CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  void RecordFailure(jvmtiError result, const std::string& error_msg);
+
+  // TODO Actually write this.
+  // This will check that no constraints are violated (e.g. more than 1 class in the dex file,
+  // changes in the number/declaration of methods & fields, changes in access flags, etc.).
+  bool EnsureRedefinitionIsValid() {
+    LOG(WARNING) << "Redefinition is not checked for validity currently";
+    return true;
+  }
+
+  bool UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                         art::ObjPtr<art::mirror::LongArray> new_cookie,
+                         /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  void RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                          art::ObjPtr<art::mirror::LongArray> original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  bool UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                   art::ObjPtr<art::mirror::DexCache> new_dex_cache)
+      REQUIRES(art::Locks::mutator_lock_);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
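
A rough sketch of how this entry point is expected to be driven; the surrounding jvmti plumbing is
not part of this change, so everything except Redefiner::RedefineClass and GetClassLocation below
is illustrative:

    // Hypothetical caller, e.g. a RedefineClasses implementation. 'env', 'klass',
    // 'data_len' and 'dex_data' are assumed to come from the jvmti request.
    std::string location;
    jvmtiError err = openjdkjvmti::GetClassLocation(env, klass, &location);
    if (err != OK) {
      return err;
    }
    std::string error_msg;
    err = openjdkjvmti::Redefiner::RedefineClass(env,
                                                 art::Runtime::Current(),
                                                 art::Thread::Current(),
                                                 klass,
                                                 location,
                                                 data_len,
                                                 dex_data,
                                                 &error_msg);
    if (err != OK) {
      LOG(WARNING) << "Redefinition failed: " << error_msg;
    }
    return err;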
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 7bb5205..f545125 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -29,8 +29,12 @@
  * questions.
  */
 
+#include <unordered_map>
+#include <unordered_set>
+
 #include "transform.h"
 
+#include "art_method.h"
 #include "class_linker.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -46,6 +50,7 @@
 #include "mirror/string-inl.h"
 #include "oat_file.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 #include "transform.h"
 #include "utf.h"
@@ -53,196 +58,22 @@
 
 namespace openjdkjvmti {
 
-static bool ReadChecksum(jint data_len, const unsigned char* dex, /*out*/uint32_t* res) {
-  if (data_len < static_cast<jint>(sizeof(art::DexFile::Header))) {
-    return false;
+jvmtiError GetClassLocation(ArtJvmTiEnv* env, jclass klass, /*out*/std::string* location) {
+  JNIEnv* jni_env = nullptr;
+  jint ret = env->art_vm->GetEnv(reinterpret_cast<void**>(&jni_env), JNI_VERSION_1_1);
+  if (ret != JNI_OK) {
+    // TODO Different error might be better?
+    return ERR(INTERNAL);
   }
-  *res = reinterpret_cast<const art::DexFile::Header*>(dex)->checksum_;
-  return true;
+  art::ScopedObjectAccess soa(jni_env);
+  art::StackHandleScope<1> hs(art::Thread::Current());
+  art::Handle<art::mirror::Class> hs_klass(hs.NewHandle(soa.Decode<art::mirror::Class>(klass)));
+  const art::DexFile& dex = hs_klass->GetDexFile();
+  *location = dex.GetLocation();
+  return OK;
 }
 
-static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
-                                                      jint data_len,
-                                                      unsigned char* dex_data) {
-  std::string error_msg;
-  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
-      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
-      nullptr,
-      data_len,
-      PROT_READ|PROT_WRITE,
-      /*low_4gb*/false,
-      /*reuse*/false,
-      &error_msg));
-  if (map == nullptr) {
-    return map;
-  }
-  memcpy(map->Begin(), dex_data, data_len);
-  map->Protect(PROT_READ);
-  return map;
-}
-
-static void InvalidateExistingMethods(art::Thread* self,
-                                      art::Handle<art::mirror::Class> klass,
-                                      art::Handle<art::mirror::DexCache> cache,
-                                      const art::DexFile* dex_file)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  // Create new DexCache with new DexFile.
-  // reset dex_class_def_idx_
-  // for each method reset entry_point_from_quick_compiled_code_ to bridge
-  // for each method reset dex_code_item_offset_
-  // for each method reset dex_method_index_
-  // for each method set dex_cache_resolved_methods_ to new DexCache
-  // for each method set dex_cache_resolved_types_ to new DexCache
-  auto* runtime = art::Runtime::Current();
-  art::ClassLinker* linker = runtime->GetClassLinker();
-  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
-  std::string descriptor_storage;
-  const char* descriptor = klass->GetDescriptor(&descriptor_storage);
-  // Get the new class def
-  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
-      *dex_file, descriptor, art::ComputeModifiedUtf8Hash(descriptor));
-  CHECK(class_def != nullptr);
-  const art::DexFile::TypeId& declaring_class_id = dex_file->GetTypeId(class_def->class_idx_);
-  art::StackHandleScope<6> hs(self);
-  const art::DexFile& old_dex_file = klass->GetDexFile();
-  for (art::ArtMethod& method : klass->GetMethods(image_pointer_size)) {
-    // Find the code_item for the method then find the dex_method_index and dex_code_item_offset to
-    // set.
-    const art::DexFile::StringId* new_name_id = dex_file->FindStringId(method.GetName());
-    art::dex::TypeIndex method_return_idx =
-        dex_file->GetIndexForTypeId(*dex_file->FindTypeId(method.GetReturnTypeDescriptor()));
-    const auto* old_type_list = method.GetParameterTypeList();
-    std::vector<art::dex::TypeIndex> new_type_list;
-    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
-      new_type_list.push_back(
-          dex_file->GetIndexForTypeId(
-              *dex_file->FindTypeId(
-                  old_dex_file.GetTypeDescriptor(
-                      old_dex_file.GetTypeId(
-                          old_type_list->GetTypeItem(i).type_idx_)))));
-    }
-    const art::DexFile::ProtoId* proto_id = dex_file->FindProtoId(method_return_idx,
-                                                                  new_type_list);
-    CHECK(proto_id != nullptr || old_type_list == nullptr);
-    const art::DexFile::MethodId* method_id = dex_file->FindMethodId(declaring_class_id,
-                                                                      *new_name_id,
-                                                                      *proto_id);
-    CHECK(method_id != nullptr);
-    uint32_t dex_method_idx = dex_file->GetIndexForMethodId(*method_id);
-    method.SetDexMethodIndex(dex_method_idx);
-    linker->SetEntryPointsToInterpreter(&method);
-    method.SetCodeItemOffset(dex_file->FindCodeItemOffset(*class_def, dex_method_idx));
-    method.SetDexCacheResolvedMethods(cache->GetResolvedMethods(), image_pointer_size);
-    method.SetDexCacheResolvedTypes(cache->GetResolvedTypes(), image_pointer_size);
-  }
-
-  // Update the class fields.
-  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
-  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
-  klass->SetDexCache(cache.Get());
-  klass->SetDexCacheStrings(cache->GetStrings());
-  klass->SetDexClassDefIndex(dex_file->GetIndexForClassDef(*class_def));
-  klass->SetDexTypeIndex(dex_file->GetIndexForTypeId(*dex_file->FindTypeId(descriptor)));
-}
-
-// Adds the dex file.
-static art::mirror::LongArray* InsertDexFileIntoArray(art::Thread* self,
-                                                      const art::DexFile* dex,
-                                                      art::Handle<art::mirror::LongArray>& orig)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  art::StackHandleScope<1> hs(self);
-  CHECK_GE(orig->GetLength(), 1);
-  art::Handle<art::mirror::LongArray> ret(
-      hs.NewHandle(art::mirror::LongArray::Alloc(self, orig->GetLength() + 1)));
-  CHECK(ret.Get() != nullptr);
-  // Copy the oat-dex.
-  // TODO Should I clear the oatdex element?
-  ret->SetWithoutChecks<false>(0, orig->GetWithoutChecks(0));
-  ret->SetWithoutChecks<false>(1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex)));
-  ret->Memcpy(2, orig.Get(), 1, orig->GetLength() - 1);
-  return ret.Get();
-}
-
-// TODO Handle all types of class loaders.
-static bool FindDalvikSystemDexFileAndLoaderForClass(
-    art::Handle<art::mirror::Class> klass,
-    /*out*/art::mirror::Object** dex_file,
-    /*out*/art::mirror::ClassLoader** loader)
-      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
-  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
-  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
-
-  art::Thread* self = art::Thread::Current();
-  CHECK(!self->IsExceptionPending());
-  art::StackHandleScope<11> hs(self);
-  art::ClassLinker* class_linker = art::Runtime::Current()->GetClassLinker();
-
-  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
-      nullptr));
-  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
-      self, dex_class_loader_name, null_loader)));
-
-  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
-      "pathList", dex_path_list_name);
-  CHECK(path_list_field != nullptr);
-
-  art::ArtField* dex_path_list_element_field =
-      class_linker->FindClass(self, dex_path_list_name, null_loader)
-        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
-  CHECK(dex_path_list_element_field != nullptr);
-
-  art::ArtField* element_dex_file_field =
-      class_linker->FindClass(self, dex_path_list_element_name, null_loader)
-        ->FindDeclaredInstanceField("dexFile", dex_file_name);
-  CHECK(element_dex_file_field != nullptr);
-
-  art::Handle<art::mirror::ClassLoader> h_class_loader(hs.NewHandle(klass->GetClassLoader()));
-  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(h_class_loader->GetClass()));
-  // Check if loader is a BaseDexClassLoader
-  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
-    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
-               << "supported class loader type!";
-    return false;
-  }
-  art::Handle<art::mirror::Object> path_list(
-      hs.NewHandle(path_list_field->GetObject(h_class_loader.Get())));
-  CHECK(path_list.Get() != nullptr);
-  CHECK(!self->IsExceptionPending());
-  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
-      dex_path_list_element_field->GetObject(path_list.Get())->
-      AsObjectArray<art::mirror::Object>()));
-  CHECK(!self->IsExceptionPending());
-  CHECK(dex_elements_list.Get() != nullptr);
-  size_t num_elements = dex_elements_list->GetLength();
-  art::MutableHandle<art::mirror::Object> current_element(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  art::MutableHandle<art::mirror::Object> first_dex_file(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  for (size_t i = 0; i < num_elements; i++) {
-    current_element.Assign(dex_elements_list->Get(i));
-    CHECK(current_element.Get() != nullptr);
-    CHECK(!self->IsExceptionPending());
-    CHECK(dex_elements_list.Get() != nullptr);
-    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self,
-                                                                  dex_path_list_element_name,
-                                                                  null_loader));
-    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
-    // comes from but it is more annoying because we would need to find this class. It is not
-    // necessary for proper function since we just need to be in front of the classes old dex file
-    // in the path.
-    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
-    if (first_dex_file.Get() != nullptr) {
-      *dex_file = first_dex_file.Get();
-      *loader = h_class_loader.Get();
-      return true;
-    }
-  }
-  return false;
-}
-
+// TODO Move this function somewhere more appropriate.
 // Gets the data surrounding the given class.
 jvmtiError GetTransformationData(ArtJvmTiEnv* env,
                                  jclass klass,
@@ -281,83 +112,4 @@
   return OK;
 }
 
-// Install the new dex file.
-// TODO do error checks for bad state (method in a stack, changes to number of methods/fields/etc).
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          const std::string& original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data) {
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  art::Thread* self = art::Thread::Current();
-  art::Runtime* runtime = art::Runtime::Current();
-  art::ThreadList* threads = runtime->GetThreadList();
-  art::ClassLinker* class_linker = runtime->GetClassLinker();
-  uint32_t checksum = 0;
-  if (!ReadChecksum(data_len, dex_data, &checksum)) {
-    return ERR(INVALID_CLASS_FORMAT);
-  }
-
-  std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_location, data_len, dex_data));
-  if (map.get() == nullptr) {
-    return ERR(INTERNAL);
-  }
-  std::string error_msg;
-  // Load the new dex_data in memory (mmap it, etc)
-  std::unique_ptr<const art::DexFile> new_dex_file = art::DexFile::Open(map->GetName(),
-                                                                        checksum,
-                                                                        std::move(map),
-                                                                        /*verify*/ true,
-                                                                        /*verify_checksum*/ true,
-                                                                        &error_msg);
-  CHECK(new_dex_file.get() != nullptr) << "Unable to load dex file! " << error_msg;
-
-  // Get mutator lock. We need the lifetimes of these variables (hs, the classes, etc.) to be longer
-  // then current lock (since there isn't upgrading of the lock) so we don't use soa.
-  art::ThreadState old_state = self->TransitionFromSuspendedToRunnable();
-  // This scope is needed to make sure that the HandleScope dies with mutator_lock_ since we need to
-  // upgrade the mutator_lock during the execution.
-  {
-    art::StackHandleScope<11> hs(self);
-    art::Handle<art::mirror::ClassLoader> null_loader(
-        hs.NewHandle<art::mirror::ClassLoader>(nullptr));
-    CHECK(null_loader.Get() == nullptr);
-    art::ArtField* dex_file_cookie_field = class_linker->
-        FindClass(self, dex_file_name, null_loader)->
-        FindDeclaredInstanceField("mCookie", "Ljava/lang/Object;");
-    art::ArtField* dex_file_internal_cookie_field =
-        class_linker->FindClass(self, dex_file_name, null_loader)
-          ->FindDeclaredInstanceField("mInternalCookie", "Ljava/lang/Object;");
-    CHECK(dex_file_cookie_field != nullptr);
-    art::Handle<art::mirror::Class> klass(hs.NewHandle(self->DecodeJObject(jklass)->AsClass()));
-    art::mirror::Object* dex_file_ptr = nullptr;
-    art::mirror::ClassLoader* class_loader_ptr = nullptr;
-    // Find dalvik.system.DexFile that represents the dex file we are changing.
-    if (!FindDalvikSystemDexFileAndLoaderForClass(klass, &dex_file_ptr, &class_loader_ptr)) {
-      self->TransitionFromRunnableToSuspended(old_state);
-      LOG(ERROR) << "Could not find DexFile.";
-      return ERR(INTERNAL);
-    }
-    art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(dex_file_ptr));
-    art::Handle<art::mirror::ClassLoader> class_loader(hs.NewHandle(class_loader_ptr));
-    art::Handle<art::mirror::LongArray> art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            dex_file_cookie_field->GetObject(dex_file_obj.Get())->AsLongArray()));
-    art::Handle<art::mirror::LongArray> new_art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            InsertDexFileIntoArray(self, new_dex_file.get(), art_dex_array)));
-    art::Handle<art::mirror::DexCache> cache(
-        hs.NewHandle(class_linker->RegisterDexFile(*new_dex_file.get(), class_loader.Get())));
-    self->TransitionFromRunnableToSuspended(old_state);
-
-    threads->SuspendAll("moving dex file into runtime", /*long_suspend*/true);
-    // Change the mCookie field. Old value will be GC'd as normal.
-    dex_file_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    dex_file_internal_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    // Invalidate existing methods.
-    InvalidateExistingMethods(self, klass, cache, new_dex_file.release());
-  }
-  threads->ResumeAll();
-  return OK;
-}
-
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index a76ed93..0ad5099 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -41,6 +41,8 @@
 
 namespace openjdkjvmti {
 
+jvmtiError GetClassLocation(ArtJvmTiEnv* env, jclass klass, /*out*/std::string* location);
+
 // Gets the data surrounding the given class.
 jvmtiError GetTransformationData(ArtJvmTiEnv* env,
                                  jclass klass,
@@ -52,12 +54,6 @@
                                  /*out*/jint* data_len,
                                  /*out*/unsigned char** dex_data);
 
-// Install the new dex file.
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          const std::string& original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data);
-
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8551791..66bb803 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -62,6 +62,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
 #include "debugger.h"
@@ -349,6 +350,7 @@
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
+  delete cha_;
   delete heap_;
   delete intern_table_;
   delete oat_file_manager_;
@@ -1203,6 +1205,7 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
+  cha_ = new ClassHierarchyAnalysis;
   if (GetHeap()->HasBootImageSpace()) {
     bool result = class_linker_->InitFromBootImage(&error_msg);
     if (!result) {
@@ -1733,9 +1736,24 @@
   }
 }
 
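+// Replaces the former calls to ClassLinker::CreateRuntimeMethod() below: allocate a one-element
+// ArtMethod array in the given LinearAlloc and mark the method as a runtime method by giving it
+// kDexNoIndex as its dex method index.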
+static ArtMethod* CreateRuntimeMethod(ClassLinker* class_linker, LinearAlloc* linear_alloc) {
+  const PointerSize image_pointer_size = class_linker->GetImagePointerSize();
+  const size_t method_alignment = ArtMethod::Alignment(image_pointer_size);
+  const size_t method_size = ArtMethod::Size(image_pointer_size);
+  LengthPrefixedArray<ArtMethod>* method_array = class_linker->AllocArtMethodArray(
+      Thread::Current(),
+      linear_alloc,
+      1);
+  ArtMethod* method = &method_array->At(0, method_size, method_alignment);
+  CHECK(method != nullptr);
+  method->SetDexMethodIndex(DexFile::kDexNoIndex);
+  CHECK(method->IsRuntimeMethod());
+  return method;
+}
+
 ArtMethod* Runtime::CreateImtConflictMethod(LinearAlloc* linear_alloc) {
   ClassLinker* const class_linker = GetClassLinker();
-  ArtMethod* method = class_linker->CreateRuntimeMethod(linear_alloc);
+  ArtMethod* method = CreateRuntimeMethod(class_linker, linear_alloc);
   // When compiling, the code pointer will get set later when the image is loaded.
   const PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   if (IsAotCompiler()) {
@@ -1756,7 +1774,7 @@
 }
 
 ArtMethod* Runtime::CreateResolutionMethod() {
-  auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
+  auto* method = CreateRuntimeMethod(GetClassLinker(), GetLinearAlloc());
   // When compiling, the code pointer will get set later when the image is loaded.
   if (IsAotCompiler()) {
     PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
@@ -1768,7 +1786,7 @@
 }
 
 ArtMethod* Runtime::CreateCalleeSaveMethod() {
-  auto* method = GetClassLinker()->CreateRuntimeMethod(GetLinearAlloc());
+  auto* method = CreateRuntimeMethod(GetClassLinker(), GetLinearAlloc());
   PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set_);
   method->SetEntryPointFromQuickCompiledCodePtrSize(nullptr, pointer_size);
   DCHECK_NE(instruction_set_, kNone);
@@ -2032,7 +2050,8 @@
   preinitialization_transaction_->RecordWeakStringRemoval(s);
 }
 
-void Runtime::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) const {
+void Runtime::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache,
+                                  dex::StringIndex string_idx) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordResolveString(dex_cache, string_idx);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index de5a356..e6b3128 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/macros.h"
+#include "dex_file_types.h"
 #include "experimental_flags.h"
 #include "gc_root.h"
 #include "instrumentation.h"
@@ -75,6 +76,7 @@
 }  // namespace verifier
 class ArenaPool;
 class ArtMethod;
+class ClassHierarchyAnalysis;
 class ClassLinker;
 class Closure;
 class CompilerCallbacks;
@@ -520,7 +522,7 @@
       REQUIRES(Locks::intern_table_lock_);
   void RecordWeakStringRemoval(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) const
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetFaultMessage(const std::string& message) REQUIRES(!fault_message_lock_);
@@ -673,6 +675,10 @@
   void AddSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
   void RemoveSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
 
+  ClassHierarchyAnalysis* GetClassHierarchyAnalysis() {
+    return cha_;
+  }
+
   NO_RETURN
   static void Aborter(const char* abort_message);
 
@@ -920,6 +926,8 @@
   // Generic system-weak holders.
   std::vector<gc::AbstractSystemWeakHolder*> system_weak_holders_;
 
+  ClassHierarchyAnalysis* cha_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 167a30b..f20aa20 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -648,7 +648,7 @@
     return;
   }
 
-  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
+  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->GetCodeSize();
   uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
   CHECK(code_start <= pc && pc <= (code_start + code_size))
       << method->PrettyMethod()
diff --git a/runtime/stack.h b/runtime/stack.h
index e879214..d02e4b7 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -66,6 +66,11 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Size in bytes of the should_deoptimize flag on the stack.
+// We just need 4 bytes for our purpose regardless of the architecture. The frame size
+// calculation will automatically align the final frame size.
+static constexpr size_t kShouldDeoptimizeFlagSize = 4;
+
 // Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
 // The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
 // thread roots).
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
index c75c218..0fc06e6 100644
--- a/runtime/string_reference.h
+++ b/runtime/string_reference.h
@@ -21,20 +21,22 @@
 
 #include "base/logging.h"
 #include "dex_file-inl.h"
+#include "dex_file_types.h"
 #include "utf-inl.h"
 
 namespace art {
 
 // A string is located by its DexFile and the string_ids_ table index into that DexFile.
 struct StringReference {
-  StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
+  StringReference(const DexFile* file, dex::StringIndex index)
+      : dex_file(file), string_index(index) { }
 
   const char* GetStringData() const {
     return dex_file->GetStringData(dex_file->GetStringId(string_index));
   }
 
   const DexFile* dex_file;
-  uint32_t string_index;
+  dex::StringIndex string_index;
 };
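+// For example, StringReference(&dex_file, dex::StringIndex(0u)) names the first entry in
+// dex_file's string_ids_ table.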
 
 // Compare only the reference and not the string contents.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 65c8681..17f5513 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -122,21 +122,26 @@
   CHECK(kUseReadBarrier);
   tls32_.is_gc_marking = is_marking;
   UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
+  ResetQuickAllocEntryPointsForThread(is_marking);
 }
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
   uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
-  uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
-      sizeof(tlsPtr_.quick_entrypoints));
+  uintptr_t* end = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
   InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
 }
 
-void Thread::ResetQuickAllocEntryPointsForThread() {
-  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints);
+void Thread::ResetQuickAllocEntryPointsForThread(bool is_marking) {
+  if (kUseReadBarrier && kRuntimeISA != kX86_64) {
+    // Allocation entrypoint switching is currently only implemented for X86_64.
+    is_marking = true;
+  }
+  ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints, is_marking);
 }
 
 class DeoptimizationContextRecord {
@@ -1389,6 +1394,7 @@
     os << "  | group=\"" << group_name << "\""
        << " sCount=" << thread->tls32_.suspend_count
        << " dsCount=" << thread->tls32_.debug_suspend_count
+       << " flags=" << thread->tls32_.state_and_flags.as_struct.flags
        << " obj=" << reinterpret_cast<void*>(thread->tlsPtr_.opeer)
        << " self=" << reinterpret_cast<const void*>(thread) << "\n";
   }
@@ -3170,4 +3176,8 @@
   tlsPtr_.exception = new_exception.Ptr();
 }
 
+bool Thread::IsAotCompiler() {
+  return Runtime::Current()->IsAotCompiler();
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 97093a6..b80fdc7 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,13 +33,13 @@
 #include "base/mutex.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc_root.h"
 #include "globals.h"
 #include "handle_scope.h"
 #include "instrumentation.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "runtime.h"
 #include "runtime_stats.h"
 #include "stack.h"
 #include "thread_state.h"
@@ -87,7 +87,6 @@
 class JavaVMExt;
 struct JNIEnvExt;
 class Monitor;
-class Runtime;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class SingleStepControl;
@@ -949,17 +948,17 @@
   }
 
   std::vector<ArtMethod*>* GetStackTraceSample() const {
-    DCHECK(!Runtime::Current()->IsAotCompiler());
+    DCHECK(!IsAotCompiler());
     return tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample;
   }
 
   void SetStackTraceSample(std::vector<ArtMethod*>* sample) {
-    DCHECK(!Runtime::Current()->IsAotCompiler());
+    DCHECK(!IsAotCompiler());
     tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample = sample;
   }
 
   verifier::VerifierDeps* GetVerifierDeps() const {
-    DCHECK(Runtime::Current()->IsAotCompiler());
+    DCHECK(IsAotCompiler());
     return tlsPtr_.deps_or_stack_trace_sample.verifier_deps;
   }
 
@@ -967,7 +966,7 @@
   // entry in the thread is cleared before destruction of the actual VerifierDeps
   // object, or the thread.
   void SetVerifierDeps(verifier::VerifierDeps* verifier_deps) {
-    DCHECK(Runtime::Current()->IsAotCompiler());
+    DCHECK(IsAotCompiler());
     DCHECK(verifier_deps == nullptr || tlsPtr_.deps_or_stack_trace_sample.verifier_deps == nullptr);
     tlsPtr_.deps_or_stack_trace_sample.verifier_deps = verifier_deps;
   }
@@ -1007,7 +1006,7 @@
     tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
   }
 
-  void ResetQuickAllocEntryPointsForThread();
+  void ResetQuickAllocEntryPointsForThread(bool is_marking);
 
   // Returns the remaining space in the TLAB.
   size_t TlabSize() const;
@@ -1246,6 +1245,8 @@
   // Install the protected region for implicit stack checks.
   void InstallImplicitProtection();
 
+  static bool IsAotCompiler();
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows and atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 27fb37a..a6bd83d 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -375,7 +375,7 @@
   return count;
 }
 
-size_t ThreadList::RunEmptyCheckpoint() {
+size_t ThreadList::RunEmptyCheckpoint(std::vector<uint32_t>& runnable_thread_ids) {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
@@ -392,6 +392,9 @@
             // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
             // some time in the near future.
             ++count;
+            if (kIsDebugBuild) {
+              runnable_thread_ids.push_back(thread->GetThreadId());
+            }
             break;
           }
           if (thread->GetState() != kRunnable) {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 133d430..1acabcb 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -27,6 +27,7 @@
 
 #include <bitset>
 #include <list>
+#include <vector>
 
 namespace art {
 namespace gc {
@@ -106,7 +107,9 @@
   // in-flight mutator heap access (eg. a read barrier.) Runnable threads will respond by
   // decrementing the empty checkpoint barrier count. This works even when the weak ref access is
   // disabled. Only one concurrent use is currently supported.
-  size_t RunEmptyCheckpoint()
+  // In debug builds, runnable_thread_ids will be populated with the thread IDs of the runnable
+  // threads to wait for.
+  size_t RunEmptyCheckpoint(std::vector<uint32_t>& runnable_thread_ids)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
   size_t RunCheckpointOnRunnableThreads(Closure* checkpoint_function)
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index c5da5d2..2536968 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -167,9 +167,10 @@
   array_log.LogValue(index, value);
 }
 
-void Transaction::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) {
+void Transaction::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache,
+                                      dex::StringIndex string_idx) {
   DCHECK(dex_cache != nullptr);
-  DCHECK_LT(string_idx, dex_cache->GetDexFile()->NumStringIds());
+  DCHECK_LT(string_idx.index_, dex_cache->GetDexFile()->NumStringIds());
   MutexLock mu(Thread::Current(), log_lock_);
   resolve_string_logs_.push_back(ResolveStringLog(dex_cache, string_idx));
 }
@@ -510,11 +511,11 @@
 }
 
 Transaction::ResolveStringLog::ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache,
-                                                uint32_t string_idx)
+                                                dex::StringIndex string_idx)
     : dex_cache_(dex_cache),
       string_idx_(string_idx) {
   DCHECK(dex_cache != nullptr);
-  DCHECK_LT(string_idx_, dex_cache->GetDexFile()->NumStringIds());
+  DCHECK_LT(string_idx_.index_, dex_cache->GetDexFile()->NumStringIds());
 }
 
 void Transaction::ResolveStringLog::VisitRoots(RootVisitor* visitor) {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 2ec2f50..1774657 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -20,6 +20,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/value_object.h"
+#include "dex_file_types.h"
 #include "gc_root.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -97,7 +98,7 @@
       REQUIRES(!log_lock_);
 
   // Record resolve string.
-  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx)
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!log_lock_);
 
@@ -197,7 +198,7 @@
 
   class ResolveStringLog : public ValueObject {
    public:
-    ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx);
+    ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache, dex::StringIndex string_idx);
 
     void Undo() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -205,7 +206,7 @@
 
    private:
     GcRoot<mirror::DexCache> dex_cache_;
-    const uint32_t string_idx_;
+    const dex::StringIndex string_idx_;
   };
 
   void LogInternedString(const InternStringLog& log)
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 77c2b76..a43c967 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -26,8 +26,6 @@
 
 namespace art {
 
-static const size_t kDexNoIndex = DexFile::kDexNoIndex;  // Make copy to prevent linking errors.
-
 class TransactionTest : public CommonRuntimeTest {
  public:
   // Tests failing class initialization due to native call with transaction rollback.
@@ -507,8 +505,8 @@
   static const char* kResolvedString = "ResolvedString";
   const DexFile::StringId* string_id = dex_file->FindStringId(kResolvedString);
   ASSERT_TRUE(string_id != nullptr);
-  uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
-  ASSERT_NE(string_idx, kDexNoIndex);
+  dex::StringIndex string_idx = dex_file->GetIndexForStringId(*string_id);
+  ASSERT_TRUE(string_idx.IsValid());
   // String should only get resolved by the initializer.
   EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 843be92..dabf8c8 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -35,10 +35,12 @@
   return (memcmp(version_, kVdexVersion, sizeof(kVdexVersion)) == 0);
 }
 
-VdexFile::Header::Header(uint32_t dex_size,
+VdexFile::Header::Header(uint32_t number_of_dex_files,
+                         uint32_t dex_size,
                          uint32_t verifier_deps_size,
                          uint32_t quickening_info_size)
-    : dex_size_(dex_size),
+    : number_of_dex_files_(number_of_dex_files),
+      dex_size_(dex_size),
       verifier_deps_size_(verifier_deps_size),
       quickening_info_size_(quickening_info_size) {
   memcpy(magic_, kVdexMagic, sizeof(kVdexMagic));
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 75a0d5e..3b114a9 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -43,7 +43,10 @@
  public:
   struct Header {
    public:
-    Header(uint32_t dex_size, uint32_t verifier_deps_size, uint32_t quickening_info_size);
+    Header(uint32_t number_of_dex_files,
+           uint32_t dex_size,
+           uint32_t verifier_deps_size,
+           uint32_t quickening_info_size);
 
     const char* GetMagic() const { return reinterpret_cast<const char*>(magic_); }
     const char* GetVersion() const { return reinterpret_cast<const char*>(version_); }
@@ -54,18 +57,22 @@
     uint32_t GetDexSize() const { return dex_size_; }
     uint32_t GetVerifierDepsSize() const { return verifier_deps_size_; }
     uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; }
+    uint32_t GetNumberOfDexFiles() const { return number_of_dex_files_; }
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
-    static constexpr uint8_t kVdexVersion[] = { '0', '0', '0', '\0' };
+    static constexpr uint8_t kVdexVersion[] = { '0', '0', '1', '\0' };
 
     uint8_t magic_[4];
     uint8_t version_[4];
+    uint32_t number_of_dex_files_;
     uint32_t dex_size_;
     uint32_t verifier_deps_size_;
     uint32_t quickening_info_size_;
   };
 
+  typedef uint32_t VdexChecksum;
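+  // With version 001 the layout is: Header, then one VdexChecksum per dex file, then the dex
+  // files themselves, followed by the verifier deps data and the quickening info.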
+
   static VdexFile* Open(const std::string& vdex_filename,
                         bool writable,
                         bool low_4gb,
@@ -88,7 +95,7 @@
 
   ArrayRef<const uint8_t> GetVerifierDepsData() const {
     return ArrayRef<const uint8_t>(
-        Begin() + sizeof(Header) + GetHeader().GetDexSize(), GetHeader().GetVerifierDepsSize());
+        DexBegin() + GetHeader().GetDexSize(), GetHeader().GetVerifierDepsSize());
   }
 
   ArrayRef<const uint8_t> GetQuickeningInfo() const {
@@ -107,6 +114,12 @@
   // is none.
   const uint8_t* GetNextDexFileData(const uint8_t* cursor) const;
 
+  // Get the location checksum of the dex file at index `dex_file_index`.
+  uint32_t GetLocationChecksum(uint32_t dex_file_index) const {
+    DCHECK_LT(dex_file_index, GetHeader().GetNumberOfDexFiles());
+    return reinterpret_cast<const uint32_t*>(Begin() + sizeof(Header))[dex_file_index];
+  }
+
  private:
   explicit VdexFile(MemMap* mmap) : mmap_(mmap) {}
 
@@ -115,11 +128,15 @@
   }
 
   const uint8_t* DexBegin() const {
-    return Begin() + sizeof(Header);
+    return Begin() + sizeof(Header) + GetSizeOfChecksumsSection();
   }
 
   const uint8_t* DexEnd() const {
-    return Begin() + sizeof(Header) + GetHeader().GetDexSize();
+    return DexBegin() + GetHeader().GetDexSize();
+  }
+
+  size_t GetSizeOfChecksumsSection() const {
+    return sizeof(VdexChecksum) * GetHeader().GetNumberOfDexFiles();
   }
 
   std::unique_ptr<MemMap> mmap_;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 7137db8..ebecc85 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -41,6 +41,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
+#include "mirror/method_handle_impl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "reg_type-inl.h"
@@ -410,15 +411,15 @@
       result.kind = kSoftFailure;
       if (method != nullptr &&
           !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
     }
     if (method != nullptr) {
       if (verifier.HasInstructionThatWillThrow()) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
       if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+        method->AddAccessFlags(kAccMustCountLocks);
       }
     }
   } else {
@@ -1184,6 +1185,11 @@
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
   }
+  switch (inst->GetVerifyTypeArgumentH()) {
+    case Instruction::kVerifyRegHPrototype:
+      result = result && CheckPrototypeIndex(inst->VRegH());
+      break;
+  }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
       result = result && CheckArrayData(code_offset);
@@ -1289,6 +1295,15 @@
   return true;
 }
 
+inline bool MethodVerifier::CheckPrototypeIndex(uint32_t idx) {
+  if (idx >= dex_file_->GetHeader().proto_ids_size_) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad prototype index " << idx << " (max "
+                                      << dex_file_->GetHeader().proto_ids_size_ << ")";
+    return false;
+  }
+  return true;
+}
+
 inline bool MethodVerifier::CheckStringIndex(uint32_t idx) {
   if (idx >= dex_file_->GetHeader().string_ids_size_) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad string index " << idx << " (max "
@@ -2934,7 +2949,7 @@
          * allowing the latter only if the "this" argument is the same as the "this" argument to
          * this method (which implies that we're in a constructor ourselves).
          */
-        const RegType& this_type = work_line_->GetInvocationThis(this, inst, is_range);
+        const RegType& this_type = work_line_->GetInvocationThis(this, inst);
         if (this_type.IsConflict())  // failure.
           break;
 
@@ -3015,7 +3030,7 @@
       /* Get the type of the "this" arg, which should either be a sub-interface of called
        * interface or Object (see comments in RegType::JoinClass).
        */
-      const RegType& this_type = work_line_->GetInvocationThis(this, inst, is_range);
+      const RegType& this_type = work_line_->GetInvocationThis(this, inst);
       if (this_type.IsZero()) {
         /* null pointer always passes (and always fails at runtime) */
       } else {
@@ -3057,10 +3072,37 @@
     }
     case Instruction::INVOKE_POLYMORPHIC:
     case Instruction::INVOKE_POLYMORPHIC_RANGE: {
+      bool is_range = (inst->Opcode() == Instruction::INVOKE_POLYMORPHIC_RANGE);
+      ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_POLYMORPHIC, is_range);
+      if (called_method == nullptr) {
+        // Convert potential soft failures in VerifyInvocationArgs() to hard errors.
+        if (failure_messages_.size() > 0) {
+          std::string message = failure_messages_.back()->str();
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << message;
+        } else {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-polymorphic verification failure.";
+        }
+        break;
+      }
+      if (!CheckSignaturePolymorphicMethod(called_method) ||
+          !CheckSignaturePolymorphicReceiver(inst)) {
+        break;
+      }
+      const uint32_t proto_idx = (is_range) ? inst->VRegH_4rcc() : inst->VRegH_45cc();
+      const char* descriptor =
+          dex_file_->GetReturnTypeDescriptor(dex_file_->GetProtoId(proto_idx));
+      const RegType& return_type =
+          reg_types_.FromDescriptor(GetClassLoader(), descriptor, false);
+      if (!return_type.IsLowHalf()) {
+        work_line_->SetResultRegisterType(this, return_type);
+      } else {
+        work_line_->SetResultRegisterTypeWide(return_type, return_type.HighHalf(&reg_types_));
+      }
+      // TODO(oth): remove when compiler support is available.
       Fail(VERIFY_ERROR_FORCE_INTERPRETER)
-          << "instruction is not supported by verifier; skipping verification";
+          << "invoke-polymorphic is not supported by compiler";
       have_pending_experimental_failure_ = true;
-      return false;
+      break;
     }
     case Instruction::NEG_INT:
     case Instruction::NOT_INT:
@@ -3416,8 +3458,6 @@
     work_line_->SetResultTypeToUnknown(this);
   }
 
-
-
   /*
    * Handle "branch". Tag the branch target.
    *
@@ -3740,7 +3780,8 @@
   } else if (method_type == METHOD_SUPER && is_interface) {
     return kInterfaceMethodResolution;
   } else {
-    DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER);
+    DCHECK(method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER ||
+           method_type == METHOD_POLYMORPHIC);
     return kVirtualMethodResolution;
   }
 }
@@ -3868,15 +3909,18 @@
     return nullptr;
   }
   // See if the method type implied by the invoke instruction matches the access flags for the
-  // target method.
+  // target method. The check for METHOD_POLYMORPHIC relies on there being precisely two
+  // signature polymorphic methods supported by the runtime, both of which are native
+  // methods with variable arguments.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
       (method_type == METHOD_STATIC && !res_method->IsStatic()) ||
       ((method_type == METHOD_SUPER ||
         method_type == METHOD_VIRTUAL ||
-        method_type == METHOD_INTERFACE) && res_method->IsDirect())
-      ) {
+        method_type == METHOD_INTERFACE) && res_method->IsDirect()) ||
+      ((method_type == METHOD_POLYMORPHIC) &&
+       (!res_method->IsNative() || !res_method->IsVarargs()))) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
-                                       " type of " << res_method->PrettyMethod();
+                                       "type of " << res_method->PrettyMethod();
     return nullptr;
   }
   return res_method;
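
The METHOD_POLYMORPHIC arm of the check above is sound because the runtime currently exposes exactly two signature polymorphic methods, MethodHandle.invoke and MethodHandle.invokeExact, both declared native and varargs. The predicate in isolation, as a sketch; the flag values follow the dex access-flag constants but should be treated as assumptions here:

    #include <cstdint>

    constexpr uint32_t kAccNative  = 0x0100;  // ACC_NATIVE
    constexpr uint32_t kAccVarargs = 0x0080;  // ACC_VARARGS

    // A METHOD_POLYMORPHIC call site may only resolve to a native varargs
    // method; anything else is flagged as a class-change error above.
    bool IsPlausiblePolymorphicTarget(uint32_t access_flags) {
      return (access_flags & kAccNative) != 0 && (access_flags & kAccVarargs) != 0;
    }
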
@@ -3888,20 +3932,18 @@
   // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
   // match the call to the signature. Also, we might be calling through an abstract method
   // definition (which doesn't have register count values).
-  const size_t expected_args = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
+  const size_t expected_args = inst->VRegA();
   /* caught by static verifier */
   DCHECK(is_range || expected_args <= 5);
-  if (expected_args > code_item_->outs_size_) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
-        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
-    return nullptr;
-  }
 
-  uint32_t arg[5];
-  if (!is_range) {
-    inst->GetVarArgs(arg);
+  // TODO(oth): Enable this path for invoke-polymorphic when b/33099829 is resolved.
+  if (method_type != METHOD_POLYMORPHIC) {
+    if (expected_args > code_item_->outs_size_) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid argument count (" << expected_args
+                                        << ") exceeds outsSize (" << code_item_->outs_size_ << ")";
+      return nullptr;
+    }
   }
-  uint32_t sig_registers = 0;
 
   /*
    * Check the "this" argument, which must be an instance of the class that declared the method.
@@ -3909,7 +3951,7 @@
    * rigorous check here (which is okay since we have to do it at runtime).
    */
   if (method_type != METHOD_STATIC) {
-    const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst, is_range);
+    const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst);
     if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
       CHECK(have_pending_hard_failure_);
       return nullptr;
@@ -3945,7 +3987,7 @@
         res_method_class = &FromClass(klass->GetDescriptor(&temp), klass,
                                       klass->CannotBeAssignedFromOtherTypes());
       } else {
-        const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+        const uint32_t method_idx = inst->VRegB();
         const dex::TypeIndex class_idx = dex_file_->GetMethodId(method_idx).class_idx_;
         res_method_class = &reg_types_.FromDescriptor(
             GetClassLoader(),
@@ -3965,13 +4007,17 @@
         }
       }
     }
-    sig_registers = 1;
   }
 
+  uint32_t arg[5];
+  if (!is_range) {
+    inst->GetVarArgs(arg);
+  }
+  uint32_t sig_registers = (method_type == METHOD_STATIC) ? 0 : 1;
   for ( ; it->HasNext(); it->Next()) {
     if (sig_registers >= expected_args) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
-          " arguments, found " << sig_registers << " or more.";
+          " argument registers, method signature has " << sig_registers + 1 << " or more";
       return nullptr;
     }
 
@@ -3984,7 +4030,7 @@
     }
 
     const RegType& reg_type = reg_types_.FromDescriptor(GetClassLoader(), param_descriptor, false);
-    uint32_t get_reg = is_range ? inst->VRegC_3rc() + static_cast<uint32_t>(sig_registers) :
+    uint32_t get_reg = is_range ? inst->VRegC() + static_cast<uint32_t>(sig_registers) :
         arg[sig_registers];
     if (reg_type.IsIntegralTypes()) {
       const RegType& src_type = work_line_->GetRegisterType(this, get_reg);
@@ -4020,7 +4066,7 @@
   }
   if (expected_args != sig_registers) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << expected_args <<
-        " arguments, found " << sig_registers;
+        " argument registers, method signature has " << sig_registers;
     return nullptr;
   }
   return res_method;
@@ -4032,11 +4078,10 @@
   // As the method may not have been resolved, make this static check against what we expect.
   // The main reason for this code block is to fail hard when we find an illegal use, e.g.,
   // wrong number of arguments or wrong primitive types, even if the method could not be resolved.
-  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  const uint32_t method_idx = inst->VRegB();
   DexFileParameterIterator it(*dex_file_,
                               dex_file_->GetProtoId(dex_file_->GetMethodId(method_idx).proto_idx_));
-  VerifyInvocationArgsFromIterator<DexFileParameterIterator>(&it, inst, method_type, is_range,
-                                                             nullptr);
+  VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, nullptr);
 }
 
 class MethodParamListDescriptorIterator {
@@ -4069,8 +4114,7 @@
     const Instruction* inst, MethodType method_type, bool is_range) {
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
-  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-
+  const uint32_t method_idx = inst->VRegB();
   ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == nullptr) {  // error or class is unresolved
     // Check what we can statically.
@@ -4133,10 +4177,84 @@
     }
   }
 
-  // Process the target method's signature. This signature may or may not
-  MethodParamListDescriptorIterator it(res_method);
-  return VerifyInvocationArgsFromIterator<MethodParamListDescriptorIterator>(&it, inst, method_type,
-                                                                             is_range, res_method);
+  if (method_type == METHOD_POLYMORPHIC) {
+    // Process the signature of the calling site that is invoking the method handle.
+    DexFileParameterIterator it(*dex_file_, dex_file_->GetProtoId(inst->VRegH()));
+    return VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, res_method);
+  } else {
+    // Process the target method's signature.
+    MethodParamListDescriptorIterator it(res_method);
+    return VerifyInvocationArgsFromIterator(&it, inst, method_type, is_range, res_method);
+  }
+}
+
+bool MethodVerifier::CheckSignaturePolymorphicMethod(ArtMethod* method) {
+  mirror::Class* klass = method->GetDeclaringClass();
+  if (klass != mirror::MethodHandle::StaticClass()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method must be declared in java.lang.invoke.MethodHandle";
+    return false;
+  }
+
+  const char* method_name = method->GetName();
+  if (strcmp(method_name, "invoke") != 0 && strcmp(method_name, "invokeExact") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method name invalid: " << method_name;
+    return false;
+  }
+
+  const DexFile::TypeList* types = method->GetParameterTypeList();
+  if (types == nullptr || types->Size() != 1) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has an unexpected number of arguments (expected 1)";
+    return false;
+  }
+
+  const dex::TypeIndex argument_type_index = types->GetTypeItem(0).type_idx_;
+  const char* argument_descriptor = method->GetTypeDescriptorFromTypeIdx(argument_type_index);
+  if (strcmp(argument_descriptor, "[Ljava/lang/Object;") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has unexpected argument type: " << argument_descriptor;
+    return false;
+  }
+
+  const char* return_descriptor = method->GetReturnTypeDescriptor();
+  if (strcmp(return_descriptor, "Ljava/lang/Object;") != 0) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Signature polymorphic method has unexpected return type: " << return_descriptor;
+    return false;
+  }
+
+  return true;
+}
+
+bool MethodVerifier::CheckSignaturePolymorphicReceiver(const Instruction* inst) {
+  const RegType& this_type = work_line_->GetInvocationThis(this, inst);
+  if (this_type.IsZero()) {
+    /* null pointer always passes (and always fails at run time) */
+    return true;
+  } else if (!this_type.IsNonZeroReferenceTypes()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is not a reference: "
+        << this_type;
+    return false;
+  } else if (this_type.IsUninitializedReference()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is uninitialized: "
+        << this_type;
+    return false;
+  } else if (!this_type.HasClass()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver has no class: "
+        << this_type;
+    return false;
+  } else if (!this_type.GetClass()->IsSubClass(mirror::MethodHandle::StaticClass())) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+        << "invoke-polymorphic receiver is not a subclass of MethodHandle: "
+        << this_type;
+    return false;
+  }
+  return true;
 }
 
 ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst, RegisterLine* reg_line,
@@ -4146,7 +4264,7 @@
   } else {
     DCHECK_EQ(inst->Opcode(), Instruction::INVOKE_VIRTUAL_QUICK);
   }
-  const RegType& actual_arg_type = reg_line->GetInvocationThis(this, inst, is_range, allow_failure);
+  const RegType& actual_arg_type = reg_line->GetInvocationThis(this, inst, allow_failure);
   if (!actual_arg_type.HasClass()) {
     VLOG(verifier) << "Failed to get mirror::Class* from '" << actual_arg_type << "'";
     return nullptr;
@@ -4208,7 +4326,7 @@
   // We use vAA as our expected arg count, rather than res_method->insSize, because we need to
   // match the call to the signature. Also, we might be calling through an abstract method
   // definition (which doesn't have register count values).
-  const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst, is_range);
+  const RegType& actual_arg_type = work_line_->GetInvocationThis(this, inst);
   if (actual_arg_type.IsConflict()) {  // GetInvocationThis failed.
     return nullptr;
   }
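
Worth spelling out: for METHOD_POLYMORPHIC the arguments are verified against the call site's own prototype (the proto index carried in vH), not against the resolved method's declared (Object[])Object signature, since every invoke-polymorphic call site carries distinct type expectations. A toy end-to-end model of the added flow, with simplified types standing in for the ART classes:

    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified stand-ins for a resolved method and a call site.
    struct Method {
      std::string declaring_class;  // descriptor
      std::string name;
      bool is_native;
      bool is_varargs;
    };

    struct CallSite {
      std::vector<std::string> proto_params;  // from the vH proto index
      std::string proto_return;
    };

    bool CheckSignaturePolymorphicMethod(const Method& m, std::string* err) {
      if (m.declaring_class != "Ljava/lang/invoke/MethodHandle;") {
        *err = "not declared in MethodHandle";
        return false;
      }
      if (m.name != "invoke" && m.name != "invokeExact") {
        *err = "unexpected method name";
        return false;
      }
      if (!m.is_native || !m.is_varargs) {
        *err = "not a native varargs method";
        return false;
      }
      return true;
    }

    int main() {
      Method invoke_exact{"Ljava/lang/invoke/MethodHandle;", "invokeExact", true, true};
      CallSite site{{"I", "Ljava/lang/String;"}, "Z"};  // (int, String) -> boolean
      std::string err;
      if (CheckSignaturePolymorphicMethod(invoke_exact, &err)) {
        // Argument verification then walks site.proto_params, not the
        // method's own (Ljava/lang/Object;)Ljava/lang/Object; signature.
        std::cout << "verify " << site.proto_params.size()
                  << " args, result type " << site.proto_return << "\n";
      }
    }
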
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index f3faecd..fa5a698 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -63,7 +63,8 @@
   METHOD_STATIC,      // static
   METHOD_VIRTUAL,     // virtual
   METHOD_SUPER,       // super
-  METHOD_INTERFACE    // interface
+  METHOD_INTERFACE,   // interface
+  METHOD_POLYMORPHIC  // polymorphic
 };
 std::ostream& operator<<(std::ostream& os, const MethodType& rhs);
 
@@ -474,6 +475,10 @@
   // reference isn't for an array class.
   bool CheckNewInstance(dex::TypeIndex idx);
 
+  // Perform static checks on a prototype indexing instruction. All we do here is ensure that the
+  // prototype index is in the valid range.
+  bool CheckPrototypeIndex(uint32_t idx);
+
   /* Ensure that the string index is in the valid range. */
   bool CheckStringIndex(uint32_t idx);
 
@@ -512,6 +517,12 @@
   // - vA holds word count, vC holds index of first reg.
   bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
 
+  // Checks the method matches the expectations required to be signature polymorphic.
+  bool CheckSignaturePolymorphicMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Checks the invoked receiver matches the expectations for signature polymorphic methods.
+  bool CheckSignaturePolymorphicReceiver(const Instruction* inst) REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Extract the relative offset from a branch instruction.
   // Returns "false" on failure (e.g. this isn't a branch instruction).
   bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index da3d946..a6088aa 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -44,8 +44,9 @@
 }
 
 const RegType& RegisterLine::GetInvocationThis(MethodVerifier* verifier, const Instruction* inst,
-                                               bool is_range, bool allow_failure) {
-  const size_t args_count = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
+                                               bool allow_failure) {
+  DCHECK(inst->IsInvoke());
+  const size_t args_count = inst->VRegA();
   if (args_count < 1) {
     if (!allow_failure) {
       verifier->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke lacks 'this'";
@@ -53,7 +54,7 @@
     return verifier->GetRegTypeCache()->Conflict();
   }
   /* Get the element type of the array held in vsrc */
-  const uint32_t this_reg = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
+  const uint32_t this_reg = inst->VRegC();
   const RegType& this_type = GetRegisterType(verifier, this_reg);
   if (!this_type.IsReferenceTypes()) {
     if (!allow_failure) {
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 7603a79..221aa80 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -217,7 +217,6 @@
    */
   const RegType& GetInvocationThis(MethodVerifier* verifier,
                                    const Instruction* inst,
-                                   bool is_range,
                                    bool allow_failure = false)
       REQUIRES_SHARED(Locks::mutator_lock_);
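
Dropping the is_range parameter works because Instruction::VRegA() and VRegC() already dispatch on the instruction format internally, and the 45cc/4rcc encodings used by invoke-polymorphic are covered by that dispatch. A toy illustration of the idea (per-format bit extraction elided; the real accessor handles many more formats):

    #include <cstdint>

    // Toy model of the format dispatch inside Instruction::VRegA(): callers
    // such as GetInvocationThis() no longer need an is_range flag.
    struct Instruction {
      enum Format { k35c, k3rc, k45cc, k4rcc };
      Format format;
      uint32_t a, c;  // pretend these were decoded from the bytecode stream

      uint32_t VRegA() const {
        switch (format) {
          case k35c:  return a;  // invoke-virtual etc.
          case k3rc:  return a;  // .../range
          case k45cc: return a;  // invoke-polymorphic
          case k4rcc: return a;  // invoke-polymorphic/range
        }
        return 0;  // unreachable in this toy
      }
      uint32_t VRegC() const { return c; }  // first argument register ('this')
    };
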
 
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 01af5ec..f9bff23 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -80,8 +80,8 @@
   }
 }
 
-uint32_t VerifierDeps::GetClassDescriptorStringId(const DexFile& dex_file,
-                                                  ObjPtr<mirror::Class> klass) {
+dex::StringIndex VerifierDeps::GetClassDescriptorStringId(const DexFile& dex_file,
+                                                          ObjPtr<mirror::Class> klass) {
   DCHECK(klass != nullptr);
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
   // Array and proxy classes do not have a dex cache.
@@ -104,9 +104,9 @@
 }
 
 // Try to find the string descriptor of the class. type_idx is a best guess of a matching string id.
-static uint32_t TryGetClassDescriptorStringId(const DexFile& dex_file,
-                                              dex::TypeIndex type_idx,
-                                              ObjPtr<mirror::Class> klass)
+static dex::StringIndex TryGetClassDescriptorStringId(const DexFile& dex_file,
+                                                      dex::TypeIndex type_idx,
+                                                      ObjPtr<mirror::Class> klass)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (!klass->IsArrayClass()) {
     const DexFile::TypeId& type_id = dex_file.GetTypeId(type_idx);
@@ -117,21 +117,21 @@
       return type_id.descriptor_idx_;
     }
   }
-  return DexFile::kDexNoIndex;
+  return dex::StringIndex::Invalid();
 }
 
-uint32_t VerifierDeps::GetMethodDeclaringClassStringId(const DexFile& dex_file,
-                                                       uint32_t dex_method_index,
-                                                       ArtMethod* method) {
+dex::StringIndex VerifierDeps::GetMethodDeclaringClassStringId(const DexFile& dex_file,
+                                                               uint32_t dex_method_index,
+                                                               ArtMethod* method) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
   if (method == nullptr) {
-    return VerifierDeps::kUnresolvedMarker;
+    return dex::StringIndex(VerifierDeps::kUnresolvedMarker);
   }
-  const uint32_t string_id = TryGetClassDescriptorStringId(
+  const dex::StringIndex string_id = TryGetClassDescriptorStringId(
       dex_file,
       dex_file.GetMethodId(dex_method_index).class_idx_,
       method->GetDeclaringClass());
-  if (string_id != DexFile::kDexNoIndex) {
+  if (string_id.IsValid()) {
     // Got lucky using the original dex file, return based on the input dex file.
     DCHECK_EQ(GetClassDescriptorStringId(dex_file, method->GetDeclaringClass()), string_id);
     return string_id;
@@ -139,18 +139,18 @@
   return GetClassDescriptorStringId(dex_file, method->GetDeclaringClass());
 }
 
-uint32_t VerifierDeps::GetFieldDeclaringClassStringId(const DexFile& dex_file,
-                                                      uint32_t dex_field_idx,
-                                                      ArtField* field) {
+dex::StringIndex VerifierDeps::GetFieldDeclaringClassStringId(const DexFile& dex_file,
+                                                              uint32_t dex_field_idx,
+                                                              ArtField* field) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
   if (field == nullptr) {
-    return VerifierDeps::kUnresolvedMarker;
+    return dex::StringIndex(VerifierDeps::kUnresolvedMarker);
   }
-  const uint32_t string_id = TryGetClassDescriptorStringId(
+  const dex::StringIndex string_id = TryGetClassDescriptorStringId(
       dex_file,
       dex_file.GetFieldId(dex_field_idx).class_idx_,
       field->GetDeclaringClass());
-  if (string_id != DexFile::kDexNoIndex) {
+  if (string_id.IsValid()) {
     // Got lucky using the original dex file, return based on the input dex file.
     DCHECK_EQ(GetClassDescriptorStringId(dex_file, field->GetDeclaringClass()), string_id);
     return string_id;
@@ -190,7 +190,7 @@
   return false;
 }
 
-uint32_t VerifierDeps::GetIdFromString(const DexFile& dex_file, const std::string& str) {
+dex::StringIndex VerifierDeps::GetIdFromString(const DexFile& dex_file, const std::string& str) {
   const DexFile::StringId* string_id = dex_file.FindStringId(str.c_str());
   if (string_id != nullptr) {
     // String is in the DEX file. Return its ID.
@@ -212,32 +212,33 @@
   {
     ReaderMutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     if (FindExistingStringId(deps->strings_, str, &found_id)) {
-      return num_ids_in_dex + found_id;
+      return dex::StringIndex(num_ids_in_dex + found_id);
     }
   }
   {
     WriterMutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
     if (FindExistingStringId(deps->strings_, str, &found_id)) {
-      return num_ids_in_dex + found_id;
+      return dex::StringIndex(num_ids_in_dex + found_id);
     }
     deps->strings_.push_back(str);
-    uint32_t new_id = num_ids_in_dex + deps->strings_.size() - 1;
-    CHECK_GE(new_id, num_ids_in_dex);  // check for overflows
+    dex::StringIndex new_id(num_ids_in_dex + deps->strings_.size() - 1);
+    CHECK_GE(new_id.index_, num_ids_in_dex);  // check for overflows
     DCHECK_EQ(str, singleton->GetStringFromId(dex_file, new_id));
     return new_id;
   }
 }
 
-std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, uint32_t string_id) const {
+std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, dex::StringIndex string_id)
+    const {
   uint32_t num_ids_in_dex = dex_file.NumStringIds();
-  if (string_id < num_ids_in_dex) {
+  if (string_id.index_ < num_ids_in_dex) {
     return std::string(dex_file.StringDataByIdx(string_id));
   } else {
     const DexFileDeps* deps = GetDexFileDeps(dex_file);
     DCHECK(deps != nullptr);
-    string_id -= num_ids_in_dex;
-    CHECK_LT(string_id, deps->strings_.size());
-    return deps->strings_[string_id];
+    string_id.index_ -= num_ids_in_dex;
+    CHECK_LT(string_id.index_, deps->strings_.size());
+    return deps->strings_[string_id.index_];
   }
 }
 
@@ -389,8 +390,8 @@
   }
 
   // Get string IDs for both descriptors and store in the appropriate set.
-  uint32_t destination_id = GetClassDescriptorStringId(dex_file, destination);
-  uint32_t source_id = GetClassDescriptorStringId(dex_file, source);
+  dex::StringIndex destination_id = GetClassDescriptorStringId(dex_file, destination);
+  dex::StringIndex source_id = GetClassDescriptorStringId(dex_file, source);
 
   if (is_assignable) {
     dex_deps->assignable_types_.emplace(TypeAssignability(destination_id, source_id));
@@ -471,6 +472,9 @@
 template<> inline uint32_t Encode<dex::TypeIndex>(dex::TypeIndex in) {
   return in.index_;
 }
+template<> inline uint32_t Encode<dex::StringIndex>(dex::StringIndex in) {
+  return in.index_;
+}
 
 template<typename T> inline T Decode(uint32_t in);
 
@@ -483,6 +487,9 @@
 template<> inline dex::TypeIndex Decode<dex::TypeIndex>(uint32_t in) {
   return dex::TypeIndex(in);
 }
+template<> inline dex::StringIndex Decode<dex::StringIndex>(uint32_t in) {
+  return dex::StringIndex(in);
+}
 
 template<typename T1, typename T2>
 static inline void EncodeTuple(std::vector<uint8_t>* out, const std::tuple<T1, T2>& t) {
@@ -508,7 +515,7 @@
 static inline void DecodeTuple(const uint8_t** in, const uint8_t* end, std::tuple<T1, T2, T3>* t) {
   T1 v1 = Decode<T1>(DecodeUint32WithOverflowCheck(in, end));
   T2 v2 = Decode<T2>(DecodeUint32WithOverflowCheck(in, end));
-  T3 v3 = Decode<T2>(DecodeUint32WithOverflowCheck(in, end));
+  T3 v3 = Decode<T3>(DecodeUint32WithOverflowCheck(in, end));
   *t = std::make_tuple(v1, v2, v3);
 }
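
The one-character change in DecodeTuple() fixes a latent bug: the third element was decoded with Decode<T2>, which only went unnoticed while every encoded slot was a bare uint32_t; with dex::StringIndex in the third slot the wrong decoder would be selected. A self-contained round-trip sketch, with fixed-width words standing in for the LEB128 helpers of the real code:

    #include <cassert>
    #include <cstdint>
    #include <tuple>
    #include <vector>

    template <typename T> uint32_t Encode(T in) { return static_cast<uint32_t>(in); }
    template <typename T> T Decode(uint32_t in) { return static_cast<T>(in); }

    template <typename T1, typename T2, typename T3>
    void EncodeTuple(std::vector<uint32_t>* out, const std::tuple<T1, T2, T3>& t) {
      out->push_back(Encode<T1>(std::get<0>(t)));
      out->push_back(Encode<T2>(std::get<1>(t)));
      out->push_back(Encode<T3>(std::get<2>(t)));
    }

    template <typename T1, typename T2, typename T3>
    void DecodeTuple(const uint32_t** in, std::tuple<T1, T2, T3>* t) {
      T1 v1 = Decode<T1>(*(*in)++);
      T2 v2 = Decode<T2>(*(*in)++);
      T3 v3 = Decode<T3>(*(*in)++);  // was Decode<T2> before the fix
      *t = std::make_tuple(v1, v2, v3);
    }

    int main() {
      std::vector<uint32_t> buf;
      std::tuple<uint32_t, uint16_t, uint32_t> in{7, 3, 9};
      std::tuple<uint32_t, uint16_t, uint32_t> out;
      EncodeTuple(&buf, in);
      const uint32_t* p = buf.data();
      DecodeTuple(&p, &out);
      assert(in == out);
    }
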
 
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index a12071b..4b8206f 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -129,41 +129,43 @@
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
   };
 
-  using FieldResolutionBase = std::tuple<uint32_t, uint16_t, uint32_t>;
+  using FieldResolutionBase = std::tuple<uint32_t, uint16_t, dex::StringIndex>;
   struct FieldResolution : public FieldResolutionBase {
     FieldResolution() = default;
     FieldResolution(const FieldResolution&) = default;
-    FieldResolution(uint32_t field_idx, uint16_t access_flags, uint32_t declaring_class_idx)
+    FieldResolution(uint32_t field_idx, uint16_t access_flags, dex::StringIndex declaring_class_idx)
         : FieldResolutionBase(field_idx, access_flags, declaring_class_idx) {}
 
     bool IsResolved() const { return GetAccessFlags() != kUnresolvedMarker; }
     uint32_t GetDexFieldIndex() const { return std::get<0>(*this); }
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
-    uint32_t GetDeclaringClassIndex() const { return std::get<2>(*this); }
+    dex::StringIndex GetDeclaringClassIndex() const { return std::get<2>(*this); }
   };
 
-  using MethodResolutionBase = std::tuple<uint32_t, uint16_t, uint32_t>;
+  using MethodResolutionBase = std::tuple<uint32_t, uint16_t, dex::StringIndex>;
   struct MethodResolution : public MethodResolutionBase {
     MethodResolution() = default;
     MethodResolution(const MethodResolution&) = default;
-    MethodResolution(uint32_t method_idx, uint16_t access_flags, uint32_t declaring_class_idx)
+    MethodResolution(uint32_t method_idx,
+                     uint16_t access_flags,
+                     dex::StringIndex declaring_class_idx)
         : MethodResolutionBase(method_idx, access_flags, declaring_class_idx) {}
 
     bool IsResolved() const { return GetAccessFlags() != kUnresolvedMarker; }
     uint32_t GetDexMethodIndex() const { return std::get<0>(*this); }
     uint16_t GetAccessFlags() const { return std::get<1>(*this); }
-    uint32_t GetDeclaringClassIndex() const { return std::get<2>(*this); }
+    dex::StringIndex GetDeclaringClassIndex() const { return std::get<2>(*this); }
   };
 
-  using TypeAssignabilityBase = std::tuple<uint32_t, uint32_t>;
+  using TypeAssignabilityBase = std::tuple<dex::StringIndex, dex::StringIndex>;
   struct TypeAssignability : public TypeAssignabilityBase {
     TypeAssignability() = default;
     TypeAssignability(const TypeAssignability&) = default;
-    TypeAssignability(uint32_t destination_idx, uint32_t source_idx)
+    TypeAssignability(dex::StringIndex destination_idx, dex::StringIndex source_idx)
         : TypeAssignabilityBase(destination_idx, source_idx) {}
 
-    uint32_t GetDestination() const { return std::get<0>(*this); }
-    uint32_t GetSource() const { return std::get<1>(*this); }
+    dex::StringIndex GetDestination() const { return std::get<0>(*this); }
+    dex::StringIndex GetSource() const { return std::get<1>(*this); }
   };
 
   // Data structure representing dependencies collected during verification of
@@ -206,11 +208,11 @@
   // string ID. If not, an ID is assigned to the string and cached in `strings_`
   // of the corresponding DexFileDeps structure (either provided or inferred from
   // `dex_file`).
-  uint32_t GetIdFromString(const DexFile& dex_file, const std::string& str)
+  dex::StringIndex GetIdFromString(const DexFile& dex_file, const std::string& str)
       REQUIRES(!Locks::verifier_deps_lock_);
 
   // Returns the string represented by `id`.
-  std::string GetStringFromId(const DexFile& dex_file, uint32_t string_id) const;
+  std::string GetStringFromId(const DexFile& dex_file, dex::StringIndex string_id) const;
 
   // Returns the bytecode access flags of `element` (bottom 16 bits), or
   // `kUnresolvedMarker` if `element` is null.
@@ -220,17 +222,17 @@
 
   // Returns a string ID of the descriptor of the declaring class of `element`,
   // or `kUnresolvedMarker` if `element` is null.
-  uint32_t GetMethodDeclaringClassStringId(const DexFile& dex_file,
-                                           uint32_t dex_method_idx,
-                                           ArtMethod* method)
+  dex::StringIndex GetMethodDeclaringClassStringId(const DexFile& dex_file,
+                                                   uint32_t dex_method_idx,
+                                                   ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  uint32_t GetFieldDeclaringClassStringId(const DexFile& dex_file,
-                                          uint32_t dex_field_idx,
-                                          ArtField* field)
+  dex::StringIndex GetFieldDeclaringClassStringId(const DexFile& dex_file,
+                                                  uint32_t dex_field_idx,
+                                                  ArtField* field)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns a string ID of the descriptor of the class.
-  uint32_t GetClassDescriptorStringId(const DexFile& dex_file, ObjPtr<mirror::Class> klass)
+  dex::StringIndex GetClassDescriptorStringId(const DexFile& dex_file, ObjPtr<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::verifier_deps_lock_);
 
diff --git a/test/080-oom-fragmentation/expected.txt b/test/080-oom-fragmentation/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/080-oom-fragmentation/expected.txt
diff --git a/test/080-oom-fragmentation/info.txt b/test/080-oom-fragmentation/info.txt
new file mode 100644
index 0000000..5bcc425
--- /dev/null
+++ b/test/080-oom-fragmentation/info.txt
@@ -0,0 +1,2 @@
+Test that the allocator can go from a full heap to an empty one and is able to allocate a large
+object array.
diff --git a/test/080-oom-fragmentation/src/Main.java b/test/080-oom-fragmentation/src/Main.java
new file mode 100644
index 0000000..cf21139
--- /dev/null
+++ b/test/080-oom-fragmentation/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) {
+        // Reserve around 1/4 of the RAM for keeping objects live.
+        long maxMem = Runtime.getRuntime().maxMemory();
+        Object[] holder = new Object[(int)(maxMem / 16)];
+        int count = 0;
+        try {
+            while (true) {
+                holder[count++] = new Object[1025];  // A bit over one page.
+            }
+        } catch (OutOfMemoryError e) {}
+        for (int i = 0; i < count; ++i) {
+            holder[i] = null;
+        }
+        // Make sure the heap can handle allocating a large object array. This checks that
+        // free pages are correctly coalesced together by the allocator.
+        holder[0] = new Object[(int)(maxMem / 8)];
+    }
+}
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index a5e9512..c4f80fc 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -66,7 +66,7 @@
     }
 
     try {
-      List<Future<Integer>> results = pool.invokeAll(queries, 5, TimeUnit.SECONDS);
+      List<Future<Integer>> results = pool.invokeAll(queries);
 
       int hash = obj.hashCode();
       for (int i = 0; i < numThreads; ++i) {
diff --git a/test/616-cha/expected.txt b/test/616-cha/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha/info.txt b/test/616-cha/info.txt
new file mode 100644
index 0000000..50e3b0d
--- /dev/null
+++ b/test/616-cha/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA).
diff --git a/test/616-cha/src/Main.java b/test/616-cha/src/Main.java
new file mode 100644
index 0000000..787318d
--- /dev/null
+++ b/test/616-cha/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main1 {
+  String getName() {
+    return "Main1";
+  }
+
+  void printError(String msg) {
+    System.out.println(msg);
+  }
+
+  void foo(int i) {
+    if (i != 1) {
+      printError("error1");
+    }
+  }
+
+  int getValue1() {
+    return 1;
+  }
+  int getValue2() {
+    return 2;
+  }
+  int getValue3() {
+    return 3;
+  }
+  int getValue4() {
+    return 4;
+  }
+  int getValue5() {
+    return 5;
+  }
+  int getValue6() {
+    return 6;
+  }
+}
+
+class Main2 extends Main1 {
+  String getName() {
+    return "Main2";
+  }
+
+  void foo(int i) {
+    if (i != 2) {
+      printError("error2");
+    }
+  }
+}
+
+class Main3 extends Main1 {
+  String getName() {
+    return "Main3";
+  }
+}
+
+public class Main {
+  static Main1 sMain1;
+  static Main1 sMain2;
+
+  static boolean sIsOptimizing = true;
+  static boolean sHasJIT = true;
+  static volatile boolean sOtherThreadStarted;
+
+  // sMain1.foo() will always be Main1.foo() before Main2 is loaded/linked.
+  // So sMain1.foo() can be devirtualized to Main1.foo() and inlined.
+  // After Dummy.createMain2() links in Main2, the live testOverride() frames on
+  // the stack should be deoptimized.
+  static void testOverride(boolean createMain2, boolean wait, boolean setHasJIT) {
+    if (setHasJIT) {
+      if (isInterpreted()) {
+        sHasJIT = false;
+      }
+      return;
+    }
+
+    if (createMain2 && (sIsOptimizing || sHasJIT)) {
+      assertIsManaged();
+    }
+
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+
+    if (createMain2) {
+      // Wait for the other thread to start.
+      while (!sOtherThreadStarted);
+      // Create a Main2 instance and assign it to sMain2.
+      // sMain1 is kept the same.
+      sMain2 = Dummy.createMain2();
+      // Wake up the other thread.
+      synchronized(Main.class) {
+        Main.class.notify();
+      }
+    } else if (wait) {
+      // This is the other thread.
+      synchronized(Main.class) {
+        sOtherThreadStarted = true;
+        // Wait for Main2 to be linked and deoptimization to be triggered.
+        try {
+          Main.class.wait();
+        } catch (Exception e) {
+        }
+      }
+    }
+
+    // There should be a deoptimization here right after Main2 is linked by
+    // calling Dummy.createMain2(), even though sMain1 didn't change.
+    // The behavior here would be different if an inline cache were used, which
+    // doesn't deoptimize since sMain1 still hits the type cache.
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+    if ((createMain2 || wait) && sHasJIT && !sIsOptimizing) {
+      // This method should be deoptimized right after Main2 is created.
+      assertIsInterpreted();
+    }
+
+    if (sMain2 != null) {
+      sMain2.foo(sMain2.getClass() == Main1.class ? 1 : 2);
+    }
+  }
+
+  static Main1[] sArray;
+
+  static long calcValue(Main1 m) {
+    return m.getValue1()
+        + m.getValue2() * 2
+        + m.getValue3() * 3
+        + m.getValue4() * 4
+        + m.getValue5() * 5
+        + m.getValue6() * 6;
+  }
+
+  static long testNoOverrideLoop(int count) {
+    long sum = 0;
+    for (int i=0; i<count; i++) {
+      sum += calcValue(sArray[0]);
+      sum += calcValue(sArray[1]);
+      sum += calcValue(sArray[2]);
+    }
+    return sum;
+  }
+
+  static void testNoOverride() {
+    sArray = new Main1[3];
+    sArray[0] = new Main1();
+    sArray[1] = Dummy.createMain2();
+    sArray[2] = Dummy.createMain3();
+    long sum = 0;
+    // Loop enough to get methods JITed.
+    for (int i=0; i<100; i++) {
+      testNoOverrideLoop(1);
+    }
+    ensureJitCompiled(Main.class, "testNoOverrideLoop");
+    ensureJitCompiled(Main.class, "calcValue");
+
+    long t1 = System.currentTimeMillis();
+    sum = testNoOverrideLoop(100000);
+    long t2 = System.currentTimeMillis();
+    if (sum != 27300000L) {
+      System.out.println("Unexpected result.");
+    }
+  }
+
+  private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+    if (hasSingleImplementation(clazz, method_name) != b) {
+      System.out.println(clazz + "." + method_name +
+          " doesn't have the expected single-implementation value of " + b);
+    }
+  }
+
+  // Test scenarios under which CHA-based devirtualization happens,
+  // and how loading a class that overrides a method can invalidate compiled code.
+  // Also test the pure non-overriding case, which mostly checks the form of the
+  // generated code.
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    // Check some boot-image methods.
+    assertSingleImplementation(java.util.ArrayList.class, "size", true);
+    // java.util.LinkedHashMap overrides get().
+    assertSingleImplementation(java.util.HashMap.class, "get", false);
+
+    // We don't set the single-implementation modifier bit for final classes or methods
+    // since we can devirtualize without CHA in those cases. However, hasSingleImplementation()
+    // should return true for them.
+    assertSingleImplementation(java.lang.String.class, "charAt", true);
+    assertSingleImplementation(java.lang.Thread.class, "join", true);
+    // We don't set the single-implementation modifier bit for native methods.
+    assertSingleImplementation(java.lang.Thread.class, "isInterrupted", false);
+
+    if (isInterpreted()) {
+      sIsOptimizing = false;
+    }
+
+    // sMain1 is an instance of Main1. Main2 hasn't been loaded yet.
+    sMain1 = new Main1();
+
+    // Loop enough to get testOverride() JITed.
+    for (int i=0; i<100; i++) {
+      testOverride(false, false, false);
+    }
+
+    ensureJitCompiled(Main.class, "testOverride");
+    testOverride(false, false, true);
+
+    if (sHasJIT && !sIsOptimizing) {
+      assertSingleImplementation(Main1.class, "foo", true);
+    } else {
+      // Main2 is verified ahead-of-time so it's linked in already.
+    }
+    assertSingleImplementation(Main1.class, "getValue1", true);
+
+    // Create another thread that also calls sMain1.foo().
+    // This tests suspending and deoptimizing another thread.
+    new Thread() {
+      public void run() {
+        testOverride(false, true, false);
+      }
+    }.start();
+
+    // This will create Main2 instance in the middle of testOverride().
+    testOverride(true, false, false);
+    assertSingleImplementation(Main1.class, "foo", false);
+    assertSingleImplementation(Main1.class, "getValue1", true);
+
+    testNoOverride();
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+  private static native void assertIsInterpreted();
+  private static native void assertIsManaged();
+  private static native boolean isInterpreted();
+  private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Do this in another class so the verifier doesn't cause Main2/Main3 to be loaded early.
+class Dummy {
+  static Main1 createMain2() {
+    return new Main2();
+  }
+  static Main1 createMain3() {
+    return new Main3();
+  }
+}
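
For readers new to CHA: the test relies on the runtime tracking, per virtual method, whether a single implementation exists; compiled code that devirtualized on that assumption must be deoptimized when a subclass that overrides the method is linked. A toy model of that bookkeeping; names and structure are invented for illustration, not the ART implementation:

    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Cha {
      std::unordered_set<std::string> overridden;  // methods with >1 implementation
      std::unordered_map<std::string, std::vector<std::string>> dependents;

      bool HasSingleImplementation(const std::string& m) const {
        return overridden.count(m) == 0;
      }
      // Compiled code registers itself when it devirtualizes based on CHA.
      void RecordDependency(const std::string& m, const std::string& code) {
        if (HasSingleImplementation(m)) dependents[m].push_back(code);
      }
      // Linking an overriding subclass invalidates all dependent compiled code.
      void OnOverrideLinked(const std::string& m) {
        overridden.insert(m);
        for (const auto& code : dependents[m])
          std::cout << "deoptimize " << code << "\n";  // stand-in for frame deopt
        dependents[m].clear();
      }
    };

    int main() {
      Cha cha;
      cha.RecordDependency("Main1.foo", "jit:testOverride");
      cha.OnOverrideLinked("Main1.foo");  // prints: deoptimize jit:testOverride
    }
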
diff --git a/test/624-checker-stringops/src/Main.java b/test/624-checker-stringops/src/Main.java
index 34e8283..d965e3f 100644
--- a/test/624-checker-stringops/src/Main.java
+++ b/test/624-checker-stringops/src/Main.java
@@ -98,9 +98,170 @@
     return k;
   }
 
+  //
+  // Allows combining of the returned "this". Also ensures that similar-looking append()
+  // calls are not accidentally combined through the returned result.
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null1>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null2>>]             intrinsic:StringBufferLength
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength
+  static int bufferLen2() {
+    StringBuffer s = new StringBuffer();
+    return s.append("x").append("x").length();
+  }
+
+  //
+  // Allows combining of the returned "this". Also ensures that similar-looking append()
+  // calls are not accidentally combined through the returned result.
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null3>>]             intrinsic:StringBuilderLength
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength
+  static int builderLen2() {
+    StringBuilder s = new StringBuilder();
+    return s.append("x").append("x").length();
+  }
+
+  //
+  // Similar situation in a loop.
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBufferLength  loop:none
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength  loop:none
+  static int bufferLoopAppender() {
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");
+    }
+    return b.length();
+  }
+
+  //
+  // Similar situation in a loop.
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBuilderLength loop:none
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength loop:none
+  static int builderLoopAppender() {
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");
+    }
+    return b.length();
+  }
+
+  //
+  // All calls in the loop body, and thus the loop itself, can be eliminated.
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBufferToString     loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBufferToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int bufferDeadLoop() {
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);
+    }
+    return b.length();
+  }
+
+  //
+  // All calls in the loop body, and thus the loop itself, can be eliminated.
+  //
+  /// CHECK-START: int Main.builderDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBuilderToString    loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.builderDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBuilderToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int builderDeadLoop() {
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);
+    }
+    return b.length();
+  }
+
   public static void main(String[] args) {
     expectEquals(1865, liveIndexOf());
     expectEquals(29, deadIndexOf());
+
     try {
       indexOfExceptions(null, XYZ);
       throw new Error("Expected: NPE");
@@ -113,6 +274,13 @@
     }
     expectEquals(598, indexOfExceptions(ABC, XYZ));
 
+    expectEquals(2, bufferLen2());
+    expectEquals(2, builderLen2());
+    expectEquals(30, bufferLoopAppender());
+    expectEquals(30, builderLoopAppender());
+    expectEquals(0, bufferDeadLoop());
+    expectEquals(0, builderDeadLoop());
+
     System.out.println("passed");
   }
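
The before/after CHECK patterns above hinge on one simplifier rule: StringBuffer/StringBuilder.append is known to return its receiver, so uses of the call's result can be rewired to the receiver, after which the intermediate NullChecks have no users and disappear. A toy version of that rewrite over a trivial instruction graph (invented structure, not the ART HInstruction API):

    #include <vector>

    // Minimal stand-in for an IR node; 'uses' are the slots that read this value.
    struct Instr {
      Instr* receiver = nullptr;   // for calls: the 'this' argument
      bool returns_this = false;   // true for the append intrinsics
      std::vector<Instr**> uses;
    };

    // If an intrinsic returns 'this', rewire its users to the receiver directly.
    // Downstream, null checks on the (now unused) result become dead code.
    void SimplifyReturnsThis(Instr* call) {
      if (!call->returns_this || call->receiver == nullptr) return;
      for (Instr** use : call->uses) *use = call->receiver;
      call->uses.clear();
    }
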
 
diff --git a/test/629-vdex-speed/expected.txt b/test/629-vdex-speed/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/629-vdex-speed/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/629-vdex-speed/info.txt b/test/629-vdex-speed/info.txt
new file mode 100644
index 0000000..6d84cb5
--- /dev/null
+++ b/test/629-vdex-speed/info.txt
@@ -0,0 +1,2 @@
+Regression test for vdex: methods used to not be AOT compiled
+when the VerifierDeps had already been verified.
diff --git a/test/629-vdex-speed/run b/test/629-vdex-speed/run
new file mode 100644
index 0000000..f1b0a95
--- /dev/null
+++ b/test/629-vdex-speed/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} --vdex "${@}"
diff --git a/test/902-hello-transformation/transform.h b/test/629-vdex-speed/src/Main.java
similarity index 62%
rename from test/902-hello-transformation/transform.h
rename to test/629-vdex-speed/src/Main.java
index 661058d..470565a 100644
--- a/test/902-hello-transformation/transform.h
+++ b/test/629-vdex-speed/src/Main.java
@@ -14,17 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_902_HELLO_TRANSFORMATION_TRANSFORM_H_
-#define ART_TEST_902_HELLO_TRANSFORMATION_TRANSFORM_H_
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    if (!isAotCompiled(Main.class, "main")) {
+      throw new Error("Expected Main.main to be AOT compiled");
+    }
+  }
 
-#include <jni.h>
+  private static native boolean isAotCompiled(Class<?> cls, String methodName);
+}
 
-namespace art {
-namespace Test902HelloTransformation {
-
-jint OnLoad(JavaVM* vm, char* options, void* reserved);
-
-}  // namespace Test902HelloTransformation
-}  // namespace art
-
-#endif  // ART_TEST_902_HELLO_TRANSFORMATION_TRANSFORM_H_
diff --git a/test/902-hello-transformation/expected.txt b/test/902-hello-transformation/expected.txt
index a826f93..4774b81 100644
--- a/test/902-hello-transformation/expected.txt
+++ b/test/902-hello-transformation/expected.txt
@@ -1,3 +1,2 @@
 hello
-modifying class 'Transform'
 Goodbye
diff --git a/test/902-hello-transformation/run b/test/902-hello-transformation/run
index 3755d1d..94a8b2d 100755
--- a/test/902-hello-transformation/run
+++ b/test/902-hello-transformation/run
@@ -27,12 +27,11 @@
   arg="jvm"
 else
   arg="art"
-fi
-
-if [[ "$@" != *"--debuggable"* ]]; then
-  other_args=" -Xcompiler-option --debuggable "
-else
-  other_args=""
+  if [[ "$@" != *"--debuggable"* ]]; then
+    other_args=" -Xcompiler-option --debuggable "
+  else
+    other_args=""
+  fi
 fi
 
 ./default-run "$@" --experimental agents \
diff --git a/test/902-hello-transformation/src/Main.java b/test/902-hello-transformation/src/Main.java
index 204b6e7..ec47119 100644
--- a/test/902-hello-transformation/src/Main.java
+++ b/test/902-hello-transformation/src/Main.java
@@ -14,7 +14,40 @@
  * limitations under the License.
  */
 
+import java.util.Base64;
 public class Main {
+
+  /**
+   * base64 encoded class/dex file for
+   * class Transform {
+   *   public void sayHi() {
+   *    System.out.println("Goodbye");
+   *   }
+   * }
+   */
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAHAoABgAOCQAPABAIABEKABIAEwcAFAcAFQEABjxpbml0PgEAAygpVgEABENvZGUB" +
+    "AA9MaW5lTnVtYmVyVGFibGUBAAVzYXlIaQEAClNvdXJjZUZpbGUBAA5UcmFuc2Zvcm0uamF2YQwA" +
+    "BwAIBwAWDAAXABgBAAdHb29kYnllBwAZDAAaABsBAAlUcmFuc2Zvcm0BABBqYXZhL2xhbmcvT2Jq" +
+    "ZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291dAEAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwEAE2ph" +
+    "dmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxuAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWACAABQAG" +
+    "AAAAAAACAAAABwAIAAEACQAAAB0AAQABAAAABSq3AAGxAAAAAQAKAAAABgABAAAAEQABAAsACAAB" +
+    "AAkAAAAlAAIAAQAAAAmyAAISA7YABLEAAAABAAoAAAAKAAIAAAATAAgAFAABAAwAAAACAA0=");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQCLXSBQ5FiS3f16krSYZFF8xYZtFVp0GRXMAgAAcAAAAHhWNBIAAAAAAAAAACwCAAAO" +
+    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACsAQAAIAEAAGIB" +
+    "AABqAQAAcwEAAIABAACXAQAAqwEAAL8BAADTAQAA4wEAAOYBAADqAQAA/gEAAAMCAAAMAgAAAgAA" +
+    "AAMAAAAEAAAABQAAAAYAAAAIAAAACAAAAAUAAAAAAAAACQAAAAUAAABcAQAABAABAAsAAAAAAAAA" +
+    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAHAAAAAAAAAB4CAAAA" +
+    "AAAAAQABAAEAAAATAgAABAAAAHAQAwAAAA4AAwABAAIAAAAYAgAACQAAAGIAAAAbAQEAAABuIAIA" +
+    "EAAOAAAAAQAAAAMABjxpbml0PgAHR29vZGJ5ZQALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50" +
+    "U3RyZWFtOwASTGphdmEvbGFuZy9PYmplY3Q7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xh" +
+    "bmcvU3lzdGVtOwAOVHJhbnNmb3JtLmphdmEAAVYAAlZMABJlbWl0dGVyOiBqYWNrLTMuMzYAA291" +
+    "dAAHcHJpbnRsbgAFc2F5SGkAEQAHDgATAAcOhQAAAAEBAICABKACAQG4Ag0AAAAAAAAAAQAAAAAA" +
+    "AAABAAAADgAAAHAAAAACAAAABgAAAKgAAAADAAAAAgAAAMAAAAAEAAAAAQAAANgAAAAFAAAABAAA" +
+    "AOAAAAAGAAAAAQAAAAABAAABIAAAAgAAACABAAABEAAAAQAAAFwBAAACIAAADgAAAGIBAAADIAAA" +
+    "AgAAABMCAAAAIAAAAQAAAB4CAAAAEAAAAQAAACwCAAA=");
+
   public static void main(String[] args) {
     System.loadLibrary(args[1]);
     doTest(new Transform());
@@ -22,10 +55,12 @@
 
   public static void doTest(Transform t) {
     t.sayHi();
-    doClassTransformation(Transform.class);
+    doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
     t.sayHi();
   }
 
   // Transforms the class
-  private static native void doClassTransformation(Class target);
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
 }
diff --git a/test/902-hello-transformation/transform.cc b/test/902-hello-transformation/transform.cc
deleted file mode 100644
index 3369dd4..0000000
--- a/test/902-hello-transformation/transform.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include <pthread.h>
-#include <stdio.h>
-#include <vector>
-
-#include "art_method-inl.h"
-#include "base/logging.h"
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
-#include "utils.h"
-
-namespace art {
-namespace Test902HelloTransformation {
-
-static bool RuntimeIsJvm = false;
-
-bool IsJVM() {
-  return RuntimeIsJvm;
-}
-
-// base64 encoded class/dex file for
-//
-// class Transform {
-//   public void sayHi() {
-//     System.out.println("Goodbye");
-//   }
-// }
-const char* class_file_base64 =
-    "yv66vgAAADQAHAoABgAOCQAPABAIABEKABIAEwcAFAcAFQEABjxpbml0PgEAAygpVgEABENvZGUB"
-    "AA9MaW5lTnVtYmVyVGFibGUBAAVzYXlIaQEAClNvdXJjZUZpbGUBAA5UcmFuc2Zvcm0uamF2YQwA"
-    "BwAIBwAWDAAXABgBAAdHb29kYnllBwAZDAAaABsBAAlUcmFuc2Zvcm0BABBqYXZhL2xhbmcvT2Jq"
-    "ZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291dAEAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwEAE2ph"
-    "dmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxuAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWACAABQAG"
-    "AAAAAAACAAAABwAIAAEACQAAAB0AAQABAAAABSq3AAGxAAAAAQAKAAAABgABAAAAEQABAAsACAAB"
-    "AAkAAAAlAAIAAQAAAAmyAAISA7YABLEAAAABAAoAAAAKAAIAAAATAAgAFAABAAwAAAACAA0=";
-
-const char* dex_file_base64 =
-    "ZGV4CjAzNQCLXSBQ5FiS3f16krSYZFF8xYZtFVp0GRXMAgAAcAAAAHhWNBIAAAAAAAAAACwCAAAO"
-    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACsAQAAIAEAAGIB"
-    "AABqAQAAcwEAAIABAACXAQAAqwEAAL8BAADTAQAA4wEAAOYBAADqAQAA/gEAAAMCAAAMAgAAAgAA"
-    "AAMAAAAEAAAABQAAAAYAAAAIAAAACAAAAAUAAAAAAAAACQAAAAUAAABcAQAABAABAAsAAAAAAAAA"
-    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAHAAAAAAAAAB4CAAAA"
-    "AAAAAQABAAEAAAATAgAABAAAAHAQAwAAAA4AAwABAAIAAAAYAgAACQAAAGIAAAAbAQEAAABuIAIA"
-    "EAAOAAAAAQAAAAMABjxpbml0PgAHR29vZGJ5ZQALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50"
-    "U3RyZWFtOwASTGphdmEvbGFuZy9PYmplY3Q7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xh"
-    "bmcvU3lzdGVtOwAOVHJhbnNmb3JtLmphdmEAAVYAAlZMABJlbWl0dGVyOiBqYWNrLTMuMzYAA291"
-    "dAAHcHJpbnRsbgAFc2F5SGkAEQAHDgATAAcOhQAAAAEBAICABKACAQG4Ag0AAAAAAAAAAQAAAAAA"
-    "AAABAAAADgAAAHAAAAACAAAABgAAAKgAAAADAAAAAgAAAMAAAAAEAAAAAQAAANgAAAAFAAAABAAA"
-    "AOAAAAAGAAAAAQAAAAABAAABIAAAAgAAACABAAABEAAAAQAAAFwBAAACIAAADgAAAGIBAAADIAAA"
-    "AgAAABMCAAAAIAAAAQAAAB4CAAAAEAAAAQAAACwCAAA=";
-
-static void JNICALL transformationHook(jvmtiEnv *jvmtienv,
-                                       JNIEnv* jni_env                 ATTRIBUTE_UNUSED,
-                                       jclass class_being_redefined    ATTRIBUTE_UNUSED,
-                                       jobject loader                  ATTRIBUTE_UNUSED,
-                                       const char* name,
-                                       jobject protection_domain       ATTRIBUTE_UNUSED,
-                                       jint class_data_len             ATTRIBUTE_UNUSED,
-                                       const unsigned char* class_data ATTRIBUTE_UNUSED,
-                                       jint* new_class_data_len,
-                                       unsigned char** new_class_data) {
-  if (strcmp("Transform", name)) {
-    return;
-  }
-  printf("modifying class '%s'\n", name);
-  bool is_jvm = IsJVM();
-  size_t decode_len = 0;
-  unsigned char* new_data;
-  std::unique_ptr<uint8_t[]> file_data(
-      DecodeBase64((is_jvm) ? class_file_base64 : dex_file_base64, &decode_len));
-  jvmtiError ret = JVMTI_ERROR_NONE;
-  if ((ret = jvmtienv->Allocate(static_cast<jlong>(decode_len), &new_data)) != JVMTI_ERROR_NONE) {
-    printf("Unable to allocate buffer!\n");
-    return;
-  }
-  memcpy(new_data, file_data.get(), decode_len);
-  *new_class_data_len = static_cast<jint>(decode_len);
-  *new_class_data = new_data;
-  return;
-}
-
-using RetransformWithHookFunction = jvmtiError (*)(jvmtiEnv*, jclass, jvmtiEventClassFileLoadHook);
-static void DoClassTransformation(jvmtiEnv* jvmtienv, JNIEnv* jnienv, jclass target) {
-  if (IsJVM()) {
-    UNUSED(jnienv);
-    jvmtienv->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, nullptr);
-    jvmtiError ret = jvmtienv->RetransformClasses(1, &target);
-    if (ret != JVMTI_ERROR_NONE) {
-      char* err;
-      jvmtienv->GetErrorName(ret, &err);
-      printf("Error transforming: %s\n", err);
-    }
-  } else {
-    RetransformWithHookFunction f =
-        reinterpret_cast<RetransformWithHookFunction>(jvmtienv->functions->reserved1);
-    if (f(jvmtienv, target, transformationHook) != JVMTI_ERROR_NONE) {
-      printf("Failed to tranform class!");
-      return;
-    }
-  }
-}
-
-extern "C" JNIEXPORT void JNICALL Java_Main_doClassTransformation(JNIEnv* env,
-                                                                  jclass,
-                                                                  jclass target) {
-  JavaVM* vm;
-  if (env->GetJavaVM(&vm)) {
-    printf("Unable to get javaVM!\n");
-    return;
-  }
-  DoClassTransformation(jvmti_env, env, target);
-}
-
-// Don't do anything
-jint OnLoad(JavaVM* vm,
-            char* options,
-            void* reserved ATTRIBUTE_UNUSED) {
-  RuntimeIsJvm = (strcmp("jvm", options) == 0);
-  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
-    printf("Unable to get jvmti env!\n");
-    return 1;
-  }
-  SetAllCapabilities(jvmti_env);
-  if (IsJVM()) {
-    jvmtiEventCallbacks cbs;
-    memset(&cbs, 0, sizeof(cbs));
-    cbs.ClassFileLoadHook = transformationHook;
-    jvmti_env->SetEventCallbacks(&cbs, sizeof(jvmtiEventCallbacks));
-  }
-  return 0;
-}
-
-}  // namespace Test902HelloTransformation
-}  // namespace art
-
diff --git a/test/954-invoke-polymorphic-verifier/build b/test/954-invoke-polymorphic-verifier/build
new file mode 100755
index 0000000..a423ca6
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/build
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm.
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental method-handles
diff --git a/test/954-invoke-polymorphic-verifier/check b/test/954-invoke-polymorphic-verifier/check
new file mode 100755
index 0000000..dc5ddb7
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/check
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Strip out temporary file path information and indices from output.
+sed -e "s/ [(]declaration of.*//" -e "s/\[0x[0-9A-F]*\] //g" "$2" > "$2.tmp"
+diff --strip-trailing-cr -q "$1" "$2.tmp" >/dev/null
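For reference, the two sed expressions above drop the " (declaration of ...)" suffix and any "[0x...] " index prefix from verifier messages before the diff. A minimal Java sketch of the same normalization (illustrative only; the class and method names are hypothetical):

    import java.util.regex.Pattern;

    public class Normalize {
      // Mirrors: sed -e "s/ [(]declaration of.*//" -e "s/\[0x[0-9A-F]*\] //g"
      private static final Pattern DECLARATION = Pattern.compile(" \\(declaration of.*");
      private static final Pattern INDEX = Pattern.compile("\\[0x[0-9A-F]*\\] ");

      static String normalize(String line) {
        // Strip the declaration suffix first, then every index prefix.
        return INDEX.matcher(DECLARATION.matcher(line).replaceFirst("")).replaceAll("");
      }
    }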
diff --git a/test/954-invoke-polymorphic-verifier/expected.txt b/test/954-invoke-polymorphic-verifier/expected.txt
new file mode 100644
index 0000000..5df393a
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/expected.txt
@@ -0,0 +1,10 @@
+java.lang.VerifyError: Verifier rejected class MethodHandleNotInvoke: void MethodHandleNotInvoke.<init>() failed to verify: void MethodHandleNotInvoke.<init>(): void MethodHandleNotInvoke.<init>(): couldn't find method java.lang.invoke.MethodHandle.notInvoke ([Ljava/lang/Object;)Ljava/lang/Object;
+java.lang.VerifyError: Verifier rejected class MethodHandleToString: void MethodHandleToString.<init>() failed to verify: void MethodHandleToString.<init>(): void MethodHandleToString.<init>(): invoke type (METHOD_POLYMORPHIC) does not match method type of java.lang.String java.lang.invoke.MethodHandle.toString()
+java.lang.VerifyError: Verifier rejected class NonReference: void NonReference.<init>() failed to verify: void NonReference.<init>(): void NonReference.<init>(): tried to get class from non-reference register v0 (type=Precise Low-half Constant: 0)
+java.lang.VerifyError: Verifier rejected class TooFewArguments: void TooFewArguments.<init>() failed to verify: void TooFewArguments.<init>(): void TooFewArguments.<init>(): Rejecting invocation, expected 2 argument registers, method signature has 3 or more
+java.lang.VerifyError: Verifier rejected class TooManyArguments: void TooManyArguments.<init>() failed to verify: void TooManyArguments.<init>(): void TooManyArguments.<init>(): Rejecting invocation, expected 4 argument registers, method signature has 3
+java.lang.VerifyError: Verifier rejected class BadThis: void BadThis.<init>() failed to verify: void BadThis.<init>(): void BadThis.<init>(): 'this' argument 'Precise Reference: java.lang.String' not instance of 'Reference: java.lang.invoke.MethodHandle'
+java.lang.VerifyError: Verifier rejected class FakeSignaturePolymorphic: void FakeSignaturePolymorphic.<init>() failed to verify: void FakeSignaturePolymorphic.<init>(): void FakeSignaturePolymorphic.<init>(): invoke type (METHOD_POLYMORPHIC) does not match method type of java.lang.Object Main.invoke(java.lang.Object[])
+java.lang.VerifyError: Verifier rejected class BetterFakeSignaturePolymorphic: void BetterFakeSignaturePolymorphic.<init>() failed to verify: void BetterFakeSignaturePolymorphic.<init>(): Signature polymorphic method must be declared in java.lang.invoke.MethodClass
+Passed Subclass test
+java.lang.VerifyError: Verifier rejected class Unresolved: void Unresolved.<init>() failed to verify: void Unresolved.<init>(): invoke-polymorphic receiver has no class: Unresolved Reference: other.thing.Foo
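Each rejection above corresponds to a deliberately malformed invoke-polymorphic call site constructed in the smali files below. For contrast, a minimal well-formed call that the verifier accepts, using only public java.lang.invoke API (a sketch; the class name is hypothetical):

    import java.lang.invoke.MethodHandle;
    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.MethodType;

    public class GoodInvoke {
      public static void main(String[] args) throws Throwable {
        MethodHandle println = MethodHandles.lookup().findVirtual(
            java.io.PrintStream.class, "println",
            MethodType.methodType(void.class, String.class));
        // Well-formed: the receiver is a real MethodHandle and the call-site
        // type (PrintStream, String)void exactly matches the handle's type.
        println.invokeExact(System.out, "hello");
      }
    }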
diff --git a/test/954-invoke-polymorphic-verifier/info.txt b/test/954-invoke-polymorphic-verifier/info.txt
new file mode 100644
index 0000000..cb10d42
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/info.txt
@@ -0,0 +1,3 @@
+Test cases that should be rejected by the method verifier.
+
+NOTE: needs to run under ART.
diff --git a/test/954-invoke-polymorphic-verifier/run b/test/954-invoke-polymorphic-verifier/run
new file mode 100755
index 0000000..a9f1822
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-run "$@" --experimental method-handles
diff --git a/test/954-invoke-polymorphic-verifier/smali/BadThis.smali b/test/954-invoke-polymorphic-verifier/smali/BadThis.smali
new file mode 100644
index 0000000..d9edf67
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/BadThis.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "BadThis.smali"
+
+.class public LBadThis;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  const-string v0, "0"
+  const-string v1, "1"
+  const-string v2, "2"
+  # v0 is a String, not a MethodHandle.
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali b/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali
new file mode 100644
index 0000000..631e704
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/BetterFakeSignaturePolymorphic.smali
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "BetterFakeSignaturePolymorphic.smali"
+
+.class public LBetterFakeSignaturePolymorphic;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LBetterFakeSignaturePolymorphic;->getMain()LMain;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Fail here because Main;->invokeExact is on the wrong class.
+  invoke-polymorphic {v0, v1}, LMain;->invokeExact([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
+
+.method public static getMain()LMain;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali b/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali
new file mode 100644
index 0000000..5bd054a
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/FakeSignaturePolymorphic.smali
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "FakeSignaturePolymorphic.smali"
+
+.class public LFakeSignaturePolymorphic;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LFakeSignaturePolymorphic;->getMain()LMain;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Fail here because Main;->invoke does not have the right flags (i.e. it is not native or varargs).
+  invoke-polymorphic {v0, v1}, LMain;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
+
+.method public static getMain()LMain;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
\ No newline at end of file
diff --git a/test/954-invoke-polymorphic-verifier/smali/Main.smali b/test/954-invoke-polymorphic-verifier/smali/Main.smali
new file mode 100644
index 0000000..5b5e555
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Main.smali
@@ -0,0 +1,85 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is the test suite runner. It is written in smali rather than
+# Java pending support in dx/dxmerge for invoke-polymorphic (b/33191712).
+
+.source "Main.smali"
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+  invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+.registers 1
+  # New tests should be added here.
+  const-string v0, "MethodHandleNotInvoke"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "MethodHandleToString"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "NonReference"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "TooFewArguments"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "TooManyArguments"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "BadThis"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "FakeSignaturePolymorphic"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "BetterFakeSignaturePolymorphic"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "Subclass"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  const-string v0, "Unresolved"
+  invoke-static {v0}, LMain;->test(Ljava/lang/String;)V
+  return-void
+.end method
+
+.method public static test(Ljava/lang/String;)V
+.registers 6
+ :try_start_1
+  invoke-static {v5}, Ljava/lang/Class;->forName(Ljava/lang/String;)Ljava/lang/Class;
+  move-result-object v0
+  invoke-virtual {v0}, Ljava/lang/Class;->newInstance()Ljava/lang/Object;
+ :try_end_1
+  .catch Ljava/lang/VerifyError; {:try_start_1 .. :try_end_1} :catch_verifier
+  return-void
+ :catch_verifier
+  move-exception v3
+  invoke-virtual {v3}, Ljava/lang/Exception;->toString()Ljava/lang/String;
+  move-result-object v3
+  sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  invoke-virtual {v2, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+  return-void
+.end method
+
+# A test method called "invoke", but on a class other than MethodHandle.
+.method public invoke([Ljava/lang/Object;)Ljava/lang/Object;
+.registers 2
+  const/4 v0, 0
+  aget-object v0, p0, v0
+  return-object v0
+.end method
+
+# A test method called "invokeExact" that is native varargs, but is on a class
+# other than MethodHandle.
+.method public native varargs invokeExact([Ljava/lang/Object;)Ljava/lang/Object;
+.end method
\ No newline at end of file
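The runner itself only reflects, instantiates, and prints; it is written in smali solely so it can be merged with the invoke-polymorphic test classes. A rough Java sketch of the same driver (hypothetical class name; newInstance matches the reflective call used in the smali):

    public class MainRunner {
      static void test(String name) {
        try {
          Class.forName(name).newInstance();
        } catch (VerifyError e) {
          // Expected path for the rejected classes: print the verifier message.
          System.out.println(e.toString());
        } catch (ReflectiveOperationException unexpected) {
          // Checked exceptions from forName/newInstance; not expected here.
          throw new RuntimeException(unexpected);
        }
      }

      public static void main(String[] args) {
        // One call per test class, as in the smali above.
        test("MethodHandleNotInvoke");
        test("MethodHandleToString");
        test("NonReference");
        test("TooFewArguments");
        test("TooManyArguments");
        test("BadThis");
        test("FakeSignaturePolymorphic");
        test("BetterFakeSignaturePolymorphic");
        test("Subclass");
        test("Unresolved");
      }
    }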
diff --git a/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali b/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali
new file mode 100644
index 0000000..42546d1
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/MethodHandleNotInvoke.smali
@@ -0,0 +1,37 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "MethodHandleNotInvoke.smali"
+
+.class public LMethodHandleNotInvoke;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LMethodHandleNotInvoke;->getMethodHandle()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  const/4 v1, 0
+  move-object v1, v1
+  # Attempt invoke-polymorphic on MethodHandle.notInvoke().
+  invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->notInvoke([Ljava/lang/Object;)Ljava/lang/Object;, ([Ljava/lang/Object;)Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali b/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali
new file mode 100644
index 0000000..c48429c
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/MethodHandleToString.smali
@@ -0,0 +1,35 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "MethodHandleToString.smali"
+
+.class public LMethodHandleToString;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  invoke-static {}, LMethodHandleToString;->getMethodHandle()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  # Attempt invoke-polymorphic on MethodHandle.toString().
+  invoke-polymorphic {v0}, Ljava/lang/invoke/MethodHandle;->toString()Ljava/lang/String;, ()Ljava/lang/Object;
+  return-void
+.end method
+
+.method public static getMethodHandle()Ljava/lang/invoke/MethodHandle;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/NonReference.smali b/test/954-invoke-polymorphic-verifier/smali/NonReference.smali
new file mode 100644
index 0000000..4e1eff2
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/NonReference.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "NonReference.smali"
+
+.class public LNonReference;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set v0 to have incorrect type (not a MethodHandle) and value (not null).
+  const-wide v0, 0
+  const-string v1, "1"
+  const-string v2, "2"
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/Subclass.smali b/test/954-invoke-polymorphic-verifier/smali/Subclass.smali
new file mode 100644
index 0000000..7ef61be
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Subclass.smali
@@ -0,0 +1,45 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "Subclass.smali"
+
+.class public LSubclass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 3
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  goto :happy
+  # Get a MethodHandleImpl instance (subclass of MethodHandle).
+  invoke-static {}, LSubclass;->getMethodHandleSubclassInstance()Ljava/lang/invoke/MethodHandleImpl;
+  move-result-object v0
+  const-string v1, "1"
+  const-string v2, "2"
+  # Calling MethodHandle.invoke() on MethodHandleImpl instance (subclass of MethodHandle) => Okay
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  # Calling MethodHandleImpl.invoke() rather than MethodHandle.invoke() [ declaring class is okay ] => Okay
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandleImpl;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+:happy
+  sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+  const-string v2, "Passed Subclass test"
+  invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+  return-void
+.end method
+
+.method public static getMethodHandleSubclassInstance()Ljava/lang/invoke/MethodHandleImpl;
+.registers 1
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali b/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali
new file mode 100644
index 0000000..da29c6f
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/TooFewArguments.smali
@@ -0,0 +1,33 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "TooFewArguments.smali"
+
+.class public LTooFewArguments;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set up v0 as a null MethodHandle
+  const/4 v0, 0
+  move-object v0, v0
+  invoke-virtual {v0}, Ljava/lang/invoke/MethodHandle;->asFixedArity()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  const-string v1, "1"
+  # Invoke with one argument too few for prototype.
+  invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
diff --git a/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali b/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali
new file mode 100644
index 0000000..bc0135e
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/TooManyArguments.smali
@@ -0,0 +1,35 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "TooManyArguments.smali"
+
+.class public LTooManyArguments;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 4
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Set up v0 as a null MethodHandle
+  const/4 v0, 0
+  move-object v0, v0
+  invoke-virtual {v0}, Ljava/lang/invoke/MethodHandle;->asFixedArity()Ljava/lang/invoke/MethodHandle;
+  move-result-object v0
+  const-string v1, "1"
+  const-string v2, "2"
+  const-string v3, "3"
+  # Invoke with one argument too many for prototype.
+  invoke-polymorphic {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
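Note the contrast with Java source: a javac-generated call site always matches its own proto, so an arity mismatch like TooFewArguments/TooManyArguments surfaces at run time as WrongMethodTypeException rather than as a VerifyError. A minimal sketch (hypothetical class name):

    import java.lang.invoke.MethodHandle;
    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.MethodType;
    import java.lang.invoke.WrongMethodTypeException;

    public class ArityDemo {
      public static void main(String[] args) throws Throwable {
        MethodHandle concat = MethodHandles.lookup().findVirtual(
            String.class, "concat", MethodType.methodType(String.class, String.class));
        try {
          // Three arguments against a (String)String handle plus receiver.
          Object r = concat.invoke("a", "b", "c");
          System.out.println(r);
        } catch (WrongMethodTypeException expected) {
          System.out.println("wrong arity rejected at run time");
        }
      }
    }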
diff --git a/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali b/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali
new file mode 100644
index 0000000..882f0e9
--- /dev/null
+++ b/test/954-invoke-polymorphic-verifier/smali/Unresolved.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.source "Unresolved.smali"
+
+.class public LUnresolved;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 3
+.line 23
+  invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+  # Get an unresolvable instance (abstract class)
+  invoke-static {}, LAbstract;->getUnresolvedInstance()Lother/thing/Foo;
+  move-result-object v0
+  const-string v1, "1"
+  const-string v2, "2"
+  # Calling MethodHandle.invoke() on unresolved receiver.
+  invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;
+  return-void
+.end method
+
+.method public static getUnresolvedInstance()Lother/thing/Foo;
+.registers 1
+.line 37
+  const/4 v0, 0
+  return-object v0
+.end method
diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk
index 72616a1..c89eb4a 100644
--- a/test/Android.arm_vixl.mk
+++ b/test/Android.arm_vixl.mk
@@ -16,9 +16,5 @@
 
 # Known broken tests for the ARM VIXL backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \
-  103-string-append \
-  137-cfi \
-  488-checker-inline-recursive-calls \
-  552-checker-sharpening \
   562-checker-no-intermediate \
-  602-deoptimizeable \
+  624-checker-stringops \
diff --git a/test/Android.bp b/test/Android.bp
index fe20f29..44c64c1 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -244,8 +244,8 @@
     defaults: ["libartagent-defaults"],
     srcs: [
         "ti-agent/common_load.cc",
+        "ti-agent/common_helper.cc",
         "901-hello-ti-agent/basics.cc",
-        "902-hello-transformation/transform.cc",
         "903-hello-tagging/tagging.cc",
         "904-object-allocation/tracking.cc",
         "905-object-free/tracking_free.cc",
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 96b984d..d7dfe5a 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -360,8 +360,10 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
 # Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
+# 629 requires compilation.
 TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
-  137-cfi
+  137-cfi \
+  629-vdex-speed
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -470,12 +472,14 @@
 # This test dynamically enables tracing to force a deoptimization. This makes the test meaningless
 # when already tracing, and writes an error message that we do not want to check for.
 # 130 occasional timeout b/32383962.
+# 629 requires compilation.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   087-gc-after-link \
   130-hprof \
   137-cfi \
   141-class-unload \
   570-checker-osr \
+  629-vdex-speed \
   802-deoptimization
 
 ifneq (,$(filter trace stream,$(TRACE_TYPES)))
@@ -486,9 +490,11 @@
 
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
+# 629 requires compilation.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
   137-cfi \
-  554-jit-profile-file
+  554-jit-profile-file \
+  629-vdex-speed
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -504,8 +510,10 @@
 # Test 906 iterates the heap filtering with different options. No instances should be created
 # between those runs to be able to have precise checks.
 # Test 902 hits races with the JIT compiler. b/32821077
+# Test 629 requires compilation.
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
   137-cfi \
+  629-vdex-speed \
   902-hello-transformation \
   904-object-allocation \
   906-iterate-heap \
@@ -660,6 +668,15 @@
   endif
 endif
 
+# Tests disabled for GSS.
+TEST_ART_BROKEN_GSS_RUN_TESTS := 080-oom-fragmentation
+ifeq ($(ART_DEFAULT_GC_TYPE),GSS)
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+      $(PREBUILD_TYPES),$(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+      $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_GSS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
 
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index a2eb370..285f3aa 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -119,6 +119,24 @@
   return JNI_TRUE;
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isAotCompiled(JNIEnv* env,
+                                                              jclass,
+                                                              jclass cls,
+                                                              jstring method_name) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ArtMethod* method = soa.Decode<mirror::Class>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  const void* code = method->GetOatMethodQuickCode(kRuntimePointerSize);
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
+    return true;
+  }
+  return code != nullptr;
+}
+
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
                                                              jclass,
                                                              jclass cls,
@@ -157,4 +175,17 @@
   }
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasSingleImplementation(JNIEnv* env,
+                                                                        jclass,
+                                                                        jclass cls,
+                                                                        jstring method_name) {
+  ArtMethod* method = nullptr;
+  ScopedObjectAccess soa(Thread::Current());
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  method = soa.Decode<mirror::Class>(cls)->FindDeclaredVirtualMethodByName(
+      chars.c_str(), kRuntimePointerSize);
+  return method->HasSingleImplementation();
+}
+
 }  // namespace art
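For reference, the Java-side declarations matching these exports: isAotCompiled is the declaration used by test 629 earlier in this change, and hasSingleImplementation is assumed to follow the same pattern (sketch only):

    public class Main {
      // Declaration used by test 629 above.
      private native static boolean isAotCompiled(Class<?> cls, String methodName);
      // Assumed companion declaration for the second export (same pattern).
      private native static boolean hasSingleImplementation(Class<?> cls, String methodName);
    }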
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
new file mode 100644
index 0000000..3e2b168
--- /dev/null
+++ b/test/ti-agent/common_helper.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ti-agent/common_helper.h"
+
+#include <stdio.h>
+
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_load.h"
+#include "utils.h"
+
+namespace art {
+bool RuntimeIsJVM;
+
+bool IsJVM() {
+  return RuntimeIsJVM;
+}
+
+void SetAllCapabilities(jvmtiEnv* env) {
+  jvmtiCapabilities caps;
+  env->GetPotentialCapabilities(&caps);
+  env->AddCapabilities(&caps);
+}
+
+namespace common_redefine {
+
+using RedefineDirectFunction = jvmtiError (*)(jvmtiEnv*, jclass, jint, const unsigned char*);
+static void DoClassTransformation(jvmtiEnv* jvmti_env, JNIEnv* env,
+                                  jclass target,
+                                  jbyteArray class_file_bytes,
+                                  jbyteArray dex_file_bytes) {
+  jbyteArray desired_array = IsJVM() ? class_file_bytes : dex_file_bytes;
+  jint len = static_cast<jint>(env->GetArrayLength(desired_array));
+  const unsigned char* redef_bytes = reinterpret_cast<const unsigned char*>(
+      env->GetByteArrayElements(desired_array, nullptr));
+  jvmtiError res;
+  if (IsJVM()) {
+    jvmtiClassDefinition def;
+    def.klass = target;
+    def.class_byte_count = static_cast<jint>(len);
+    def.class_bytes = redef_bytes;
+    res = jvmti_env->RedefineClasses(1, &def);
+  } else {
+    RedefineDirectFunction f =
+        reinterpret_cast<RedefineDirectFunction>(jvmti_env->functions->reserved3);
+    res = f(jvmti_env, target, len, redef_bytes);
+  }
+  if (res != JVMTI_ERROR_NONE) {
+    printf("Redefinition failed!");
+  }
+}
+
+// Magic JNI export that tests can use to redefine classes.
+// To use it, a test class declares this as a native method with signature (Ljava/lang/Class;[B[B)V.
+extern "C" JNIEXPORT void JNICALL Java_Main_doCommonClassRedefinition(JNIEnv* env,
+                                                                      jclass,
+                                                                      jclass target,
+                                                                      jbyteArray class_file_bytes,
+                                                                      jbyteArray dex_file_bytes) {
+  DoClassTransformation(jvmti_env, env, target, class_file_bytes, dex_file_bytes);
+}
+
+// Shared agent OnLoad: grab the jvmti env and enable all capabilities; no test-specific setup.
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace common_redefine
+
+}  // namespace art
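A test class consumes this export through the native declaration noted in the comment above; test 902 earlier in this change declares it this way (sketch mirroring that declaration):

    public class Main {
      // Matches the (Ljava/lang/Class;[B[B)V signature of
      // Java_Main_doCommonClassRedefinition above.
      private static native void doCommonClassRedefinition(Class<?> target,
                                                           byte[] classFile,
                                                           byte[] dexFile);
    }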
diff --git a/test/ti-agent/common_helper.h b/test/ti-agent/common_helper.h
index 84997f3..76543fe 100644
--- a/test/ti-agent/common_helper.h
+++ b/test/ti-agent/common_helper.h
@@ -18,9 +18,19 @@
 #define ART_TEST_TI_AGENT_COMMON_HELPER_H_
 
 #include "jni.h"
+#include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
 
 namespace art {
+namespace common_redefine {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace common_redefine
+
+extern bool RuntimeIsJVM;
+
+bool IsJVM();
 
 template <typename T>
 static jobjectArray CreateObjectArray(JNIEnv* env,
@@ -53,11 +63,7 @@
   return ret.release();
 }
 
-static void SetAllCapabilities(jvmtiEnv* env) {
-  jvmtiCapabilities caps;
-  env->GetPotentialCapabilities(&caps);
-  env->AddCapabilities(&caps);
-}
+void SetAllCapabilities(jvmtiEnv* env);
 
 }  // namespace art
 
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index a959482..2795cbc 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -23,9 +23,9 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "common_load.h"
+#include "common_helper.h"
 
 #include "901-hello-ti-agent/basics.h"
-#include "902-hello-transformation/transform.h"
 #include "903-hello-tagging/tagging.h"
 #include "904-object-allocation/tracking.h"
 #include "905-object-free/tracking_free.h"
@@ -54,7 +54,7 @@
 // A list of all the agents we have for testing.
 AgentLib agents[] = {
   { "901-hello-ti-agent", Test901HelloTi::OnLoad, nullptr },
-  { "902-hello-transformation", Test902HelloTransformation::OnLoad, nullptr },
+  { "902-hello-transformation", common_redefine::OnLoad, nullptr },
   { "903-hello-tagging", Test903HelloTagging::OnLoad, nullptr },
   { "904-object-allocation", Test904ObjectAllocation::OnLoad, nullptr },
   { "905-object-free", Test905ObjectFree::OnLoad, nullptr },
@@ -95,6 +95,10 @@
   return true;
 }
 
+static void SetIsJVM(char* options) {
+  RuntimeIsJVM = strncmp(options, "jvm", 3) == 0;
+}
+
 extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
   char* remaining_options = nullptr;
   char* name_option = nullptr;
@@ -112,6 +116,7 @@
     printf("agent: %s does not include an OnLoad method.\n", name_option);
     return -3;
   }
+  SetIsJVM(remaining_options);
   return lib->load(vm, remaining_options, reserved);
 }
 
@@ -132,6 +137,7 @@
     printf("agent: %s does not include an OnAttach method.\n", name_option);
     return -3;
   }
+  SetIsJVM(remaining_options);
   return lib->attach(vm, remaining_options, reserved);
 }
 
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 53fe8fe..ae18f41 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -173,12 +173,6 @@
   bug: 25178637
 },
 {
-  description: "Non-deterministic test because of a dependency on weak ref collection.",
-  result: EXEC_FAILED,
-  names: ["org.apache.harmony.tests.java.util.WeakHashMapTest#test_keySet"],
-  bug: 25437292
-},
-{
   description: "Missing resource in classpath",
   result: EXEC_FAILED,
   modes: [device],