Merge "unstarted_runtime: add cutout for Math.floor"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index a7867e8..e9c22d2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -54,6 +54,7 @@
 	optimizing/induction_var_analysis.cc \
 	optimizing/induction_var_range.cc \
 	optimizing/inliner.cc \
+	optimizing/instruction_builder.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
 	optimizing/licm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 0001b67..f75a252 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -285,7 +285,7 @@
   MemMap::Init();
   image_reservation_.reset(MemMap::MapAnonymous("image reservation",
                                                 reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
-                                                (size_t)100 * 1024 * 1024,  // 100MB
+                                                (size_t)120 * 1024 * 1024,  // 120MB
                                                 PROT_NONE,
                                                 false /* no need for 4gb flag with fixed mmap*/,
                                                 false /* not reusing existing reservation */,
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7c2c844..2d139eb 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -115,13 +115,6 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When heap reference poisoning works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK() \
-  if (kPoisonHeapReferences && GetCompilerKind() == Compiler::kQuick) { \
-    printf("WARNING: TEST DISABLED FOR HEAP REFERENCE POISONING WITH QUICK\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all tests, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER() \
   if (kUseReadBarrier) { \
@@ -129,13 +122,6 @@
     return; \
   }
 
-// TODO: When read barrier works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK() \
-  if (kUseReadBarrier && GetCompilerKind() == Compiler::kQuick) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER WITH QUICK\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all Optimizing back ends, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
@@ -155,13 +141,6 @@
     }                                                                                     \
   }
 
-// TODO: When non-PIC works with all compilers in use, get rid of this.
-#define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \
-  if (GetCompilerKind() == Compiler::kOptimizing) { \
-    printf("WARNING: TEST DISABLED FOR NON-PIC COMPILING WITH OPTIMIZING\n"); \
-    return; \
-  }
-
 }  // namespace art
 
 #endif  // ART_COMPILER_COMMON_COMPILER_TEST_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index be149af..5294068 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -357,7 +357,7 @@
       compiler_kind_(compiler_kind),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
-      freezing_constructor_lock_("freezing constructor lock"),
+      requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
@@ -2006,6 +2006,28 @@
   self->ClearException();
 }
 
+bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file,
+                                                uint16_t class_def_idx) const {
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
+  const uint8_t* class_data = dex_file.GetClassData(class_def);
+  if (class_data == nullptr) {
+    // Empty class such as a marker interface.
+    return false;
+  }
+  ClassDataItemIterator it(dex_file, class_data);
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  // We require a constructor barrier if there are final instance fields.
+  while (it.HasNextInstanceField()) {
+    if (it.MemberIsFinal()) {
+      return true;
+    }
+    it.Next();
+  }
+  return false;
+}
+
 class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor {
  public:
   explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager)
@@ -2110,9 +2132,10 @@
         DCHECK(!it.HasNext());
       }
     }
-    if (requires_constructor_barrier) {
-      manager_->GetCompiler()->AddRequiresConstructorBarrier(self, &dex_file, class_def_index);
-    }
+    manager_->GetCompiler()->SetRequiresConstructorBarrier(self,
+                                                           &dex_file,
+                                                           class_def_index,
+                                                           requires_constructor_barrier);
   }
 
  private:
@@ -2769,16 +2792,29 @@
   return non_relative_linker_patch_count_;
 }
 
-void CompilerDriver::AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                   uint16_t class_def_index) {
-  WriterMutexLock mu(self, freezing_constructor_lock_);
-  freezing_constructor_classes_.insert(ClassReference(dex_file, class_def_index));
+void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
+                                                   const DexFile* dex_file,
+                                                   uint16_t class_def_index,
+                                                   bool requires) {
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  requires_constructor_barrier_.emplace(ClassReference(dex_file, class_def_index), requires);
 }
 
-bool CompilerDriver::RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                                uint16_t class_def_index) const {
-  ReaderMutexLock mu(self, freezing_constructor_lock_);
-  return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
+bool CompilerDriver::RequiresConstructorBarrier(Thread* self,
+                                                const DexFile* dex_file,
+                                                uint16_t class_def_index) {
+  ClassReference class_ref(dex_file, class_def_index);
+  {
+    ReaderMutexLock mu(self, requires_constructor_barrier_lock_);
+    auto it = requires_constructor_barrier_.find(class_ref);
+    if (it != requires_constructor_barrier_.end()) {
+      return it->second;
+    }
+  }
+  WriterMutexLock mu(self, requires_constructor_barrier_lock_);
+  const bool requires = RequiresConstructorBarrier(*dex_file, class_def_index);
+  requires_constructor_barrier_.emplace(class_ref, requires);
+  return requires;
 }
 
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 64a06a2..905f84d 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -183,12 +183,15 @@
   // Remove and delete a compiled method.
   void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
 
-  void AddRequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                     uint16_t class_def_index)
-      REQUIRES(!freezing_constructor_lock_);
-  bool RequiresConstructorBarrier(Thread* self, const DexFile* dex_file,
-                                  uint16_t class_def_index) const
-      REQUIRES(!freezing_constructor_lock_);
+  void SetRequiresConstructorBarrier(Thread* self,
+                                     const DexFile* dex_file,
+                                     uint16_t class_def_index,
+                                     bool requires)
+      REQUIRES(!requires_constructor_barrier_lock_);
+  bool RequiresConstructorBarrier(Thread* self,
+                                  const DexFile* dex_file,
+                                  uint16_t class_def_index)
+      REQUIRES(!requires_constructor_barrier_lock_);
 
   // Callbacks from compiler to see what runtime checks must be generated.
 
@@ -619,6 +622,8 @@
   void FreeThreadPools();
   void CheckThreadPools();
 
+  bool RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -629,9 +634,11 @@
   const InstructionSet instruction_set_;
   const InstructionSetFeatures* const instruction_set_features_;
 
-  // All class references that require
-  mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::set<ClassReference> freezing_constructor_classes_ GUARDED_BY(freezing_constructor_lock_);
+  // All class references that require constructor barriers. If the class reference is not in the
+  // set then the result has not yet been computed.
+  mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::map<ClassReference, bool> requires_constructor_barrier_
+      GUARDED_BY(requires_constructor_barrier_lock_);
 
   typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
   // All class references that this compiler has compiled.
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 0037564..b9a5a78 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -143,21 +143,11 @@
   // TODO: check that all Method::GetCode() values are non-null
 }
 
-TEST_F(CompilerDriverTest, DISABLED_AbstractMethodErrorStub) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(ScopedNullHandle<mirror::ClassLoader>(),
-                         "java.lang.Class",
-                         "isFinalizable",
-                         "()Z");
-    CompileDirectMethod(ScopedNullHandle<mirror::ClassLoader>(),
-                        "java.lang.Object",
-                        "<init>",
-                        "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
@@ -197,8 +187,6 @@
 };
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
@@ -303,8 +291,6 @@
 };
 
 TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7779e44..91579e9 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -288,17 +288,14 @@
 }
 
 TEST_F(ImageTest, WriteReadUncompressed) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeUncompressed);
 }
 
 TEST_F(ImageTest, WriteReadLZ4) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4);
 }
 
 TEST_F(ImageTest, WriteReadLZ4HC) {
-  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4HC);
 }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c747ffa..8bb462c 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -576,7 +576,16 @@
       }
     } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
       bin = kBinString;  // Strings are almost always immutable (except for object header).
-    }  // else bin = kBinRegular
+    } else if (object->GetClass<kVerifyNone>() ==
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangObject)) {
+      // Instance of java lang object, probably a lock object. This means it will be dirty when we
+      // synchronize on it.
+      bin = kBinMiscDirty;
+    } else if (object->IsDexCache()) {
+      // Dex file field becomes dirty when the image is loaded.
+      bin = kBinMiscDirty;
+    }
+    // else bin = kBinRegular
   }
 
   size_t oat_index = GetOatIndex(object);
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index f204b28..0cb6aea 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -149,16 +149,17 @@
   void RecordImageAllocations() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Classify different kinds of bins that objects end up getting packed into during image writing.
+  // Ordered from dirtiest to cleanest (until ArtMethods).
   enum Bin {
-    // Likely-clean:
-    kBinString,                        // [String] Almost always immutable (except for obj header).
+    kBinMiscDirty,                // Dex caches, object locks, etc...
+    kBinClassVerified,            // Class verified, but initializers haven't been run
     // Unknown mix of clean/dirty:
     kBinRegular,
-    // Likely-dirty:
+    kBinClassInitialized,         // Class initializers have been run
     // All classes get their own bins since their fields often dirty
     kBinClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
-    kBinClassInitialized,         // Class initializers have been run
-    kBinClassVerified,            // Class verified, but initializers haven't been run
+    // Likely-clean:
+    kBinString,                        // [String] Almost always immutable (except for obj header).
     // Add more bins here if we add more segregation code.
     // Non mirror fields must be below.
     // ArtFields should be always clean.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 5315858..86742e6 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -20,110 +20,17 @@
 #include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/logging.h"
-#include "bytecode_utils.h"
-#include "class_linker.h"
 #include "dex/verified_method.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
-#include "scoped_thread_state_change.h"
-#include "ssa_builder.h"
 #include "thread.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
-void HGraphBuilder::InitializeLocals(uint16_t count) {
-  graph_->SetNumberOfVRegs(count);
-  locals_.resize(count);
-  HBasicBlock* entry_block = graph_->GetEntryBlock();
-  for (int i = 0; i < count; i++) {
-    HLocal* local = new (arena_) HLocal(i);
-    entry_block->AddInstruction(local);
-    locals_[i] = local;
-  }
-}
-
-void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) {
-  // dex_compilation_unit_ is null only when unit testing.
-  if (dex_compilation_unit_ == nullptr) {
-    return;
-  }
-
-  HBasicBlock* entry_block = graph_->GetEntryBlock();
-
-  graph_->SetNumberOfInVRegs(number_of_parameters);
-  const char* shorty = dex_compilation_unit_->GetShorty();
-  int locals_index = locals_.size() - number_of_parameters;
-  int parameter_index = 0;
-
-  const DexFile::MethodId& referrer_method_id =
-      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
-  if (!dex_compilation_unit_->IsStatic()) {
-    // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
-                                                              referrer_method_id.class_idx_,
-                                                              parameter_index++,
-                                                              Primitive::kPrimNot,
-                                                              true);
-    entry_block->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    entry_block->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    number_of_parameters--;
-  }
-
-  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
-  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
-  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(
-        *dex_file_,
-        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
-        parameter_index++,
-        Primitive::GetType(shorty[shorty_pos]),
-        false);
-    ++shorty_pos;
-    entry_block->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    // Store the parameter value in the local that the dex code will use
-    // to reference that parameter.
-    entry_block->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
-        || (parameter->GetType() == Primitive::kPrimDouble);
-    if (is_wide) {
-      i++;
-      locals_index++;
-      parameter_index++;
-    }
-  }
-}
-
-template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(first, second, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_ = nullptr;
-}
-
-template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_ = nullptr;
-}
-
 void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
   if (compilation_stats_ != nullptr) {
     compilation_stats_->RecordStat(compilation_stat);
@@ -163,84 +70,13 @@
   return false;
 }
 
-static bool BlockIsNotPopulated(HBasicBlock* block) {
-  if (!block->GetPhis().IsEmpty()) {
-    return false;
-  } else if (block->IsLoopHeader()) {
-    // Suspend checks were inserted into loop headers during building of dominator tree.
-    DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
-    return block->GetFirstInstruction() == block->GetLastInstruction();
-  } else {
-    return block->GetInstructions().IsEmpty();
-  }
-}
-
-bool HGraphBuilder::GenerateInstructions() {
-  // Find locations where we want to generate extra stackmaps for native debugging.
-  // This allows us to generate the info only at interesting points (for example,
-  // at start of java statement) rather than before every dex instruction.
-  const bool native_debuggable = compiler_driver_ != nullptr &&
-                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
-  ArenaBitVector* native_debug_info_locations;
-  if (native_debuggable) {
-    const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
-    native_debug_info_locations =
-        ArenaBitVector::Create(arena_, num_instructions, false, kArenaAllocGraphBuilder);
-    FindNativeDebugInfoLocations(native_debug_info_locations);
-  }
-
-  InitializeLocals(code_item_.registers_size_);
-  InitializeParameters(code_item_.ins_size_);
-
-  // Add the suspend check to the entry block.
-  current_block_ = graph_->GetEntryBlock();
-  current_block_->AddInstruction(new (arena_) HSuspendCheck(0));
-
-  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
-    uint32_t dex_pc = it.CurrentDexPc();
-
-    HBasicBlock* next_block = FindBlockStartingAt(dex_pc);
-    if (next_block != nullptr && next_block->GetGraph() != nullptr) {
-      if (current_block_ != nullptr) {
-        // Branching instructions clear current_block, so we know
-        // the last instruction of the current block is not a branching
-        // instruction. We add an unconditional goto to the found block.
-        current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-      }
-      DCHECK(BlockIsNotPopulated(next_block));
-      current_block_ = next_block;
-    }
-
-    if (current_block_ == nullptr) {
-      // Unreachable code.
-      continue;
-    }
-
-    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
-      current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
-    }
-
-    if (!AnalyzeDexInstruction(it.CurrentInstruction(), dex_pc)) {
-      return false;
-    }
-  }
-
-  // Add Exit to the exit block.
-  HBasicBlock* exit_block = graph_->GetExitBlock();
-  if (exit_block == nullptr) {
-    // Unreachable exit block was removed.
-  } else {
-    exit_block->AddInstruction(new (arena_) HExit());
-  }
-
-  return true;
-}
-
-GraphAnalysisResult HGraphBuilder::BuildGraph(StackHandleScopeCollection* handles) {
+GraphAnalysisResult HGraphBuilder::BuildGraph() {
   DCHECK(graph_->GetBlocks().empty());
+
+  graph_->SetNumberOfVRegs(code_item_.registers_size_);
+  graph_->SetNumberOfInVRegs(code_item_.ins_size_);
   graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_);
   graph_->SetHasTryCatch(code_item_.tries_size_ != 0);
-  graph_->InitializeInexactObjectRTI(handles);
 
   // 1) Create basic blocks and link them together. Basic blocks are left
   //    unpopulated with the exception of synthetic blocks, e.g. HTryBoundaries.
@@ -261,2264 +97,12 @@
   }
 
   // 4) Populate basic blocks with instructions.
-  if (!GenerateInstructions()) {
+  if (!instruction_builder_.Build()) {
     return kAnalysisInvalidBytecode;
   }
 
   // 5) Type the graph and eliminate dead/redundant phis.
-  return SsaBuilder(graph_, code_item_, handles).BuildSsa();
-}
-
-void HGraphBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) {
-  // The callback gets called when the line number changes.
-  // In other words, it marks the start of new java statement.
-  struct Callback {
-    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
-      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
-      return false;
-    }
-  };
-  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
-  // Instruction-specific tweaks.
-  const Instruction* const begin = Instruction::At(code_item_.insns_);
-  const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_);
-  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
-    switch (inst->Opcode()) {
-      case Instruction::MOVE_EXCEPTION: {
-        // Stop in native debugger after the exception has been moved.
-        // The compiler also expects the move at the start of basic block so
-        // we do not want to interfere by inserting native-debug-info before it.
-        locations->ClearBit(inst->GetDexPc(code_item_.insns_));
-        const Instruction* next = inst->Next();
-        if (next < end) {
-          locations->SetBit(next->GetDexPc(code_item_.insns_));
-        }
-        break;
-      }
-      default:
-        break;
-    }
-  }
-}
-
-template<typename T>
-void HGraphBuilder::Unop_12x(const Instruction& instruction,
-                             Primitive::Type type,
-                             uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Conversion_12x(const Instruction& instruction,
-                                   Primitive::Type input_type,
-                                   Primitive::Type result_type,
-                                   uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), input_type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
-                                    Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
-                                  Primitive::Type type,
-                                  ComparisonBias bias,
-                                  uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
-  Thread* self = Thread::Current();
-  return cu->IsConstructor()
-      && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
-}
-
-// Returns true if `block` has only one successor which starts at the next
-// dex_pc after `instruction` at `dex_pc`.
-static bool IsFallthroughInstruction(const Instruction& instruction,
-                                     uint32_t dex_pc,
-                                     HBasicBlock* block) {
-  uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
-  return block->GetSingleSuccessor()->GetDexPc() == next_dex_pc;
-}
-
-void HGraphBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  DexSwitchTable table(instruction, dex_pc);
-
-  if (table.GetNumEntries() == 0) {
-    // Empty Switch. Code falls through to the next block.
-    DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_));
-    current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-  } else if (table.ShouldBuildDecisionTree()) {
-    for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
-      HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc);
-      HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc);
-      current_block_->AddInstruction(comparison);
-      HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-      current_block_->AddInstruction(ifinst);
-
-      if (!it.IsLast()) {
-        current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex());
-      }
-    }
-  } else {
-    current_block_->AddInstruction(
-        new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc));
-  }
-
-  current_block_ = nullptr;
-}
-
-void HGraphBuilder::BuildReturn(const Instruction& instruction,
-                                Primitive::Type type,
-                                uint32_t dex_pc) {
-  if (type == Primitive::kPrimVoid) {
-    if (graph_->ShouldGenerateConstructorBarrier()) {
-      // The compilation unit is null during testing.
-      if (dex_compilation_unit_ != nullptr) {
-        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_))
-          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
-      }
-      current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
-    }
-    current_block_->AddInstruction(new (arena_) HReturnVoid(dex_pc));
-  } else {
-    HInstruction* value = LoadLocal(instruction.VRegA(), type, dex_pc);
-    current_block_->AddInstruction(new (arena_) HReturn(value, dex_pc));
-  }
-  current_block_ = nullptr;
-}
-
-static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
-  switch (opcode) {
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      return kStatic;
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-      return kDirect;
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-      return kVirtual;
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      return kInterface;
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_SUPER:
-      return kSuper;
-    default:
-      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
-      UNREACHABLE();
-  }
-}
-
-ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-
-  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
-
-  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
-      *dex_compilation_unit_->GetDexFile(),
-      method_idx,
-      dex_compilation_unit_->GetDexCache(),
-      class_loader,
-      /* referrer */ nullptr,
-      invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    // Clean up any exception left by type resolution.
-    soa.Self()->ClearException();
-    return nullptr;
-  }
-
-  // Check access. The class linker has a fast path for looking into the dex cache
-  // and does not check the access if it hits it.
-  if (compiling_class.Get() == nullptr) {
-    if (!resolved_method->IsPublic()) {
-      return nullptr;
-    }
-  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
-                                                       resolved_method,
-                                                       dex_compilation_unit_->GetDexCache().Get(),
-                                                       method_idx)) {
-    return nullptr;
-  }
-
-  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
-  // We need to look at the referrer's super class vtable. We need to do this to know if we need to
-  // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
-  // which require runtime handling.
-  if (invoke_type == kSuper) {
-    if (compiling_class.Get() == nullptr) {
-      // We could not determine the method's class we need to wait until runtime.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    }
-    ArtMethod* current_method = graph_->GetArtMethod();
-    DCHECK(current_method != nullptr);
-    Handle<mirror::Class> methods_class(hs.NewHandle(
-        dex_compilation_unit_->GetClassLinker()->ResolveReferencedClassOfMethod(Thread::Current(),
-                                                                                method_idx,
-                                                                                current_method)));
-    if (methods_class.Get() == nullptr) {
-      // Invoking a super method requires knowing the actual super class. If we did not resolve
-      // the compiling method's declaring class (which only happens for ahead of time
-      // compilation), bail out.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    } else {
-      ArtMethod* actual_method;
-      if (methods_class->IsInterface()) {
-        actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
-            resolved_method, class_linker->GetImagePointerSize());
-      } else {
-        uint16_t vtable_index = resolved_method->GetMethodIndex();
-        actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
-            vtable_index, class_linker->GetImagePointerSize());
-      }
-      if (actual_method != resolved_method &&
-          !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // The back-end code generator relies on this check in order to ensure that it will not
-        // attempt to read the dex_cache with a dex_method_index that is not from the correct
-        // dex_file. If we didn't do this check then the dex_method_index will not be updated in the
-        // builder, which means that the code-generator (and compiler driver during sharpening and
-        // inliner, maybe) might invoke an incorrect method.
-        // TODO: The actual method could still be referenced in the current dex file, so we
-        //       could try locating it.
-        // TODO: Remove the dex_file restriction.
-        return nullptr;
-      }
-      if (!actual_method->IsInvokable()) {
-        // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
-        // could resolve the callee to the wrong method.
-        return nullptr;
-      }
-      resolved_method = actual_method;
-    }
-  }
-
-  // Check for incompatible class changes. The class linker has a fast path for
-  // looking into the dex cache and does not check incompatible class changes if it hits it.
-  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
-    return nullptr;
-  }
-
-  return resolved_method;
-}
-
-bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_pc,
-                                uint32_t method_idx,
-                                uint32_t number_of_vreg_arguments,
-                                bool is_range,
-                                uint32_t* args,
-                                uint32_t register_index) {
-  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
-  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
-
-  // Remove the return type from the 'proto'.
-  size_t number_of_arguments = strlen(descriptor) - 1;
-  if (invoke_type != kStatic) {  // instance call
-    // One extra argument for 'this'.
-    number_of_arguments++;
-  }
-
-  MethodReference target_method(dex_file_, method_idx);
-
-  // Special handling for string init.
-  int32_t string_init_offset = 0;
-  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
-                                                       dex_file_,
-                                                       &string_init_offset);
-  // Replace calls to String.<init> with StringFactory.
-  if (is_string_init) {
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        dchecked_integral_cast<uint64_t>(string_init_offset),
-        0U
-    };
-    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
-        arena_,
-        number_of_arguments - 1,
-        Primitive::kPrimNot /*return_type */,
-        dex_pc,
-        method_idx,
-        target_method,
-        dispatch_info,
-        invoke_type,
-        kStatic /* optimized_invoke_type */,
-        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
-    return HandleStringInit(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor);
-  }
-
-  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
-    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
-                                                     number_of_arguments,
-                                                     return_type,
-                                                     dex_pc,
-                                                     method_idx,
-                                                     invoke_type);
-    return HandleInvoke(invoke,
-                        number_of_vreg_arguments,
-                        args,
-                        register_index,
-                        is_range,
-                        descriptor,
-                        nullptr /* clinit_check */);
-  }
-
-  // Potential class initialization check, in the case of a static method call.
-  HClinitCheck* clinit_check = nullptr;
-  HInvoke* invoke = nullptr;
-  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
-    // By default, consider that the called method implicitly requires
-    // an initialization check of its declaring method.
-    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
-        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    ScopedObjectAccess soa(Thread::Current());
-    if (invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(
-          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
-    } else if (invoke_type == kSuper) {
-      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // Update the target method to the one resolved. Note that this may be a no-op if
-        // we resolved to the method referenced by the instruction.
-        method_idx = resolved_method->GetDexMethodIndex();
-        target_method = MethodReference(dex_file_, method_idx);
-      }
-    }
-
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0U
-    };
-    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
-                                                number_of_arguments,
-                                                return_type,
-                                                dex_pc,
-                                                method_idx,
-                                                target_method,
-                                                dispatch_info,
-                                                invoke_type,
-                                                invoke_type,
-                                                clinit_check_requirement);
-  } else if (invoke_type == kVirtual) {
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeVirtual(arena_,
-                                         number_of_arguments,
-                                         return_type,
-                                         dex_pc,
-                                         method_idx,
-                                         resolved_method->GetMethodIndex());
-  } else {
-    DCHECK_EQ(invoke_type, kInterface);
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeInterface(arena_,
-                                           number_of_arguments,
-                                           return_type,
-                                           dex_pc,
-                                           method_idx,
-                                           resolved_method->GetDexMethodIndex());
-  }
-
-  return HandleInvoke(invoke,
-                      number_of_vreg_arguments,
-                      args,
-                      register_index,
-                      is_range,
-                      descriptor,
-                      clinit_check);
-}
-
-bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
-  bool finalizable;
-  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
-
-  // Only the non-resolved entrypoint handles the finalizable class case. If we
-  // need access checks, then we haven't resolved the method and the class may
-  // again be finalizable.
-  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
-      ? kQuickAllocObject
-      : kQuickAllocObjectInitialized;
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-
-  if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // We currently do not support inlining allocations across dex files.
-    return false;
-  }
-
-  HLoadClass* load_class = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      outer_dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      /*needs_access_check*/ can_throw,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
-
-  current_block_->AddInstruction(load_class);
-  HInstruction* cls = load_class;
-  if (!IsInitialized(resolved_class)) {
-    cls = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  current_block_->AddInstruction(new (arena_) HNewInstance(
-      cls,
-      graph_->GetCurrentMethod(),
-      dex_pc,
-      type_index,
-      *dex_compilation_unit_->GetDexFile(),
-      can_throw,
-      finalizable,
-      entrypoint));
-  return true;
-}
-
-static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
-}
-
-bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
-  if (cls.Get() == nullptr) {
-    return false;
-  }
-
-  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
-  // check whether the class is in an image for the AOT compilation.
-  if (cls->IsInitialized() &&
-      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
-    return true;
-  }
-
-  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  // TODO: We should walk over the inlined methods, but we don't pass
-  //       that information to the builder.
-  if (IsSubClass(GetCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  return false;
-}
-
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* resolved_method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Thread* self = Thread::Current();
-  StackHandleScope<4> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
-
-  // The index at which the method's class is stored in the DexCache's type array.
-  uint32_t storage_index = DexFile::kDexNoIndex;
-  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
-    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
-    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
-                                                               GetCompilingClass(),
-                                                               resolved_method,
-                                                               method_idx,
-                                                               &storage_index);
-  }
-
-  HClinitCheck* clinit_check = nullptr;
-
-  if (IsInitialized(resolved_method_class)) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-  } else if (storage_index != DexFile::kDexNoIndex) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-    HLoadClass* load_class = new (arena_) HLoadClass(
-        graph_->GetCurrentMethod(),
-        storage_index,
-        outer_dex_file,
-        is_outer_class,
-        dex_pc,
-        /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
-    current_block_->AddInstruction(load_class);
-    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(clinit_check);
-  }
-  return clinit_check;
-}
-
-bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
-                                         uint32_t number_of_vreg_arguments,
-                                         uint32_t* args,
-                                         uint32_t register_index,
-                                         bool is_range,
-                                         const char* descriptor,
-                                         size_t start_index,
-                                         size_t* argument_index) {
-  uint32_t descriptor_index = 1;  // Skip the return type.
-  uint32_t dex_pc = invoke->GetDexPc();
-
-  for (size_t i = start_index;
-       // Make sure we don't go over the expected arguments or over the number of
-       // dex registers given. If the instruction was seen as dead by the verifier,
-       // it hasn't been properly checked.
-       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
-       i++, (*argument_index)++) {
-    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
-    if (!is_range
-        && is_wide
-        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
-      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
-      // reject any class where this is violated. However, the verifier only does these checks
-      // on non trivially dead instructions, so we just bailout the compilation.
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-      return false;
-    }
-    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    invoke->SetArgumentAt(*argument_index, arg);
-    if (is_wide) {
-      i++;
-    }
-  }
-
-  if (*argument_index != invoke->GetNumberOfArguments()) {
-    VLOG(compiler) << "Did not compile "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-    return false;
-  }
-
-  if (invoke->IsInvokeStaticOrDirect() &&
-      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
-          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
-    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
-    (*argument_index)++;
-  }
-
-  return true;
-}
-
-bool HGraphBuilder::HandleInvoke(HInvoke* invoke,
-                                 uint32_t number_of_vreg_arguments,
-                                 uint32_t* args,
-                                 uint32_t register_index,
-                                 bool is_range,
-                                 const char* descriptor,
-                                 HClinitCheck* clinit_check) {
-  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 0;
-  size_t argument_index = 0;
-  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadLocal(
-        is_range ? register_index : args[0], Primitive::kPrimNot, invoke->GetDexPc());
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
-    current_block_->AddInstruction(null_check);
-    invoke->SetArgumentAt(0, null_check);
-    start_index = 1;
-    argument_index = 1;
-  }
-
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  if (clinit_check != nullptr) {
-    // Add the class initialization check as last input of `invoke`.
-    DCHECK(invoke->IsInvokeStaticOrDirect());
-    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
-        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
-    invoke->SetArgumentAt(argument_index, clinit_check);
-    argument_index++;
-  }
-
-  current_block_->AddInstruction(invoke);
-  latest_result_ = invoke;
-
-  return true;
-}
-
-bool HGraphBuilder::HandleStringInit(HInvoke* invoke,
-                                     uint32_t number_of_vreg_arguments,
-                                     uint32_t* args,
-                                     uint32_t register_index,
-                                     bool is_range,
-                                     const char* descriptor) {
-  DCHECK(invoke->IsInvokeStaticOrDirect());
-  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 1;
-  size_t argument_index = 0;
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  // Add move-result for StringFactory method.
-  uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* new_instance = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, new_instance);
-  current_block_->AddInstruction(invoke);
-
-  latest_result_ = invoke;
-  return true;
-}
-
-static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
-  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
-  return Primitive::GetType(type[0]);
-}
-
-bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_pc,
-                                             bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_22c();
-  uint32_t obj_reg = instruction.VRegB_22c();
-  uint16_t field_index;
-  if (instruction.IsQuickened()) {
-    if (!CanDecodeQuickenedInfo()) {
-      return false;
-    }
-    field_index = LookupQuickenedInfo(dex_pc);
-  } else {
-    field_index = instruction.VRegC_22c();
-  }
-
-  ScopedObjectAccess soa(Thread::Current());
-  ArtField* resolved_field =
-      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
-
-
-  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc);
-  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  Primitive::Type field_type = (resolved_field == nullptr)
-      ? GetFieldAccessType(*dex_file_, field_index)
-      : resolved_field->GetTypeAsPrimitiveType();
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    HInstruction* field_set = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
-                                                           value,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_set = new (arena_) HInstanceFieldSet(null_check,
-                                                 value,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_set);
-  } else {
-    HInstruction* field_get = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_get = new (arena_) HInstanceFieldGet(null_check,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_get);
-    UpdateLocal(source_or_dest_reg, field_get, dex_pc);
-  }
-
-  return true;
-}
-
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
-                                   const DexCompilationUnit& compilation_unit) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *compilation_unit.GetDexFile();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-
-  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
-}
-
-mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
-}
-
-mirror::Class* HGraphBuilder::GetCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
-}
-
-bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // GetOutermostCompilingClass returns null when the class is unresolved
-  // (e.g. if it derives from an unresolved class). This is bogus knowing that
-  // we are compiling it.
-  // When this happens we cannot establish a direct relation between the current
-  // class and the outer class, so we return false.
-  // (Note that this is only used for optimizing invokes and field accesses)
-  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
-}
-
-void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                                     uint32_t dex_pc,
-                                                     bool is_put,
-                                                     Primitive::Type field_type) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
-  } else {
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_pc,
-                                           bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtField* resolved_field = compiler_driver_->ResolveField(
-      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
-
-  if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
-    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
-  }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // The index at which the field's class is stored in the DexCache's type array.
-  uint32_t storage_index;
-  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
-    return false;
-  } else {
-    // TODO: This is rather expensive. Perf it and cache the results if needed.
-    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
-        outer_dex_cache.Get(),
-        GetCompilingClass(),
-        resolved_field,
-        field_index,
-        &storage_index);
-    bool can_easily_access = is_put ? pair.second : pair.first;
-    if (!can_easily_access) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
-      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-      return true;
-    }
-  }
-
-  bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
-  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
-                                                 storage_index,
-                                                 outer_dex_file,
-                                                 is_outer_class,
-                                                 dex_pc,
-                                                 /*needs_access_check*/ false,
-                                                 is_in_cache);
-  current_block_->AddInstruction(constant);
-
-  HInstruction* cls = constant;
-
-  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
-  if (!IsInitialized(klass)) {
-    cls = new (arena_) HClinitCheck(constant, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  uint16_t class_def_index = klass->GetDexClassDefIndex();
-  if (is_put) {
-    // We need to keep the class alive before loading the value.
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    DCHECK_EQ(value->GetType(), field_type);
-    current_block_->AddInstruction(new (arena_) HStaticFieldSet(cls,
-                                                                value,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HStaticFieldGet(cls,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  return true;
-}
-
-void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
-                                       uint16_t first_vreg,
-                                       int64_t second_vreg_or_constant,
-                                       uint32_t dex_pc,
-                                       Primitive::Type type,
-                                       bool second_is_constant,
-                                       bool isDiv) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  HInstruction* first = LoadLocal(first_vreg, type, dex_pc);
-  HInstruction* second = nullptr;
-  if (second_is_constant) {
-    if (type == Primitive::kPrimInt) {
-      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
-    } else {
-      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
-    }
-  } else {
-    second = LoadLocal(second_vreg_or_constant, type, dex_pc);
-  }
-
-  if (!second_is_constant
-      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
-      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_pc);
-    current_block_->AddInstruction(second);
-  }
-
-  if (isDiv) {
-    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
-  }
-  UpdateLocal(out_vreg, current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_pc,
-                                     bool is_put,
-                                     Primitive::Type anticipated_type) {
-  uint8_t source_or_dest_reg = instruction.VRegA_23x();
-  uint8_t array_reg = instruction.VRegB_23x();
-  uint8_t index_reg = instruction.VRegC_23x();
-
-  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot, dex_pc);
-  object = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(object);
-
-  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
-  current_block_->AddInstruction(length);
-  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt, dex_pc);
-  index = new (arena_) HBoundsCheck(index, length, dex_pc);
-  current_block_->AddInstruction(index);
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type, dex_pc);
-    // TODO: Insert a type check node if the type is Object.
-    current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
-                                        uint32_t type_index,
-                                        uint32_t number_of_vreg_arguments,
-                                        bool is_range,
-                                        uint32_t* args,
-                                        uint32_t register_index) {
-  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  bool finalizable;
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-      ? kQuickAllocArrayWithAccessCheck
-      : kQuickAllocArray;
-  HInstruction* object = new (arena_) HNewArray(length,
-                                                graph_->GetCurrentMethod(),
-                                                dex_pc,
-                                                type_index,
-                                                *dex_compilation_unit_->GetDexFile(),
-                                                entrypoint);
-  current_block_->AddInstruction(object);
-
-  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
-  DCHECK_EQ(descriptor[0], '[') << descriptor;
-  char primitive = descriptor[1];
-  DCHECK(primitive == 'I'
-      || primitive == 'L'
-      || primitive == '[') << descriptor;
-  bool is_reference_array = (primitive == 'L') || (primitive == '[');
-  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
-
-  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
-    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_pc));
-  }
-  latest_result_ = object;
-}
-
-template <typename T>
-void HGraphBuilder::BuildFillArrayData(HInstruction* object,
-                                       const T* data,
-                                       uint32_t element_count,
-                                       Primitive::Type anticipated_type,
-                                       uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_pc));
-  }
-}
-
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot, dex_pc);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
-  current_block_->AddInstruction(length);
-
-  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
-  const Instruction::ArrayDataPayload* payload =
-      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
-  const uint8_t* data = payload->data;
-  uint32_t element_count = payload->element_count;
-
-  // Implementation of this DEX instruction seems to be that the bounds check is
-  // done before doing any stores.
-  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
-
-  switch (payload->element_width) {
-    case 1:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int8_t*>(data),
-                         element_count,
-                         Primitive::kPrimByte,
-                         dex_pc);
-      break;
-    case 2:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int16_t*>(data),
-                         element_count,
-                         Primitive::kPrimShort,
-                         dex_pc);
-      break;
-    case 4:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int32_t*>(data),
-                         element_count,
-                         Primitive::kPrimInt,
-                         dex_pc);
-      break;
-    case 8:
-      BuildFillWideArrayData(null_check,
-                             reinterpret_cast<const int64_t*>(data),
-                             element_count,
-                             dex_pc);
-      break;
-    default:
-      LOG(FATAL) << "Unknown element width for " << payload->element_width;
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
-                                           const int64_t* data,
-                                           uint32_t element_count,
-                                           uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_pc));
-  }
-}
-
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (cls.Get() == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
-void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
-                                   uint8_t destination,
-                                   uint8_t reference,
-                                   uint16_t type_index,
-                                   uint32_t dex_pc) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(),
-      *dex_compilation_unit_->GetDexFile(),
-      type_index,
-      &type_known_final,
-      &type_known_abstract,
-      &use_declaring_class);
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-
-  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
-  HLoadClass* cls = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
-  current_block_->AddInstruction(cls);
-
-  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
-  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    current_block_->AddInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
-    UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc);
-  } else {
-    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
-    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
-    // which may throw. If it succeeds BoundType sets the new type of `object`
-    // for all subsequent uses.
-    current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
-    current_block_->AddInstruction(new (arena_) HBoundType(object, dex_pc));
-    UpdateLocal(reference, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
-  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
-}
-
-bool HGraphBuilder::CanDecodeQuickenedInfo() const {
-  return interpreter_metadata_ != nullptr;
-}
-
-uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
-  DCHECK(interpreter_metadata_ != nullptr);
-  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-  DCHECK_EQ(dex_pc, dex_pc_in_map);
-  return DecodeUnsignedLeb128(&interpreter_metadata_);
-}
-
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
-  switch (instruction.Opcode()) {
-    case Instruction::CONST_4: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_16: {
-      int32_t register_index = instruction.VRegA();
-      // Get 16 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_21s();
-      value <<= 48;
-      value >>= 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_32: {
-      int32_t register_index = instruction.VRegA();
-      // Get 32 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_31i();
-      value <<= 32;
-      value >>= 32;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE: {
-      int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_FROM16:
-    case Instruction::MOVE_WIDE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_OBJECT_FROM16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_VOID_NO_BARRIER:
-    case Instruction::RETURN_VOID: {
-      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
-      break;
-    }
-
-#define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
-
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32: {
-      current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::RETURN: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_35c();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, false, args, -1)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_3rc();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, true, nullptr, register_index)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::NEG_INT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_LONG: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_FLOAT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_DOUBLE: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_INT: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_LONG: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::REM_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT16: {
-      Binop_22s<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT16: {
-      Binop_22s<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT16: {
-      Binop_22s<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT16: {
-      Binop_22s<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT: {
-      Binop_22s<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT16: {
-      Binop_22s<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT8: {
-      Binop_22b<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT8: {
-      Binop_22b<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT8: {
-      Binop_22b<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT8: {
-      Binop_22b<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT_LIT8: {
-      Binop_22b<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT8: {
-      Binop_22b<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, true);
-      break;
-    }
-
-    case Instruction::REM_INT_LIT16:
-    case Instruction::REM_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, false);
-      break;
-    }
-
-    case Instruction::SHL_INT_LIT8: {
-      Binop_22b<HShl>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_LIT8: {
-      Binop_22b<HShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_LIT8: {
-      Binop_22b<HUShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_INSTANCE: {
-      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
-        return false;
-      }
-      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_ARRAY: {
-      uint16_t type_index = instruction.VRegC_22c();
-      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      bool finalizable;
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-          ? kQuickAllocArrayWithAccessCheck
-          : kQuickAllocArray;
-      current_block_->AddInstruction(new (arena_) HNewArray(length,
-                                                            graph_->GetCurrentMethod(),
-                                                            dex_pc,
-                                                            type_index,
-                                                            *dex_compilation_unit_->GetDexFile(),
-                                                            entrypoint));
-      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t type_index = instruction.VRegB_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY_RANGE: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t type_index = instruction.VRegB_3rc();
-      uint32_t register_index = instruction.VRegC_3rc();
-      BuildFilledNewArray(
-          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
-      break;
-    }
-
-    case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
-    case Instruction::MOVE_RESULT_OBJECT: {
-      if (latest_result_ == nullptr) {
-        // Only dead code can lead to this situation, where the verifier
-        // does not reject the method.
-      } else {
-        // An Invoke/FilledNewArray and its MoveResult could have landed in
-        // different blocks if there was a try/catch block boundary between
-        // them. For Invoke, we insert a StoreLocal after the instruction. For
-        // FilledNewArray, the local needs to be updated after the array was
-        // filled, otherwise we might overwrite an input vreg.
-        HStoreLocal* update_local =
-            new (arena_) HStoreLocal(GetLocalAt(instruction.VRegA()), latest_result_, dex_pc);
-        HBasicBlock* block = latest_result_->GetBlock();
-        if (block == current_block_) {
-          // MoveResult and the previous instruction are in the same block.
-          current_block_->AddInstruction(update_local);
-        } else {
-          // The two instructions are in different blocks. Insert the MoveResult
-          // before the final control-flow instruction of the previous block.
-          DCHECK(block->EndsWithControlFlowInstruction());
-          DCHECK(current_block_->GetInstructions().IsEmpty());
-          block->InsertInstructionBefore(update_local, block->GetLastInstruction());
-        }
-        latest_result_ = nullptr;
-      }
-      break;
-    }
-
-    case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::NOP:
-      break;
-
-    case Instruction::IGET:
-    case Instruction::IGET_QUICK:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_WIDE_QUICK:
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET_OBJECT_QUICK:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BOOLEAN_QUICK:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_BYTE_QUICK:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_CHAR_QUICK:
-    case Instruction::IGET_SHORT:
-    case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::IPUT:
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_SHORT:
-    case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-#define ARRAY_XX(kind, anticipated_type)                                          \
-    case Instruction::AGET##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);         \
-      break;                                                                      \
-    }                                                                             \
-    case Instruction::APUT##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);          \
-      break;                                                                      \
-    }
-
-    ARRAY_XX(, Primitive::kPrimInt);
-    ARRAY_XX(_WIDE, Primitive::kPrimLong);
-    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
-    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
-    ARRAY_XX(_BYTE, Primitive::kPrimByte);
-    ARRAY_XX(_CHAR, Primitive::kPrimChar);
-    ARRAY_XX(_SHORT, Primitive::kPrimShort);
-
-    case Instruction::ARRAY_LENGTH: {
-      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot, dex_pc);
-      object = new (arena_) HNullCheck(object, dex_pc);
-      current_block_->AddInstruction(object);
-      current_block_->AddInstruction(new (arena_) HArrayLength(object, dex_pc));
-      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING: {
-      uint32_t string_index = instruction.VRegB_21c();
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING_JUMBO: {
-      uint32_t string_index = instruction.VRegB_31c();
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
-      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_CLASS: {
-      uint16_t type_index = instruction.VRegB_21c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool dont_use_is_referrers_class;
-      // `CanAccessTypeWithoutChecks` will tell whether the method being
-      // built is trying to access its own class, so that the generated
-      // code can optimize for this case. However, the optimization does not
-      // work for inlining, so we use `IsOutermostCompilingClass` instead.
-      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
-      current_block_->AddInstruction(new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          type_index,
-          *dex_file_,
-          IsOutermostCompilingClass(type_index),
-          dex_pc,
-          !can_access,
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_EXCEPTION: {
-      current_block_->AddInstruction(new (arena_) HLoadException(dex_pc));
-      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction(), dex_pc);
-      current_block_->AddInstruction(new (arena_) HClearException(dex_pc));
-      break;
-    }
-
-    case Instruction::THROW: {
-      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
-      // We finished building this block. Set the current block to null to avoid
-      // adding dead instructions to it.
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::INSTANCE_OF: {
-      uint8_t destination = instruction.VRegA_22c();
-      uint8_t reference = instruction.VRegB_22c();
-      uint16_t type_index = instruction.VRegC_22c();
-      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::CHECK_CAST: {
-      uint8_t reference = instruction.VRegA_21c();
-      uint16_t type_index = instruction.VRegB_21c();
-      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::MONITOR_ENTER: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kEnter,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::MONITOR_EXIT: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kExit,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::SPARSE_SWITCH:
-    case Instruction::PACKED_SWITCH: {
-      BuildSwitch(instruction, dex_pc);
-      break;
-    }
-
-    default:
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of unhandled instruction "
-                     << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
-      return false;
-  }
-  return true;
-}  // NOLINT(readability/fn_size)
-
-HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  return locals_[register_index];
-}
-
-void HGraphBuilder::UpdateLocal(uint32_t register_index,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc));
-}
-
-HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index,
-                                       Primitive::Type type,
-                                       uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HLoadLocal(local, type, dex_pc));
-  return current_block_->GetLastInstruction();
+  return ssa_builder_.BuildSsa();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 50a1334..4f46d5e 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -24,14 +24,14 @@
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "instruction_builder.h"
 #include "optimizing_compiler_stats.h"
 #include "primitive.h"
 #include "nodes.h"
+#include "ssa_builder.h"
 
 namespace art {
 
-class Instruction;
-
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(HGraph* graph,
@@ -42,245 +42,65 @@
                 CompilerDriver* driver,
                 OptimizingCompilerStats* compiler_stats,
                 const uint8_t* interpreter_metadata,
-                Handle<mirror::DexCache> dex_cache)
-      : arena_(graph->GetArena()),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        current_block_(nullptr),
-        graph_(graph),
+                Handle<mirror::DexCache> dex_cache,
+                StackHandleScopeCollection* handles)
+      : graph_(graph),
         dex_file_(dex_file),
         code_item_(code_item),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
-        outer_compilation_unit_(outer_compilation_unit),
-        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
-        code_start_(code_item.insns_),
-        block_builder_(graph, dex_file, code_item),
-        latest_result_(nullptr),
         compilation_stats_(compiler_stats),
-        interpreter_metadata_(interpreter_metadata),
-        dex_cache_(dex_cache) {}
+        block_builder_(graph, dex_file, code_item),
+        ssa_builder_(graph, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             dex_file,
+                             code_item_,
+                             Primitive::GetType(dex_compilation_unit_->GetShorty()[0]),
+                             dex_compilation_unit,
+                             outer_compilation_unit,
+                             driver,
+                             interpreter_metadata,
+                             compiler_stats,
+                             dex_cache) {}
 
   // Only for unit testing.
   HGraphBuilder(HGraph* graph,
                 const DexFile::CodeItem& code_item,
+                StackHandleScopeCollection* handles,
                 Primitive::Type return_type = Primitive::kPrimInt)
-      : arena_(graph->GetArena()),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        current_block_(nullptr),
-        graph_(graph),
+      : graph_(graph),
         dex_file_(nullptr),
         code_item_(code_item),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
-        outer_compilation_unit_(nullptr),
-        return_type_(return_type),
-        code_start_(code_item.insns_),
-        block_builder_(graph, nullptr, code_item),
-        latest_result_(nullptr),
-        compilation_stats_(nullptr),
-        interpreter_metadata_(nullptr),
         null_dex_cache_(),
-        dex_cache_(null_dex_cache_) {}
+        compilation_stats_(nullptr),
+        block_builder_(graph, nullptr, code_item),
+        ssa_builder_(graph, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             /* dex_file */ nullptr,
+                             code_item_,
+                             return_type,
+                             /* dex_compilation_unit */ nullptr,
+                             /* outer_compilation_unit */ nullptr,
+                             /* compiler_driver */ nullptr,
+                             /* interpreter_metadata */ nullptr,
+                             /* compiler_stats */ nullptr,
+                             null_dex_cache_) {}
 
-  GraphAnalysisResult BuildGraph(StackHandleScopeCollection* handles);
+  GraphAnalysisResult BuildGraph();
 
   static constexpr const char* kBuilderPassName = "builder";
 
  private:
-  bool GenerateInstructions();
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
-
-  void FindNativeDebugInfoLocations(ArenaBitVector* locations);
-
-  bool CanDecodeQuickenedInfo() const;
-  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
-
-  HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const {
-    return block_builder_.GetBlockAt(dex_pc);
-  }
-
-  void InitializeLocals(uint16_t count);
-  HLocal* GetLocalAt(uint32_t register_index) const;
-  void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const;
-  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
-  void InitializeParameters(uint16_t number_of_parameters);
-
-  // Returns whether the current method needs access check for the type.
-  // Output parameter finalizable is set to whether the type is finalizable.
-  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
-
-  template<typename T>
-  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  void Binop_23x_cmp(const Instruction& instruction,
-                     Primitive::Type type,
-                     ComparisonBias bias,
-                     uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
-
-  void Conversion_12x(const Instruction& instruction,
-                      Primitive::Type input_type,
-                      Primitive::Type result_type,
-                      uint32_t dex_pc);
-
-  void BuildCheckedDivRem(uint16_t out_reg,
-                          uint16_t first_reg,
-                          int64_t second_reg_or_constant,
-                          uint32_t dex_pc,
-                          Primitive::Type type,
-                          bool second_is_lit,
-                          bool is_div);
-
-  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                        uint32_t dex_pc,
-                                        bool is_put,
-                                        Primitive::Type field_type);
-  // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_pc,
-                        bool is_get,
-                        Primitive::Type anticipated_type);
-
-  // Builds an invocation node and returns whether the instruction is supported.
-  bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_pc,
-                   uint32_t method_idx,
-                   uint32_t number_of_vreg_arguments,
-                   bool is_range,
-                   uint32_t* args,
-                   uint32_t register_index);
-
-  // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_pc,
-                           uint32_t type_index,
-                           uint32_t number_of_vreg_arguments,
-                           bool is_range,
-                           uint32_t* args,
-                           uint32_t register_index);
-
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. Currently only used for non-reference and non-floating point
-  // arrays.
-  template <typename T>
-  void BuildFillArrayData(HInstruction* object,
-                          const T* data,
-                          uint32_t element_count,
-                          Primitive::Type anticipated_type,
-                          uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. The data must be for long and double arrays.
-  void BuildFillWideArrayData(HInstruction* object,
-                              const int64_t* data,
-                              uint32_t element_count,
-                              uint32_t dex_pc);
-
-  // Builds a `HInstanceOf`, or a `HCheckCast` instruction.
-  void BuildTypeCheck(const Instruction& instruction,
-                      uint8_t destination,
-                      uint8_t reference,
-                      uint16_t type_index,
-                      uint32_t dex_pc);
-
-  // Builds an instruction sequence for a switch statement.
-  void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
-
+  void MaybeRecordStat(MethodCompilationStat compilation_stat);
   bool SkipCompilation(size_t number_of_branches);
 
-  void MaybeRecordStat(MethodCompilationStat compilation_stat);
-
-  // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
-
-  // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
-
-  // Returns whether `type_index` points to the outer-most compiling method's class.
-  bool IsOutermostCompilingClass(uint16_t type_index) const;
-
-  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                     uint32_t dex_pc,
-                                     HInvoke* invoke);
-
-  bool SetupInvokeArguments(HInvoke* invoke,
-                            uint32_t number_of_vreg_arguments,
-                            uint32_t* args,
-                            uint32_t register_index,
-                            bool is_range,
-                            const char* descriptor,
-                            size_t start_index,
-                            size_t* argument_index);
-
-  bool HandleInvoke(HInvoke* invoke,
-                    uint32_t number_of_vreg_arguments,
-                    uint32_t* args,
-                    uint32_t register_index,
-                    bool is_range,
-                    const char* descriptor,
-                    HClinitCheck* clinit_check);
-
-  bool HandleStringInit(HInvoke* invoke,
-                        uint32_t number_of_vreg_arguments,
-                        uint32_t* args,
-                        uint32_t register_index,
-                        bool is_range,
-                        const char* descriptor);
-
-  HClinitCheck* ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Build a HNewInstance instruction.
-  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
-
-  // Return whether the compiler can assume `cls` is initialized.
-  bool IsInitialized(Handle<mirror::Class> cls) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Try to resolve a method using the class linker. Return null if a method could
-  // not be resolved.
-  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
-
-  ArenaAllocator* const arena_;
-
-  ArenaVector<HLocal*> locals_;
-
-  HBasicBlock* current_block_;
   HGraph* const graph_;
-
-  // The dex file where the method being compiled is, and the bytecode data.
   const DexFile* const dex_file_;
   const DexFile::CodeItem& code_item_;
 
@@ -290,31 +110,13 @@
 
   CompilerDriver* const compiler_driver_;
 
-  // The compilation unit of the outermost method being compiled. That is the
-  // method being compiled (and not inlined), and potentially inlining other
-  // methods.
-  const DexCompilationUnit* const outer_compilation_unit_;
-
-  // The return type of the method being compiled.
-  const Primitive::Type return_type_;
-
-  // The pointer in the dex file where the instructions of the code item
-  // being currently compiled start.
-  const uint16_t* code_start_;
-
-  HBasicBlockBuilder block_builder_;
-
-  // The last invoke or fill-new-array being built. Only to be
-  // used by move-result instructions.
-  HInstruction* latest_result_;
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
 
   OptimizingCompilerStats* compilation_stats_;
 
-  const uint8_t* interpreter_metadata_;
-
-  // Dex cache for dex_file_.
-  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
-  Handle<mirror::DexCache> dex_cache_;
+  HBasicBlockBuilder block_builder_;
+  SsaBuilder ssa_builder_;
+  HInstructionBuilder instruction_builder_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 7a508fc..953c0ae 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -296,23 +296,6 @@
   }
 }
 
-int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  if (reg_number >= number_of_locals) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode.
-    return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet())  // ART method
-                          + (reg_number - number_of_locals) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - FrameEntrySpillSize()
-                          - kVRegSize  // filler.
-                          - (number_of_locals * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
-
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index cad5529..1a060b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -211,7 +211,6 @@
                                 size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
-  int32_t GetStackSlot(HLocal* local) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -525,8 +524,6 @@
     slow_paths_.reserve(8);
   }
 
-  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
-
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 98577d6..3049128 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -954,30 +954,6 @@
   __ BindTrackedLabel(label);
 }
 
-Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1724,49 +1700,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderARM::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -6440,8 +6373,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
       break;
     }
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8434128..144d58d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -345,8 +345,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 491014d..c978aaa 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1072,31 +1072,6 @@
   }
 }
 
-Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
@@ -3705,7 +3680,8 @@
              MemOperand(method_reg.X(),
                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
       // temp = temp[index_in_cache];
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
     __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -4010,14 +3986,6 @@
   __ Str(wzr, GetExceptionTlsAddress());
 }
 
-void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   if (kEmitCompilerReadBarrier) {
@@ -4156,14 +4124,6 @@
   }
 }
 
-void LocationsBuilderARM64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4556,34 +4516,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-      UNREACHABLE();
-  }
-}
-
-void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8ec7531..ec46a34 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -386,8 +386,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8b19f84..185397c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -974,31 +974,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -3896,8 +3871,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadWord,
                         reg,
                         reg,
@@ -4063,14 +4039,6 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = load->NeedsEnvironment()
       ? LocationSummary::kCallOnSlowPath
@@ -4096,14 +4064,6 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4611,33 +4571,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index afe7917..5e6fec8 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,8 +290,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 2f9eca6..246f5b7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -869,31 +869,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
                                      GpuRegister value,
                                      bool value_can_be_null) {
@@ -3110,8 +3085,9 @@
                         reg,
                         method_reg,
                         ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value());
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ LoadFromOffset(kLoadDoubleword,
                         reg,
                         reg,
@@ -3281,14 +3257,6 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = load->NeedsEnvironment()
       ? LocationSummary::kCallOnSlowPath
@@ -3317,14 +3285,6 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -3745,33 +3705,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 94767cb..4e15cdd 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -286,8 +286,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 715b5be..304cf08 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -892,30 +892,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1646,49 +1622,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -4433,8 +4366,9 @@
       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movl(reg, Address(method_reg,
                            ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1fa22fc..69a6253 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -367,8 +367,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cc46a07..056b69b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -808,8 +808,9 @@
       __ movq(reg,
               Address(CpuRegister(method_reg),
                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
-      // temp = temp[index_in_cache]
-      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
       break;
     }
@@ -1118,30 +1119,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   if (source.Equals(destination)) {
     return;
@@ -1660,49 +1637,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7ebce58..d7ce7c6 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -350,8 +350,6 @@
     return GetLabelOf(block)->Position();
   }
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index b9081cb..d1a2a26 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -111,21 +111,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  4: IntConstant [7]\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 1\n"
+      "  2: IntConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  7: Neg(4) [10]\n"
-      "  10: Return(7)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  11: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  4: IntConstant [7]\n", "  4: IntConstant\n"
-                                "  12: IntConstant [10]\n" },
-    { "  7: Neg(4) [10]\n",     removed },
-    { "  10: Return(7)\n",      "  10: Return(12)\n" }
+    { "  2: IntConstant [3]\n", "  2: IntConstant\n"
+                                "  6: IntConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",      removed },
+    { "  4: Return(3)\n",       "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -138,7 +138,7 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  4: IntConstant\n", removed },
+    { "  2: IntConstant\n", removed },
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -172,21 +172,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  6: LongConstant [9]\n"
-      "  4: SuspendCheck\n"
-      "  5: Goto 1\n"
+      "  2: LongConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  9: Neg(6) [12]\n"
-      "  12: Return(9)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  13: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [9]\n", "  6: LongConstant\n"
-                                 "  14: LongConstant [12]\n" },
-    { "  9: Neg(6) [12]\n",      removed },
-    { "  12: Return(9)\n",       "  12: Return(14)\n" }
+    { "  2: LongConstant [3]\n", "  2: LongConstant\n"
+                                 "  6: LongConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",       removed },
+    { "  4: Return(3)\n",        "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -199,7 +199,7 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
+    { "  2: LongConstant\n", removed },
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -231,23 +231,23 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  5: IntConstant [11]\n"
-      "  7: IntConstant [11]\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  11: Add(5, 7) [14]\n"
-      "  14: Return(11)\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
       "BasicBlock 2, pred: 1\n"
-      "  15: Exit\n";
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  5: IntConstant [11]\n", "  5: IntConstant\n" },
-    { "  7: IntConstant [11]\n", "  7: IntConstant\n"
-                                 "  16: IntConstant [14]\n" },
-    { "  11: Add(5, 7) [14]\n",  removed },
-    { "  14: Return(11)\n",      "  14: Return(16)\n" }
+    { "  2: IntConstant [4]\n", "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n", "  3: IntConstant\n"
+                                "  7: IntConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",   removed },
+    { "  5: Return(4)\n",       "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -260,8 +260,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  5: IntConstant\n", removed },
-    { "  7: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -300,33 +300,33 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  5: IntConstant [11]\n"
-      "  7: IntConstant [11]\n"
-      "  13: IntConstant [19]\n"
-      "  15: IntConstant [19]\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  5: IntConstant [7]\n"
+      "  6: IntConstant [7]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  11: Add(5, 7) [23]\n"
-      "  19: Add(13, 15) [23]\n"
-      "  23: Add(11, 19) [26]\n"
-      "  26: Return(23)\n"
+      "  4: Add(2, 3) [8]\n"
+      "  7: Add(5, 6) [8]\n"
+      "  8: Add(4, 7) [9]\n"
+      "  9: Return(8)\n"
       "BasicBlock 2, pred: 1\n"
-      "  27: Exit\n";
+      "  10: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  5: IntConstant [11]\n",  "  5: IntConstant\n" },
-    { "  7: IntConstant [11]\n",  "  7: IntConstant\n" },
-    { "  13: IntConstant [19]\n", "  13: IntConstant\n" },
-    { "  15: IntConstant [19]\n", "  15: IntConstant\n"
-                                  "  28: IntConstant\n"
-                                  "  29: IntConstant\n"
-                                  "  30: IntConstant [26]\n" },
-    { "  11: Add(5, 7) [23]\n",   removed },
-    { "  19: Add(13, 15) [23]\n", removed },
-    { "  23: Add(11, 19) [26]\n", removed  },
-    { "  26: Return(23)\n",       "  26: Return(30)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n" },
+    { "  5: IntConstant [7]\n",  "  5: IntConstant\n" },
+    { "  6: IntConstant [7]\n",  "  6: IntConstant\n"
+                                 "  11: IntConstant\n"
+                                 "  12: IntConstant\n"
+                                 "  13: IntConstant [9]\n" },
+    { "  4: Add(2, 3) [8]\n",    removed },
+    { "  7: Add(5, 6) [8]\n",    removed },
+    { "  8: Add(4, 7) [9]\n",    removed  },
+    { "  9: Return(8)\n",        "  9: Return(13)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -345,12 +345,12 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
+    { "  2: IntConstant\n",  removed },
+    { "  3: IntConstant\n",  removed },
     { "  5: IntConstant\n",  removed },
-    { "  7: IntConstant\n",  removed },
-    { "  13: IntConstant\n", removed },
-    { "  15: IntConstant\n", removed },
-    { "  28: IntConstant\n", removed },
-    { "  29: IntConstant\n", removed }
+    { "  6: IntConstant\n",  removed },
+    { "  11: IntConstant\n", removed },
+    { "  12: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -381,23 +381,23 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  5: IntConstant [11]\n"
-      "  7: IntConstant [11]\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  11: Sub(5, 7) [14]\n"
-      "  14: Return(11)\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
       "BasicBlock 2, pred: 1\n"
-      "  15: Exit\n";
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  5: IntConstant [11]\n", "  5: IntConstant\n" },
-    { "  7: IntConstant [11]\n", "  7: IntConstant\n"
-                                 "  16: IntConstant [14]\n" },
-    { "  11: Sub(5, 7) [14]\n",  removed },
-    { "  14: Return(11)\n",      "  14: Return(16)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n"
+                                 "  7: IntConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",    removed },
+    { "  5: Return(4)\n",        "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -410,8 +410,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  5: IntConstant\n", removed },
-    { "  7: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -444,23 +444,23 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  8: LongConstant [14]\n"
-      "  10: LongConstant [14]\n"
-      "  6: SuspendCheck\n"
-      "  7: Goto 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  14: Add(8, 10) [17]\n"
-      "  17: Return(14)\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
       "BasicBlock 2, pred: 1\n"
-      "  18: Exit\n";
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  8: LongConstant [14]\n",  "  8: LongConstant\n" },
-    { "  10: LongConstant [14]\n", "  10: LongConstant\n"
-                                   "  19: LongConstant [17]\n" },
-    { "  14: Add(8, 10) [17]\n",   removed },
-    { "  17: Return(14)\n",        "  17: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -473,8 +473,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  8: LongConstant\n", removed },
-    { "  10: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -508,23 +508,23 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  8: LongConstant [14]\n"
-      "  10: LongConstant [14]\n"
-      "  6: SuspendCheck\n"
-      "  7: Goto 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  14: Sub(8, 10) [17]\n"
-      "  17: Return(14)\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
       "BasicBlock 2, pred: 1\n"
-      "  18: Exit\n";
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  8: LongConstant [14]\n",  "  8: LongConstant\n" },
-    { "  10: LongConstant [14]\n", "  10: LongConstant\n"
-                                   "  19: LongConstant [17]\n" },
-    { "  14: Sub(8, 10) [17]\n",   removed },
-    { "  17: Return(14)\n",        "  17: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -537,8 +537,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  8: LongConstant\n", removed },
-    { "  10: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -587,44 +587,44 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  5: IntConstant [11]\n"            // v0 <- 1
-      "  7: IntConstant [11]\n"            // v1 <- 2
-      "  15: IntConstant [16]\n"           // const 5
-      "  20: IntConstant [21]\n"           // const 4
-      "  25: IntConstant [26]\n"           // const 8
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  2: IntConstant [4]\n"             // v0 <- 1
+      "  3: IntConstant [4]\n"             // v1 <- 2
+      "  6: IntConstant [7]\n"             // const 5
+      "  9: IntConstant [10]\n"            // const 4
+      "  12: IntConstant [13]\n"           // const 8
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 3\n"
-      "  11: Add(5, 7) [21]\n"             // v2 <- v0 + v1 = 1 + 2 = 3
-      "  13: Goto 3\n"                     // goto L2
+      "  4: Add(2, 3) [7]\n"               // v2 <- v0 + v1 = 1 + 2 = 3
+      "  5: Goto 3\n"                      // goto L2
       "BasicBlock 2, pred: 3, succ: 4\n"   // L1:
-      "  16: Add(21, 15) [26]\n"           // v1 <- v0 + 3 = 7 + 5 = 12
-      "  18: Goto 4\n"                     // goto L3
+      "  10: Add(7, 9) [13]\n"             // v1 <- v0 + 3 = 7 + 5 = 12
+      "  11: Goto 4\n"                     // goto L3
       "BasicBlock 3, pred: 1, succ: 2\n"   // L2:
-      "  21: Add(11, 20) [16]\n"           // v0 <- v2 + 2 = 3 + 4 = 7
-      "  23: Goto 2\n"                     // goto L1
+      "  7: Add(4, 6) [10]\n"              // v0 <- v2 + 2 = 3 + 4 = 7
+      "  8: Goto 2\n"                      // goto L1
       "BasicBlock 4, pred: 2, succ: 5\n"   // L3:
-      "  26: Add(16, 25) [29]\n"           // v2 <- v1 + 4 = 12 + 8 = 20
-      "  29: Return(26)\n"                 // return v2
+      "  13: Add(10, 12) [14]\n"           // v2 <- v1 + 4 = 12 + 8 = 20
+      "  14: Return(13)\n"                 // return v2
       "BasicBlock 5, pred: 4\n"
-      "  30: Exit\n";
+      "  15: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  5: IntConstant [11]\n",  "  5: IntConstant\n" },
-    { "  7: IntConstant [11]\n",  "  7: IntConstant\n" },
-    { "  15: IntConstant [16]\n", "  15: IntConstant\n" },
-    { "  20: IntConstant [21]\n", "  20: IntConstant\n" },
-    { "  25: IntConstant [26]\n", "  25: IntConstant\n"
-                                  "  31: IntConstant\n"
-                                  "  32: IntConstant\n"
-                                  "  33: IntConstant\n"
-                                  "  34: IntConstant [29]\n" },
-    { "  11: Add(5, 7) [21]\n",   removed },
-    { "  16: Add(21, 15) [26]\n", removed },
-    { "  21: Add(11, 20) [16]\n", removed },
-    { "  26: Add(16, 25) [29]\n", removed },
-    { "  29: Return(26)\n",       "  29: Return(34)\n"}
+    { "  2: IntConstant [4]\n",   "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",   "  3: IntConstant\n" },
+    { "  6: IntConstant [7]\n",   "  6: IntConstant\n" },
+    { "  9: IntConstant [10]\n",  "  9: IntConstant\n" },
+    { "  12: IntConstant [13]\n", "  12: IntConstant\n"
+                                  "  16: IntConstant\n"
+                                  "  17: IntConstant\n"
+                                  "  18: IntConstant\n"
+                                  "  19: IntConstant [14]\n" },
+    { "  4: Add(2, 3) [7]\n",     removed },
+    { "  10: Add(7, 9) [13]\n",   removed },
+    { "  7: Add(4, 6) [10]\n",    removed },
+    { "  13: Add(10, 12) [14]\n", removed },
+    { "  14: Return(13)\n",       "  14: Return(19)\n"}
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -647,13 +647,13 @@
   // Expected difference after dead code elimination.
   std::string expected_after_dce =
       "BasicBlock 0, succ: 1\n"
-      "  34: IntConstant [29]\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  19: IntConstant [14]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 5\n"
-      "  29: Return(34)\n"
+      "  14: Return(19)\n"
       "BasicBlock 5, pred: 1\n"
-      "  30: Exit\n";
+      "  15: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -685,31 +685,31 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-     "BasicBlock 0, succ: 1\n"
-      "  6: IntConstant [18, 22, 11]\n"
-      "  8: IntConstant [22, 11]\n"
-      "  4: SuspendCheck\n"
-      "  5: Goto 1\n"
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 5, 2\n"
-      "  11: GreaterThanOrEqual(6, 8) [12]\n"
-      "  12: If(11)\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
       "BasicBlock 2, pred: 1, succ: 3\n"
-      "  15: Goto 3\n"
+      "  7: Goto 3\n"
       "BasicBlock 3, pred: 5, 2, succ: 4\n"
-      "  22: Phi(8, 6) [18]\n"
-      "  18: Add(22, 6)\n"
-      "  20: ReturnVoid\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
       "BasicBlock 4, pred: 3\n"
-      "  21: Exit\n"
+      "  11: Exit\n"
       "BasicBlock 5, pred: 1, succ: 3\n"
       "  0: Goto 3\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: IntConstant [18, 22, 11]\n",       "  6: IntConstant [12, 18, 22]\n" },
-    { "  8: IntConstant [22, 11]\n",           "  8: IntConstant [22]\n" },
-    { "  11: GreaterThanOrEqual(6, 8) [12]\n", removed },
-    { "  12: If(11)\n",                        "  12: If(6)\n" }
+    { "  3: IntConstant [9, 8, 5]\n",        "  3: IntConstant [6, 9, 8]\n" },
+    { "  4: IntConstant [8, 5]\n",           "  4: IntConstant [8]\n" },
+    { "  5: GreaterThanOrEqual(3, 4) [6]\n", removed },
+    { "  6: If(5)\n",                        "  6: If(3)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -723,12 +723,12 @@
   // Expected graph after dead code elimination.
   std::string expected_after_dce =
       "BasicBlock 0, succ: 1\n"
-      "  4: SuspendCheck\n"
-      "  5: Goto 1\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 4\n"
-      "  20: ReturnVoid\n"
+      "  10: ReturnVoid\n"
       "BasicBlock 4, pred: 1\n"
-      "  21: Exit\n";
+      "  11: Exit\n";
 
   TestCode(data,
            expected_before,
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 04bbd9c..fe52aac 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -79,29 +79,29 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  6: IntConstant [18, 22, 11]\n"
-      "  8: IntConstant [22, 11]\n"
-      "  4: SuspendCheck\n"
-      "  5: Goto 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 5, 2\n"
-      "  11: GreaterThanOrEqual(6, 8) [12]\n"
-      "  12: If(11)\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
       "BasicBlock 2, pred: 1, succ: 3\n"
-      "  15: Goto 3\n"
+      "  7: Goto 3\n"
       "BasicBlock 3, pred: 5, 2, succ: 4\n"
-      "  22: Phi(8, 6) [18]\n"
-      "  18: Add(22, 6)\n"
-      "  20: ReturnVoid\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
       "BasicBlock 4, pred: 3\n"
-      "  21: Exit\n"
+      "  11: Exit\n"
       "BasicBlock 5, pred: 1, succ: 3\n"
       "  0: Goto 3\n";
 
   // Expected difference after dead code elimination.
   diff_t expected_diff = {
-    { "  6: IntConstant [18, 22, 11]\n", "  6: IntConstant [22, 11]\n" },
-    { "  22: Phi(8, 6) [18]\n",          "  22: Phi(8, 6)\n" },
-    { "  18: Add(22, 6)\n",              removed }
+    { "  3: IntConstant [9, 8, 5]\n",  "  3: IntConstant [8, 5]\n" },
+    { "  8: Phi(4, 3) [9]\n",          "  8: Phi(4, 3)\n" },
+    { "  9: Add(8, 3)\n",              removed }
   };
   std::string expected_after = Patch(expected_before, expected_diff);
 
@@ -145,36 +145,36 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  5: IntConstant [11]\n"
-      "  7: IntConstant [11]\n"
-      "  15: IntConstant [16]\n"
-      "  20: IntConstant [21]\n"
-      "  25: IntConstant [26]\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  6: IntConstant [7]\n"
+      "  9: IntConstant [10]\n"
+      "  12: IntConstant [13]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 3\n"
-      "  11: Add(5, 7) [21]\n"
-      "  13: Goto 3\n"
+      "  4: Add(2, 3) [7]\n"
+      "  5: Goto 3\n"
       "BasicBlock 2, pred: 3, succ: 4\n"
-      "  16: Add(21, 15) [26]\n"
-      "  18: Goto 4\n"
+      "  10: Add(7, 9) [13]\n"
+      "  11: Goto 4\n"
       "BasicBlock 3, pred: 1, succ: 2\n"
-      "  21: Add(11, 20) [16]\n"
-      "  23: Goto 2\n"
+      "  7: Add(4, 6) [10]\n"
+      "  8: Goto 2\n"
       "BasicBlock 4, pred: 2, succ: 5\n"
-      "  26: Add(16, 25)\n"
-      "  28: ReturnVoid\n"
+      "  13: Add(10, 12)\n"
+      "  14: ReturnVoid\n"
       "BasicBlock 5, pred: 4\n"
-      "  29: Exit\n";
+      "  15: Exit\n";
 
   std::string expected_after =
       "BasicBlock 0, succ: 1\n"
-      "  3: SuspendCheck\n"
-      "  4: Goto 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 5\n"
-      "  28: ReturnVoid\n"
+      "  14: ReturnVoid\n"
       "BasicBlock 5, pred: 1\n"
-      "  29: Exit\n";
+      "  15: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 56dc088..6abf00e 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -357,8 +357,10 @@
                                                              Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
+  outer_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   outer_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   inner_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   inner_loop_body->AddInstruction(new (&allocator) HGoto());
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index d936a8c..77e0cbc 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1075,9 +1075,10 @@
                         compiler_driver_,
                         inline_stats.get(),
                         resolved_method->GetQuickenedInfo(),
-                        dex_cache);
+                        dex_cache,
+                        handles_);
 
-  if (builder.BuildGraph(handles_) != kAnalysisSuccess) {
+  if (builder.BuildGraph() != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     return false;
@@ -1116,41 +1117,10 @@
     }
   }
 
-  // Run simple optimizations on the graph.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
-  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
-  InstructionSimplifier simplify(callee_graph, stats_);
-  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
-
-  HOptimization* optimizations[] = {
-    &intrinsics,
-    &sharpening,
-    &simplify,
-    &fold,
-    &dce,
-  };
-
-  for (size_t i = 0; i < arraysize(optimizations); ++i) {
-    HOptimization* optimization = optimizations[i];
-    optimization->Run();
-  }
-
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
-  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
-    HInliner inliner(callee_graph,
-                     outermost_graph_,
-                     codegen_,
-                     outer_compilation_unit_,
-                     dex_compilation_unit,
-                     compiler_driver_,
-                     handles_,
-                     stats_,
-                     total_number_of_dex_registers_ + code_item->registers_size_,
-                     depth_ + 1);
-    inliner.Run();
-    number_of_instructions_budget += inliner.number_of_inlined_instructions_;
-  }
+  size_t number_of_inlined_instructions =
+      RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+  number_of_instructions_budget += number_of_inlined_instructions;
 
   // TODO: We should abort only if all predecessors throw. However,
   // HGraph::InlineInto currently does not handle an exit block with
@@ -1196,7 +1166,7 @@
     for (HInstructionIterator instr_it(block->GetInstructions());
          !instr_it.Done();
          instr_it.Advance()) {
-      if (number_of_instructions++ ==  number_of_instructions_budget) {
+      if (number_of_instructions++ == number_of_instructions_budget) {
         VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " is not inlined because its caller has reached"
                        << " its instruction budget limit.";
@@ -1277,6 +1247,47 @@
   return true;
 }
 
+size_t HInliner::RunOptimizations(HGraph* callee_graph,
+                                  const DexFile::CodeItem* code_item,
+                                  const DexCompilationUnit& dex_compilation_unit) {
+  HDeadCodeElimination dce(callee_graph, stats_);
+  HConstantFolding fold(callee_graph);
+  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
+  InstructionSimplifier simplify(callee_graph, stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_);
+
+  HOptimization* optimizations[] = {
+    &intrinsics,
+    &sharpening,
+    &simplify,
+    &fold,
+    &dce,
+  };
+
+  for (size_t i = 0; i < arraysize(optimizations); ++i) {
+    HOptimization* optimization = optimizations[i];
+    optimization->Run();
+  }
+
+  size_t number_of_inlined_instructions = 0u;
+  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
+    HInliner inliner(callee_graph,
+                     outermost_graph_,
+                     codegen_,
+                     outer_compilation_unit_,
+                     dex_compilation_unit,
+                     compiler_driver_,
+                     handles_,
+                     stats_,
+                     total_number_of_dex_registers_ + code_item->registers_size_,
+                     depth_ + 1);
+    inliner.Run();
+    number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+  }
+
+  return number_of_inlined_instructions;
+}
+
 void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         HInstruction* return_replacement,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index cdb2167..7cf1424 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -76,6 +76,12 @@
                                bool same_dex_file,
                                HInstruction** return_replacement);
 
+  // Run simple optimizations on `callee_graph`.
+  // Returns the number of inlined instructions.
+  size_t RunOptimizations(HGraph* callee_graph,
+                          const DexFile::CodeItem* code_item,
+                          const DexCompilationUnit& dex_compilation_unit);
+
   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
   bool TryPatternSubstitution(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
new file mode 100644
index 0000000..06b3968
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.cc
@@ -0,0 +1,2701 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_builder.h"
+
+#include "bytecode_utils.h"
+#include "class_linker.h"
+#include "driver/compiler_options.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
+  if (compilation_stats_ != nullptr) {
+    compilation_stats_->RecordStat(compilation_stat);
+  }
+}
+
+HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
+  return block_builder_->GetBlockAt(dex_pc);
+}
+
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
+  const size_t vregs = graph_->GetNumberOfVRegs();
+  if (locals->size() != vregs) {
+    locals->resize(vregs, nullptr);
+
+    if (block->IsCatchBlock()) {
+      // We record incoming inputs of catch phis at throwing instructions and
+      // must therefore eagerly create the phis. Phis for undefined vregs will
+      // be deleted when the first throwing instruction with the vreg undefined
+      // is encountered. Unused phis will be removed by dead phi analysis.
+      for (size_t i = 0; i < vregs; ++i) {
+        // No point in creating the catch phi if it is already undefined at
+        // the first throwing instruction.
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena_) HPhi(
+              arena_,
+              i,
+              0,
+              current_local_value->GetType());
+          block->AddPhi(phi);
+          (*locals)[i] = phi;
+        }
+      }
+    }
+  }
+  return locals;
+}
+
+HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
+  return (*locals)[local];
+}
+
+void HInstructionBuilder::InitializeBlockLocals() {
+  current_locals_ = GetLocalsFor(current_block_);
+
+  if (current_block_->IsCatchBlock()) {
+    // Catch phis were already created and inputs collected from throwing sites.
+    if (kIsDebugBuild) {
+      // Make sure there was at least one throwing instruction which initialized
+      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
+      // visited already (from HTryBoundary scoping and reverse post order).
+      bool catch_block_visited = false;
+      for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (current == current_block_) {
+          catch_block_visited = true;
+        } else if (current->IsTryBlock()) {
+          const HTryBoundary& try_entry = current->GetTryCatchInformation()->GetTryEntry();
+          if (try_entry.HasExceptionHandler(*current_block_)) {
+            DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
+          }
+        }
+      }
+      DCHECK_EQ(current_locals_->size(), graph_->GetNumberOfVRegs())
+          << "No instructions throwing into a live catch block.";
+    }
+  } else if (current_block_->IsLoopHeader()) {
+    // If the block is a loop header, we know we only have visited the pre header
+    // because we are visiting in reverse post order. We create phis for all initialized
+    // locals from the pre header. Their inputs will be populated at the end of
+    // the analysis.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      HInstruction* incoming =
+          ValueOfLocalAt(current_block_->GetLoopInformation()->GetPreHeader(), local);
+      if (incoming != nullptr) {
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            0,
+            incoming->GetType());
+        current_block_->AddPhi(phi);
+        (*current_locals_)[local] = phi;
+      }
+    }
+
+    // Save the loop header so that the last phase of the analysis knows which
+    // blocks need to be updated.
+    loop_headers_.push_back(current_block_);
+  } else if (current_block_->GetPredecessors().size() > 0) {
+    // All predecessors have already been visited because we are visiting in reverse post order.
+    // We merge the values of all locals, creating phis if those values differ.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      bool one_predecessor_has_no_value = false;
+      bool is_different = false;
+      HInstruction* value = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+
+      for (HBasicBlock* predecessor : current_block_->GetPredecessors()) {
+        HInstruction* current = ValueOfLocalAt(predecessor, local);
+        if (current == nullptr) {
+          one_predecessor_has_no_value = true;
+          break;
+        } else if (current != value) {
+          is_different = true;
+        }
+      }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
+      if (is_different) {
+        HInstruction* first_input = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            current_block_->GetPredecessors().size(),
+            first_input->GetType());
+        for (size_t i = 0; i < current_block_->GetPredecessors().size(); i++) {
+          HInstruction* pred_value = ValueOfLocalAt(current_block_->GetPredecessors()[i], local);
+          phi->SetRawInputAt(i, pred_value);
+        }
+        current_block_->AddPhi(phi);
+        value = phi;
+      }
+      (*current_locals_)[local] = value;
+    }
+  }
+}
+
+void HInstructionBuilder::PropagateLocalsToCatchBlocks() {
+  const HTryBoundary& try_entry = current_block_->GetTryCatchInformation()->GetTryEntry();
+  for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
+    ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
+    DCHECK_EQ(handler_locals->size(), current_locals_->size());
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      HInstruction* handler_value = (*handler_locals)[vreg];
+      if (handler_value == nullptr) {
+        // Vreg was undefined at a previously encountered throwing instruction
+        // and the catch phi was deleted. Do not record the local value.
+        continue;
+      }
+      DCHECK(handler_value->IsPhi());
+
+      HInstruction* local_value = (*current_locals_)[vreg];
+      if (local_value == nullptr) {
+        // This is the first instruction throwing into `catch_block` where
+        // `vreg` is undefined. Delete the catch phi.
+        catch_block->RemovePhi(handler_value->AsPhi());
+        (*handler_locals)[vreg] = nullptr;
+      } else {
+        // Vreg has been defined at all instructions throwing into `catch_block`
+        // encountered so far. Record the local value in the catch phi.
+        handler_value->AsPhi()->AddInput(local_value);
+      }
+    }
+  }
+}
+
+void HInstructionBuilder::AppendInstruction(HInstruction* instruction) {
+  current_block_->AddInstruction(instruction);
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InsertInstructionAtTop(HInstruction* instruction) {
+  if (current_block_->GetInstructions().IsEmpty()) {
+    current_block_->AddInstruction(instruction);
+  } else {
+    current_block_->InsertInstructionBefore(instruction, current_block_->GetFirstInstruction());
+  }
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) {
+  if (instruction->NeedsEnvironment()) {
+    HEnvironment* environment = new (arena_) HEnvironment(
+        arena_,
+        current_locals_->size(),
+        graph_->GetDexFile(),
+        graph_->GetMethodIdx(),
+        instruction->GetDexPc(),
+        graph_->GetInvokeType(),
+        instruction);
+    environment->CopyFrom(*current_locals_);
+    instruction->SetRawEnvironment(environment);
+  }
+}
+
+void HInstructionBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocalAt(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark it dead and leave with
+          // fewer inputs than predecessors. SsaChecker will fail if not removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
+static bool IsBlockPopulated(HBasicBlock* block) {
+  if (block->IsLoopHeader()) {
+    // Suspend checks were inserted into loop headers during building of dominator tree.
+    DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
+    return block->GetFirstInstruction() != block->GetLastInstruction();
+  } else {
+    return !block->GetInstructions().IsEmpty();
+  }
+}
+
+bool HInstructionBuilder::Build() {
+  locals_for_.resize(graph_->GetBlocks().size(),
+                     ArenaVector<HInstruction*>(arena_->Adapter(kArenaAllocGraphBuilder)));
+
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at start of java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations = nullptr;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector (arena_, num_instructions, false);
+    FindNativeDebugInfoLocations(native_debug_info_locations);
+  }
+
+  for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
+    current_block_ = block_it.Current();
+    uint32_t block_dex_pc = current_block_->GetDexPc();
+
+    InitializeBlockLocals();
+
+    if (current_block_->IsEntryBlock()) {
+      InitializeParameters();
+      AppendInstruction(new (arena_) HSuspendCheck(0u));
+      AppendInstruction(new (arena_) HGoto(0u));
+      continue;
+    } else if (current_block_->IsExitBlock()) {
+      AppendInstruction(new (arena_) HExit());
+      continue;
+    } else if (current_block_->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(current_block_->GetDexPc());
+      current_block_->GetLoopInformation()->SetSuspendCheck(suspend_check);
+      // This is slightly odd because the loop header might not be empty (TryBoundary).
+      // But we're still creating the environment with locals from the top of the block.
+      InsertInstructionAtTop(suspend_check);
+    }
+
+    if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
+      // Synthetic block that does not need to be populated.
+      DCHECK(IsBlockPopulated(current_block_));
+      continue;
+    }
+
+    DCHECK(!IsBlockPopulated(current_block_));
+
+    for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
+      if (current_block_ == nullptr) {
+        // The previous instruction ended this block.
+        break;
+      }
+
+      uint32_t dex_pc = it.CurrentDexPc();
+      if (dex_pc != block_dex_pc && FindBlockStartingAt(dex_pc) != nullptr) {
+        // This dex_pc starts a new basic block.
+        break;
+      }
+
+      if (current_block_->IsTryBlock() && IsThrowingDexInstruction(it.CurrentInstruction())) {
+        PropagateLocalsToCatchBlocks();
+      }
+
+      if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+        AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+
+      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+        return false;
+      }
+    }
+
+    if (current_block_ != nullptr) {
+      // Branching instructions clear current_block, so we know the last
+      // instruction of the current block is not a branching instruction.
+      // We add an unconditional Goto to the next block.
+      DCHECK_EQ(current_block_->GetSuccessors().size(), 1u);
+      AppendInstruction(new (arena_) HGoto());
+    }
+  }
+
+  SetLoopHeaderPhiInputs();
+
+  return true;
+}
+
+void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of new java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item_.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION: {
+        // Stop in native debugger after the exception has been moved.
+        // The compiler also expects the move at the start of basic block so
+        // we do not want to interfere by inserting native-debug-info before it.
+        locations->ClearBit(inst->GetDexPc(code_item_.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item_.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Type type) const {
+  HInstruction* value = (*current_locals_)[reg_number];
+  DCHECK(value != nullptr);
+
+  // If the operation requests a specific type, we make sure its input is of that type.
+  if (type != value->GetType()) {
+    if (Primitive::IsFloatingPointType(type)) {
+      return ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+    } else if (type == Primitive::kPrimNot) {
+      return ssa_builder_->GetReferenceTypeEquivalent(value);
+    }
+  }
+
+  return value;
+}
+
+void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_value) {
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+}
+
+void HInstructionBuilder::InitializeParameters() {
+  DCHECK(current_block_->IsEntryBlock());
+
+  // dex_compilation_unit_ is null only when unit testing.
+  if (dex_compilation_unit_ == nullptr) {
+    return;
+  }
+
+  const char* shorty = dex_compilation_unit_->GetShorty();
+  uint16_t number_of_parameters = graph_->GetNumberOfInVRegs();
+  uint16_t locals_index = graph_->GetNumberOfLocalVRegs();
+  uint16_t parameter_index = 0;
+
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
+  if (!dex_compilation_unit_->IsStatic()) {
+    // Add the implicit 'this' argument, not expressed in the signature.
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
+                                                              Primitive::kPrimNot,
+                                                              true);
+    AppendInstruction(parameter);
+    UpdateLocal(locals_index++, parameter);
+    number_of_parameters--;
+  }
+
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
+    AppendInstruction(parameter);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
+    UpdateLocal(locals_index++, parameter);
+    if (Primitive::Is64BitType(parameter->GetType())) {
+      i++;
+      locals_index++;
+      parameter_index++;
+    }
+  }
+}
+
+template<typename T>
+void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(first, second, dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;
+}
+
+template<typename T>
+void HInstructionBuilder::Unop_12x(const Instruction& instruction,
+                                   Primitive::Type type,
+                                   uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Conversion_12x(const Instruction& instruction,
+                                         Primitive::Type input_type,
+                                         Primitive::Type result_type,
+                                         uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
+  AppendInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Binop_23x_cmp(const Instruction& instruction,
+                                        Primitive::Type type,
+                                        ComparisonBias bias,
+                                        uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
+  Thread* self = Thread::Current();
+  return cu->IsConstructor()
+      && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+}
+
+// Returns true if `block` has only one successor which starts at the next
+// dex_pc after `instruction` at `dex_pc`.
+static bool IsFallthroughInstruction(const Instruction& instruction,
+                                     uint32_t dex_pc,
+                                     HBasicBlock* block) {
+  uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+  return block->GetSingleSuccessor()->GetDexPc() == next_dex_pc;
+}
+
+void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  DexSwitchTable table(instruction, dex_pc);
+
+  if (table.GetNumEntries() == 0) {
+    // Empty Switch. Code falls through to the next block.
+    DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_));
+    AppendInstruction(new (arena_) HGoto(dex_pc));
+  } else if (table.ShouldBuildDecisionTree()) {
+    for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
+      HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc);
+      HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc);
+      AppendInstruction(comparison);
+      AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+
+      if (!it.IsLast()) {
+        current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex());
+      }
+    }
+  } else {
+    AppendInstruction(
+        new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc));
+  }
+
+  current_block_ = nullptr;
+}
+
+void HInstructionBuilder::BuildReturn(const Instruction& instruction,
+                                      Primitive::Type type,
+                                      uint32_t dex_pc) {
+  if (type == Primitive::kPrimVoid) {
+    if (graph_->ShouldGenerateConstructorBarrier()) {
+      // The compilation unit is null during testing.
+      if (dex_compilation_unit_ != nullptr) {
+        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_))
+          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
+      }
+      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+    }
+    AppendInstruction(new (arena_) HReturnVoid(dex_pc));
+  } else {
+    HInstruction* value = LoadLocal(instruction.VRegA(), type);
+    AppendInstruction(new (arena_) HReturn(value, dex_pc));
+  }
+  current_block_ = nullptr;
+}
+
+static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_STATIC_RANGE:
+      return kStatic;
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+      return kDirect;
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      return kVirtual;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      return kInterface;
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_SUPER:
+      return kSuper;
+    default:
+      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
+      UNREACHABLE();
+  }
+}
+
+ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable. We need to do this to know if we need to
+  // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
+  // which require runtime handling.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // We could not determine the method's class we need to wait until runtime.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    ArtMethod* current_method = graph_->GetArtMethod();
+    DCHECK(current_method != nullptr);
+    Handle<mirror::Class> methods_class(hs.NewHandle(
+        dex_compilation_unit_->GetClassLinker()->ResolveReferencedClassOfMethod(Thread::Current(),
+                                                                                method_idx,
+                                                                                current_method)));
+    if (methods_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time
+      // compilation), bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    } else {
+      ArtMethod* actual_method;
+      if (methods_class->IsInterface()) {
+        actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
+            resolved_method, class_linker->GetImagePointerSize());
+      } else {
+        uint16_t vtable_index = resolved_method->GetMethodIndex();
+        actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+            vtable_index, class_linker->GetImagePointerSize());
+      }
+      if (actual_method != resolved_method &&
+          !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // The back-end code generator relies on this check in order to ensure that it will not
+        // attempt to read the dex_cache with a dex_method_index that is not from the correct
+        // dex_file. If we didn't do this check then the dex_method_index will not be updated in the
+        // builder, which means that the code-generator (and compiler driver during sharpening and
+        // inliner, maybe) might invoke an incorrect method.
+        // TODO: The actual method could still be referenced in the current dex file, so we
+        //       could try locating it.
+        // TODO: Remove the dex_file restriction.
+        return nullptr;
+      }
+      if (!actual_method->IsInvokable()) {
+        // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+        // could resolve the callee to the wrong method.
+        return nullptr;
+      }
+      resolved_method = actual_method;
+    }
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
+bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
+                                      uint32_t dex_pc,
+                                      uint32_t method_idx,
+                                      uint32_t number_of_vreg_arguments,
+                                      bool is_range,
+                                      uint32_t* args,
+                                      uint32_t register_index) {
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
+  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
+  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
+
+  // Remove the return type from the 'proto'.
+  size_t number_of_arguments = strlen(descriptor) - 1;
+  if (invoke_type != kStatic) {  // instance call
+    // One extra argument for 'this'.
+    number_of_arguments++;
+  }
+
+  MethodReference target_method(dex_file_, method_idx);
+
+  // Special handling for string init.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
+                                                       dex_file_,
+                                                       &string_init_offset);
+  // Replace calls to String.<init> with StringFactory.
+  if (is_string_init) {
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dchecked_integral_cast<uint64_t>(string_init_offset),
+        0U
+    };
+    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
+        arena_,
+        number_of_arguments - 1,
+        Primitive::kPrimNot /*return_type */,
+        dex_pc,
+        method_idx,
+        target_method,
+        dispatch_info,
+        invoke_type,
+        kStatic /* optimized_invoke_type */,
+        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    return HandleStringInit(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor);
+  }
+
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
+                                                     number_of_arguments,
+                                                     return_type,
+                                                     dex_pc,
+                                                     method_idx,
+                                                     invoke_type);
+    return HandleInvoke(invoke,
+                        number_of_vreg_arguments,
+                        args,
+                        register_index,
+                        is_range,
+                        descriptor,
+                        nullptr /* clinit_check */);
+  }
+
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+  HInvoke* invoke = nullptr;
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
+    // By default, consider that the called method implicitly requires
+    // an initialization check of its declaring method.
+    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
+        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
+    }
+
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0U
+    };
+    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
+                                                number_of_arguments,
+                                                return_type,
+                                                dex_pc,
+                                                method_idx,
+                                                target_method,
+                                                dispatch_info,
+                                                invoke_type,
+                                                invoke_type,
+                                                clinit_check_requirement);
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeVirtual(arena_,
+                                         number_of_arguments,
+                                         return_type,
+                                         dex_pc,
+                                         method_idx,
+                                         resolved_method->GetMethodIndex());
+  } else {
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeInterface(arena_,
+                                           number_of_arguments,
+                                           return_type,
+                                           dex_pc,
+                                           method_idx,
+                                           resolved_method->GetDexMethodIndex());
+  }
+
+  return HandleInvoke(invoke,
+                      number_of_vreg_arguments,
+                      args,
+                      register_index,
+                      is_range,
+                      descriptor,
+                      clinit_check);
+}
+
+bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  bool finalizable;
+  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then we haven't resolved the method and the class may
+  // again be finalizable.
+  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      /*needs_access_check*/ can_throw,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+
+  AppendInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  AppendInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      can_throw,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for the AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
+
+HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
+
+  // The index at which the method's class is stored in the DexCache's type array.
+  uint32_t storage_index = DexFile::kDexNoIndex;
+  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                               GetCompilingClass(),
+                                                               resolved_method,
+                                                               method_idx,
+                                                               &storage_index);
+  }
+
+  HClinitCheck* clinit_check = nullptr;
+
+  if (IsInitialized(resolved_method_class)) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+  } else if (storage_index != DexFile::kDexNoIndex) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+    AppendInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(clinit_check);
+  }
+  return clinit_check;
+}
+
+bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
+                                               uint32_t number_of_vreg_arguments,
+                                               uint32_t* args,
+                                               uint32_t register_index,
+                                               bool is_range,
+                                               const char* descriptor,
+                                               size_t start_index,
+                                               size_t* argument_index) {
+  uint32_t descriptor_index = 1;  // Skip the return type.
+
+  for (size_t i = start_index;
+       // Make sure we don't go over the expected arguments or over the number of
+       // dex registers given. If the instruction was seen as dead by the verifier,
+       // it hasn't been properly checked.
+       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
+       i++, (*argument_index)++) {
+    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range
+        && is_wide
+        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
+      // reject any class where this is violated. However, the verifier only does these checks
+      // on non trivially dead instructions, so we just bailout the compilation.
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of non-sequential dex register pair in wide argument";
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      return false;
+    }
+    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+    invoke->SetArgumentAt(*argument_index, arg);
+    if (is_wide) {
+      i++;
+    }
+  }
+
+  if (*argument_index != invoke->GetNumberOfArguments()) {
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << " because of wrong number of arguments in invoke instruction";
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    return false;
+  }
+
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
+    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
+    (*argument_index)++;
+  }
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
+                                       uint32_t number_of_vreg_arguments,
+                                       uint32_t* args,
+                                       uint32_t register_index,
+                                       bool is_range,
+                                       const char* descriptor,
+                                       HClinitCheck* clinit_check) {
+  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 0;
+  size_t argument_index = 0;
+  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
+    HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
+    AppendInstruction(null_check);
+    invoke->SetArgumentAt(0, null_check);
+    start_index = 1;
+    argument_index = 1;
+  }
+
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  if (clinit_check != nullptr) {
+    // Add the class initialization check as last input of `invoke`.
+    DCHECK(invoke->IsInvokeStaticOrDirect());
+    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
+        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
+    invoke->SetArgumentAt(argument_index, clinit_check);
+    argument_index++;
+  }
+
+  AppendInstruction(invoke);
+  latest_result_ = invoke;
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
+                                           uint32_t number_of_vreg_arguments,
+                                           uint32_t* args,
+                                           uint32_t register_index,
+                                           bool is_range,
+                                           const char* descriptor) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 1;
+  size_t argument_index = 0;
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  AppendInstruction(invoke);
+
+  // This is a StringFactory call, not an actual String constructor. Its result
+  // replaces the empty String pre-allocated by NewInstance.
+  uint32_t orig_this_reg = is_range ? register_index : args[0];
+  HInstruction* arg_this = LoadLocal(orig_this_reg, Primitive::kPrimNot);
+
+  // Replacing the NewInstance might render it redundant. Keep a list of these
+  // to be visited once it is clear whether it is has remaining uses.
+  if (arg_this->IsNewInstance()) {
+    ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
+  } else {
+    DCHECK(arg_this->IsPhi());
+    // NewInstance is not the direct input of the StringFactory call. It might
+    // be redundant but optimizing this case is not worth the effort.
+  }
+
+  // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+  for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+    if ((*current_locals_)[vreg] == arg_this) {
+      (*current_locals_)[vreg] = invoke;
+    }
+  }
+
+  return true;
+}
+
+static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+  return Primitive::GetType(type[0]);
+}
+
+bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
+                                                   uint32_t dex_pc,
+                                                   bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index;
+  if (instruction.IsQuickened()) {
+    if (!CanDecodeQuickenedInfo()) {
+      return false;
+    }
+    field_index = LookupQuickenedInfo(dex_pc);
+  } else {
+    field_index = instruction.VRegC_22c();
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtField* resolved_field =
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
+
+
+  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
+  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
+  AppendInstruction(null_check);
+
+  Primitive::Type field_type = (resolved_field == nullptr)
+      ? GetFieldAccessType(*dex_file_, field_index)
+      : resolved_field->GetTypeAsPrimitiveType();
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    HInstruction* field_set = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
+                                                           value,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_set = new (arena_) HInstanceFieldSet(null_check,
+                                                 value,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_set);
+  } else {
+    HInstruction* field_get = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_get = new (arena_) HInstanceFieldGet(null_check,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_get);
+    UpdateLocal(source_or_dest_reg, field_get);
+  }
+
+  return true;
+}
+
+static mirror::Class* GetClassFrom(CompilerDriver* driver,
+                                   const DexCompilationUnit& compilation_unit) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *compilation_unit.GetDexFile();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
+
+  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
+}
+
+mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
+}
+
+mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
+}
+
+bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
+      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class). This is bogus knowing that
+  // we are compiling it.
+  // When this happens we cannot establish a direct relation between the current
+  // class and the outer class, so we return false.
+  // (Note that this is only used for optimizing invokes and field accesses)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
+}
+
+void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                                     uint32_t dex_pc,
+                                                     bool is_put,
+                                                     Primitive::Type field_type) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    AppendInstruction(
+        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
+                                                 uint32_t dex_pc,
+                                                 bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  ArtField* resolved_field = compiler_driver_->ResolveField(
+      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
+
+  if (resolved_field == nullptr) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
+    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+    return true;
+  }
+
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // The index at which the field's class is stored in the DexCache's type array.
+  uint32_t storage_index;
+  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
+    return false;
+  } else {
+    // TODO: This is rather expensive. Perf it and cache the results if needed.
+    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
+        outer_dex_cache.Get(),
+        GetCompilingClass(),
+        resolved_field,
+        field_index,
+        &storage_index);
+    bool can_easily_access = is_put ? pair.second : pair.first;
+    if (!can_easily_access) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+      return true;
+    }
+  }
+
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
+  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
+                                                 storage_index,
+                                                 outer_dex_file,
+                                                 is_outer_class,
+                                                 dex_pc,
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
+  AppendInstruction(constant);
+
+  HInstruction* cls = constant;
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+  if (!IsInitialized(klass)) {
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
+  if (is_put) {
+    // We need to keep the class alive before loading the value.
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type));
+    AppendInstruction(new (arena_) HStaticFieldSet(cls,
+                                                   value,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HStaticFieldGet(cls,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
+void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                       uint16_t first_vreg,
+                                       int64_t second_vreg_or_constant,
+                                       uint32_t dex_pc,
+                                       Primitive::Type type,
+                                       bool second_is_constant,
+                                       bool isDiv) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
+    } else {
+      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    AppendInstruction(second);
+  }
+
+  if (isDiv) {
+    AppendInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
+                                           uint32_t dex_pc,
+                                           bool is_put,
+                                           Primitive::Type anticipated_type) {
+  uint8_t source_or_dest_reg = instruction.VRegA_23x();
+  uint8_t array_reg = instruction.VRegB_23x();
+  uint8_t index_reg = instruction.VRegC_23x();
+
+  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
+  object = new (arena_) HNullCheck(object, dex_pc);
+  AppendInstruction(object);
+
+  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
+  AppendInstruction(length);
+  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
+  AppendInstruction(index);
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
+    // TODO: Insert a type check node if the type is Object.
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  } else {
+    HArrayGet* aget = new (arena_) HArrayGet(object, index, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArrayGet(aget);
+    AppendInstruction(aget);
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+                                              uint32_t type_index,
+                                              uint32_t number_of_vreg_arguments,
+                                              bool is_range,
+                                              uint32_t* args,
+                                              uint32_t register_index) {
+  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+      ? kQuickAllocArrayWithAccessCheck
+      : kQuickAllocArray;
+  HInstruction* object = new (arena_) HNewArray(length,
+                                                graph_->GetCurrentMethod(),
+                                                dex_pc,
+                                                type_index,
+                                                *dex_compilation_unit_->GetDexFile(),
+                                                entrypoint);
+  AppendInstruction(object);
+
+  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
+  DCHECK_EQ(descriptor[0], '[') << descriptor;
+  char primitive = descriptor[1];
+  DCHECK(primitive == 'I'
+      || primitive == 'L'
+      || primitive == '[') << descriptor;
+  bool is_reference_array = (primitive == 'L') || (primitive == '[');
+  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
+
+  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
+    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+  latest_result_ = object;
+}
+
+template <typename T>
+void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
+                                             const T* data,
+                                             uint32_t element_count,
+                                             Primitive::Type anticipated_type,
+                                             uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
+  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
+  AppendInstruction(null_check);
+
+  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
+  AppendInstruction(length);
+
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
+  const Instruction::ArrayDataPayload* payload =
+      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset);
+  const uint8_t* data = payload->data;
+  uint32_t element_count = payload->element_count;
+
+  // Implementation of this DEX instruction seems to be that the bounds check is
+  // done before doing any stores.
+  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
+  AppendInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
+
+  switch (payload->element_width) {
+    case 1:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int8_t*>(data),
+                         element_count,
+                         Primitive::kPrimByte,
+                         dex_pc);
+      break;
+    case 2:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int16_t*>(data),
+                         element_count,
+                         Primitive::kPrimShort,
+                         dex_pc);
+      break;
+    case 4:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int32_t*>(data),
+                         element_count,
+                         Primitive::kPrimInt,
+                         dex_pc);
+      break;
+    case 8:
+      BuildFillWideArrayData(null_check,
+                             reinterpret_cast<const int64_t*>(data),
+                             element_count,
+                             dex_pc);
+      break;
+    default:
+      LOG(FATAL) << "Unknown element width for " << payload->element_width;
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
+                                                 const int64_t* data,
+                                                 uint32_t element_count,
+                                                 uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, Primitive::kPrimLong, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls.Get() == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (cls->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (cls->IsArrayClass()) {
+    if (cls->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (cls->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (cls->IsFinal()) {
+    return TypeCheckKind::kExactCheck;
+  } else if (cls->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
+void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
+                                         uint8_t destination,
+                                         uint8_t reference,
+                                         uint16_t type_index,
+                                         uint32_t dex_pc) {
+  bool type_known_final, type_known_abstract, use_declaring_class;
+  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(),
+      *dex_compilation_unit_->GetDexFile(),
+      type_index,
+      &type_known_final,
+      &type_known_abstract,
+      &use_declaring_class);
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+
+  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
+  HLoadClass* cls = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
+  AppendInstruction(cls);
+
+  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
+  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
+    AppendInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
+    UpdateLocal(destination, current_block_->GetLastInstruction());
+  } else {
+    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
+    // which may throw. If it succeeds BoundType sets the new type of `object`
+    // for all subsequent uses.
+    AppendInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (arena_) HBoundType(object, dex_pc));
+    UpdateLocal(reference, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
+}
+
+bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
+  return interpreter_metadata_ != nullptr;
+}
+
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
+  DCHECK(interpreter_metadata_ != nullptr);
+
+  // First check if the info has already been decoded from `interpreter_metadata_`.
+  auto it = skipped_interpreter_metadata_.find(dex_pc);
+  if (it != skipped_interpreter_metadata_.end()) {
+    // Remove the entry from the map and return the parsed info.
+    uint16_t value_in_map = it->second;
+    skipped_interpreter_metadata_.erase(it);
+    return value_in_map;
+  }
+
+  // Otherwise start parsing `interpreter_metadata_` until the slot for `dex_pc`
+  // is found. Store skipped values in the `skipped_interpreter_metadata_` map.
+  while (true) {
+    uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    uint16_t value_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+    DCHECK_LE(dex_pc_in_map, dex_pc);
+
+    if (dex_pc_in_map == dex_pc) {
+      return value_in_map;
+    } else {
+      skipped_interpreter_metadata_.Put(dex_pc_in_map, value_in_map);
+    }
+  }
+}
+
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+  switch (instruction.Opcode()) {
+    case Instruction::CONST_4: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_16: {
+      int32_t register_index = instruction.VRegA();
+      // Get 16 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_21s();
+      value <<= 48;
+      value >>= 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_32: {
+      int32_t register_index = instruction.VRegA();
+      // Get 32 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_31i();
+      value <<= 32;
+      value >>= 32;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_HIGH16: {
+      int32_t register_index = instruction.VRegA();
+      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    // Note that the SSA building will refine the types.
+    case Instruction::MOVE:
+    case Instruction::MOVE_FROM16:
+    case Instruction::MOVE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    // Note that the SSA building will refine the types.
+    case Instruction::MOVE_WIDE:
+    case Instruction::MOVE_WIDE_FROM16:
+    case Instruction::MOVE_WIDE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::MOVE_OBJECT:
+    case Instruction::MOVE_OBJECT_16:
+    case Instruction::MOVE_OBJECT_FROM16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::RETURN_VOID_NO_BARRIER:
+    case Instruction::RETURN_VOID: {
+      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
+      break;
+    }
+
+#define IF_XX(comparison, cond) \
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
+
+    IF_XX(HEqual, EQ);
+    IF_XX(HNotEqual, NE);
+    IF_XX(HLessThan, LT);
+    IF_XX(HLessThanOrEqual, LE);
+    IF_XX(HGreaterThan, GT);
+    IF_XX(HGreaterThanOrEqual, GE);
+
+    case Instruction::GOTO:
+    case Instruction::GOTO_16:
+    case Instruction::GOTO_32: {
+      AppendInstruction(new (arena_) HGoto(dex_pc));
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::RETURN: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_OBJECT: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_WIDE: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_35c();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, false, args, -1)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_3rc();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t register_index = instruction.VRegC();
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, true, nullptr, register_index)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::NEG_INT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_LONG: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_FLOAT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_DOUBLE: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_INT: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_LONG: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_BYTE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_SHORT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_CHAR: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT16: {
+      Binop_22s<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT16: {
+      Binop_22s<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT16: {
+      Binop_22s<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT16: {
+      Binop_22s<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT: {
+      Binop_22s<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT16: {
+      Binop_22s<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT8: {
+      Binop_22b<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT8: {
+      Binop_22b<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT8: {
+      Binop_22b<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT8: {
+      Binop_22b<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT_LIT8: {
+      Binop_22b<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT8: {
+      Binop_22b<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_LIT16:
+    case Instruction::DIV_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
+      break;
+    }
+
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE: {
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
+      }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::NEW_ARRAY: {
+      uint16_t type_index = instruction.VRegC_22c();
+      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+          ? kQuickAllocArrayWithAccessCheck
+          : kQuickAllocArray;
+      AppendInstruction(new (arena_) HNewArray(length,
+                                               graph_->GetCurrentMethod(),
+                                               dex_pc,
+                                               type_index,
+                                               *dex_compilation_unit_->GetDexFile(),
+                                               entrypoint));
+      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t type_index = instruction.VRegB_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY_RANGE: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t type_index = instruction.VRegB_3rc();
+      uint32_t register_index = instruction.VRegC_3rc();
+      BuildFilledNewArray(
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+      break;
+    }
+
+    case Instruction::FILL_ARRAY_DATA: {
+      BuildFillArrayData(instruction, dex_pc);
+      break;
+    }
+
+    case Instruction::MOVE_RESULT:
+    case Instruction::MOVE_RESULT_WIDE:
+    case Instruction::MOVE_RESULT_OBJECT: {
+      DCHECK(latest_result_ != nullptr);
+      UpdateLocal(instruction.VRegA(), latest_result_);
+      latest_result_ = nullptr;
+      break;
+    }
+
+    case Instruction::CMP_LONG: {
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::NOP:
+      break;
+
+    case Instruction::IGET:
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT:
+    case Instruction::IGET_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+#define ARRAY_XX(kind, anticipated_type)                                          \
+    case Instruction::AGET##kind: {                                               \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);         \
+      break;                                                                      \
+    }                                                                             \
+    case Instruction::APUT##kind: {                                               \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);          \
+      break;                                                                      \
+    }
+
+    ARRAY_XX(, Primitive::kPrimInt);
+    ARRAY_XX(_WIDE, Primitive::kPrimLong);
+    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
+    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
+    ARRAY_XX(_BYTE, Primitive::kPrimByte);
+    ARRAY_XX(_CHAR, Primitive::kPrimChar);
+    ARRAY_XX(_SHORT, Primitive::kPrimShort);
+
+    case Instruction::ARRAY_LENGTH: {
+      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
+      object = new (arena_) HNullCheck(object, dex_pc);
+      AppendInstruction(object);
+      AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
+      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_CLASS: {
+      uint16_t type_index = instruction.VRegB_21c();
+      bool type_known_final;
+      bool type_known_abstract;
+      bool dont_use_is_referrers_class;
+      // `CanAccessTypeWithoutChecks` will tell whether the method being
+      // built is trying to access its own class, so that the generated
+      // code can optimize for this case. However, the optimization does not
+      // work for inlining, so we use `IsOutermostCompilingClass` instead.
+      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
+          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
+      AppendInstruction(new (arena_) HLoadClass(
+          graph_->GetCurrentMethod(),
+          type_index,
+          *dex_file_,
+          IsOutermostCompilingClass(type_index),
+          dex_pc,
+          !can_access,
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::MOVE_EXCEPTION: {
+      AppendInstruction(new (arena_) HLoadException(dex_pc));
+      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction());
+      AppendInstruction(new (arena_) HClearException(dex_pc));
+      break;
+    }
+
+    case Instruction::THROW: {
+      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
+      AppendInstruction(new (arena_) HThrow(exception, dex_pc));
+      // We finished building this block. Set the current block to null to avoid
+      // adding dead instructions to it.
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::INSTANCE_OF: {
+      uint8_t destination = instruction.VRegA_22c();
+      uint8_t reference = instruction.VRegB_22c();
+      uint16_t type_index = instruction.VRegC_22c();
+      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::CHECK_CAST: {
+      uint8_t reference = instruction.VRegA_21c();
+      uint16_t type_index = instruction.VRegB_21c();
+      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::MONITOR_ENTER: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kEnter,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::MONITOR_EXIT: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kExit,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::SPARSE_SWITCH:
+    case Instruction::PACKED_SWITCH: {
+      BuildSwitch(instruction, dex_pc);
+      break;
+    }
+
+    default:
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of unhandled instruction "
+                     << instruction.Name();
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      return false;
+  }
+  return true;
+}  // NOLINT(readability/fn_size)
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
new file mode 100644
index 0000000..f480b70
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.h
@@ -0,0 +1,313 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "block_builder.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
+#include "driver/dex_compilation_unit.h"
+#include "mirror/dex_cache.h"
+#include "nodes.h"
+#include "optimizing_compiler_stats.h"
+#include "ssa_builder.h"
+
+namespace art {
+
+class HInstructionBuilder : public ValueObject {
+ public:
+  HInstructionBuilder(HGraph* graph,
+                      HBasicBlockBuilder* block_builder,
+                      SsaBuilder* ssa_builder,
+                      const DexFile* dex_file,
+                      const DexFile::CodeItem& code_item,
+                      Primitive::Type return_type,
+                      DexCompilationUnit* dex_compilation_unit,
+                      const DexCompilationUnit* const outer_compilation_unit,
+                      CompilerDriver* driver,
+                      const uint8_t* interpreter_metadata,
+                      OptimizingCompilerStats* compiler_stats,
+                      Handle<mirror::DexCache> dex_cache)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        return_type_(return_type),
+        block_builder_(block_builder),
+        ssa_builder_(ssa_builder),
+        locals_for_(arena_->Adapter(kArenaAllocGraphBuilder)),
+        current_block_(nullptr),
+        current_locals_(nullptr),
+        latest_result_(nullptr),
+        compiler_driver_(driver),
+        dex_compilation_unit_(dex_compilation_unit),
+        outer_compilation_unit_(outer_compilation_unit),
+        interpreter_metadata_(interpreter_metadata),
+        skipped_interpreter_metadata_(std::less<uint32_t>(),
+                                      arena_->Adapter(kArenaAllocGraphBuilder)),
+        compilation_stats_(compiler_stats),
+        dex_cache_(dex_cache),
+        loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    loop_headers_.reserve(kDefaultNumberOfLoops);
+  }
+
+  bool Build();
+
+ private:
+  void MaybeRecordStat(MethodCompilationStat compilation_stat);
+
+  void InitializeBlockLocals();
+  void PropagateLocalsToCatchBlocks();
+  void SetLoopHeaderPhiInputs();
+
+  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+  void FindNativeDebugInfoLocations(ArenaBitVector* locations);
+
+  bool CanDecodeQuickenedInfo() const;
+  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+
+  HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
+
+  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
+  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+  void UpdateLocal(uint32_t register_index, HInstruction* instruction);
+
+  void AppendInstruction(HInstruction* instruction);
+  void InsertInstructionAtTop(HInstruction* instruction);
+  void InitializeInstruction(HInstruction* instruction);
+
+  void InitializeParameters();
+
+  // Returns whether the current method needs access check for the type.
+  // Output parameter finalizable is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
+
+  template<typename T>
+  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  void Binop_23x_cmp(const Instruction& instruction,
+                     Primitive::Type type,
+                     ComparisonBias bias,
+                     uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+
+  void Conversion_12x(const Instruction& instruction,
+                      Primitive::Type input_type,
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
+
+  void BuildCheckedDivRem(uint16_t out_reg,
+                          uint16_t first_reg,
+                          int64_t second_reg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_lit,
+                          bool is_div);
+
+  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  // Builds an instance field access node and returns whether the instruction is supported.
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                        uint32_t dex_pc,
+                                        bool is_put,
+                                        Primitive::Type field_type);
+  // Builds a static field access node and returns whether the instruction is supported.
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildArrayAccess(const Instruction& instruction,
+                        uint32_t dex_pc,
+                        bool is_get,
+                        Primitive::Type anticipated_type);
+
+  // Builds an invocation node and returns whether the instruction is supported.
+  bool BuildInvoke(const Instruction& instruction,
+                   uint32_t dex_pc,
+                   uint32_t method_idx,
+                   uint32_t number_of_vreg_arguments,
+                   bool is_range,
+                   uint32_t* args,
+                   uint32_t register_index);
+
+  // Builds a new array node and the instructions that fill it.
+  void BuildFilledNewArray(uint32_t dex_pc,
+                           uint32_t type_index,
+                           uint32_t number_of_vreg_arguments,
+                           bool is_range,
+                           uint32_t* args,
+                           uint32_t register_index);
+
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. Currently only used for non-reference and non-floating point
+  // arrays.
+  template <typename T>
+  void BuildFillArrayData(HInstruction* object,
+                          const T* data,
+                          uint32_t element_count,
+                          Primitive::Type anticipated_type,
+                          uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. The data must be for long and double arrays.
+  void BuildFillWideArrayData(HInstruction* object,
+                              const int64_t* data,
+                              uint32_t element_count,
+                              uint32_t dex_pc);
+
+  // Builds a `HInstanceOf`, or a `HCheckCast` instruction.
+  void BuildTypeCheck(const Instruction& instruction,
+                      uint8_t destination,
+                      uint8_t reference,
+                      uint16_t type_index,
+                      uint32_t dex_pc);
+
+  // Builds an instruction sequence for a switch statement.
+  void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+  // Returns the outer-most compiling method's class.
+  mirror::Class* GetOutermostCompilingClass() const;
+
+  // Returns the class whose method is being compiled.
+  mirror::Class* GetCompilingClass() const;
+
+  // Returns whether `type_index` points to the outer-most compiling method's class.
+  bool IsOutermostCompilingClass(uint16_t type_index) const;
+
+  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
+                                     uint32_t dex_pc,
+                                     HInvoke* invoke);
+
+  bool SetupInvokeArguments(HInvoke* invoke,
+                            uint32_t number_of_vreg_arguments,
+                            uint32_t* args,
+                            uint32_t register_index,
+                            bool is_range,
+                            const char* descriptor,
+                            size_t start_index,
+                            size_t* argument_index);
+
+  bool HandleInvoke(HInvoke* invoke,
+                    uint32_t number_of_vreg_arguments,
+                    uint32_t* args,
+                    uint32_t register_index,
+                    bool is_range,
+                    const char* descriptor,
+                    HClinitCheck* clinit_check);
+
+  bool HandleStringInit(HInvoke* invoke,
+                        uint32_t number_of_vreg_arguments,
+                        uint32_t* args,
+                        uint32_t register_index,
+                        bool is_range,
+                        const char* descriptor);
+  void HandleStringInitResult(HInvokeStaticOrDirect* invoke);
+
+  HClinitCheck* ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Build a HNewInstance instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Return whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to resolve a method using the class linker. Return null if a method could
+  // not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  // The dex file where the method being compiled is, and the bytecode data.
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  // The return type of the method being compiled.
+  const Primitive::Type return_type_;
+
+  HBasicBlockBuilder* block_builder_;
+  SsaBuilder* ssa_builder_;
+
+  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
+  HBasicBlock* current_block_;
+  ArenaVector<HInstruction*>* current_locals_;
+  HInstruction* latest_result_;
+
+  CompilerDriver* const compiler_driver_;
+
+  // The compilation unit of the current method being compiled. Note that
+  // it can be an inlined method.
+  DexCompilationUnit* const dex_compilation_unit_;
+
+  // The compilation unit of the outermost method being compiled. That is the
+  // method being compiled (and not inlined), and potentially inlining other
+  // methods.
+  const DexCompilationUnit* const outer_compilation_unit_;
+
+  // Original values kept after instruction quickening. This is a data buffer
+  // of Leb128-encoded (dex_pc, value) pairs sorted by dex_pc.
+  const uint8_t* interpreter_metadata_;
+
+  // InstructionBuilder does not parse instructions in dex_pc order. Quickening
+  // info for out-of-order dex_pcs is stored in a map until the positions
+  // are eventually visited.
+  ArenaSafeMap<uint32_t, uint16_t> skipped_interpreter_metadata_;
+
+  OptimizingCompilerStats* compilation_stats_;
+  Handle<mirror::DexCache> dex_cache_;
+
+  ArenaVector<HBasicBlock*> loop_headers_;
+
+  static constexpr int kDefaultNumberOfLoops = 2;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index a589ef0..927e2ec 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1708,7 +1708,7 @@
 
 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
 // implementation there for longer copy lengths.
-static constexpr int32_t kSystemArrayCopyThreshold = 32;
+static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
 
 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
                                                uint32_t at,
@@ -1739,7 +1739,7 @@
   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
   if (length != nullptr) {
     int32_t len = length->GetValue();
-    if (len < 0 || len > kSystemArrayCopyThreshold) {
+    if (len < 0 || len > kSystemArrayCopyCharThreshold) {
       // Just call as normal.
       return;
     }
@@ -1882,7 +1882,7 @@
     // If the length is negative, bail out.
     __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
     // If the length > 32 then (currently) prefer libcore's native implementation.
-    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
     __ B(slow_path->GetEntryLabel(), gt);
   } else {
     // We have already checked in the LocationsBuilder for the constant case.
@@ -1943,7 +1943,271 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopy)
+// We can choose to use the native implementation there for longer copy lengths.
+static constexpr int32_t kSystemArrayCopyThreshold = 128;
+
+// CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers.
+// We want to use two temporary registers in order to reduce the register pressure in arm64.
+// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
+void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We will have to fail anyways.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0 || len >= kSystemArrayCopyThreshold) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // We only support backward copying if source and destination are the same.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations = new (allocator) LocationSummary(invoke,
+                                                               LocationSummary::kCallOnSlowPath,
+                                                               kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
+  locations->SetInAt(2, Location::RequiresRegister());
+  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
+  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
+
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  vixl::MacroAssembler* masm = GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  Register src = XRegisterFrom(locations->InAt(0));
+  Location src_pos = locations->InAt(1);
+  Register dest = XRegisterFrom(locations->InAt(2));
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = WRegisterFrom(locations->GetTemp(0));
+  Register temp2 = WRegisterFrom(locations->GetTemp(1));
+
+  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  vixl::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (!optimizations.GetDestinationIsSource() &&
+     (!src_pos.IsConstant() || !dest_pos.IsConstant())) {
+      __ Cmp(src, dest);
+  }
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ B(&conditions_on_positions_validated, ne);
+      }
+      __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
+      __ B(slow_path->GetEntryLabel(), gt);
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ B(&conditions_on_positions_validated, ne);
+    }
+    __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
+           OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
+    __ B(slow_path->GetEntryLabel(), lt);
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ Cbz(src, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ Cbz(dest, slow_path->GetEntryLabel());
+  }
+
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    // If the length is negative, bail out.
+    __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+    // If the length >= 128 then (currently) prefer native implementation.
+    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
+    __ B(slow_path->GetEntryLabel(), ge);
+  }
+  // Validity checks: source.
+  CheckSystemArrayCopyPosition(masm,
+                               src_pos,
+                               src,
+                               length,
+                               slow_path,
+                               temp1,
+                               temp2,
+                               optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckSystemArrayCopyPosition(masm,
+                               dest_pos,
+                               dest,
+                               length,
+                               slow_path,
+                               temp1,
+                               temp2,
+                               optimizations.GetCountIsDestinationLength());
+  {
+    // We use a block to end the scratch scope before the write barrier, thus
+    // freeing the temporary registers so they can be used in `MarkGCCard`.
+    UseScratchRegisterScope temps(masm);
+    Register temp3 = temps.AcquireW();
+    if (!optimizations.GetDoesNotNeedTypeCheck()) {
+      // Check whether all elements of the source array are assignable to the component
+      // type of the destination array. We do two checks: the classes are the same,
+      // or the destination is Object[]. If none of these checks succeed, we go to the
+      // slow path.
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ Ldr(temp3, HeapOperand(temp1, component_offset));
+        __ Cbz(temp3, slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ Ldr(temp3, HeapOperand(temp2, component_offset));
+        __ Cbz(temp3, slow_path->GetEntryLabel());
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+        __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, slow_path->GetEntryLabel());
+      }
+
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl::Label do_copy;
+        __ B(&do_copy, eq);
+        if (!did_unpoison) {
+          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ Ldr(temp1, HeapOperand(temp1, component_offset));
+        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ Ldr(temp1, HeapOperand(temp1, super_offset));
+        // No need to unpoison the result, we're comparing against null.
+        __ Cbnz(temp1, slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(slow_path->GetEntryLabel(), ne);
+      }
+    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+      // Bail out if the source is not a non primitive array.
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ Ldr(temp3, HeapOperand(temp1, component_offset));
+      __ Cbz(temp3, slow_path->GetEntryLabel());
+      codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+      __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Cbnz(temp3, slow_path->GetEntryLabel());
+    }
+
+    Register src_curr_addr = temp1.X();
+    Register dst_curr_addr = temp2.X();
+    Register src_stop_addr = temp3.X();
+
+    GenSystemArrayCopyAddresses(masm,
+                                Primitive::kPrimNot,
+                                src,
+                                src_pos,
+                                dest,
+                                dest_pos,
+                                length,
+                                src_curr_addr,
+                                dst_curr_addr,
+                                src_stop_addr);
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison, nor do any read barrier as the next uses of the destination
+    // array will do it.
+    vixl::Label loop, done;
+    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    __ Bind(&loop);
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(&done, eq);
+    {
+      Register tmp = temps.AcquireW();
+      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, vixl::PostIndex));
+      __ Str(tmp, MemOperand(dst_curr_addr, element_size, vixl::PostIndex));
+    }
+    __ B(&loop);
+    __ Bind(&done);
+  }
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 3202493..bdaef1d 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -211,8 +211,8 @@
    *
    * Which becomes the following graph (numbered by lifetime position):
    *       2: constant0
-   *       4: constant4
-   *       6: constant5
+   *       4: constant5
+   *       6: constant4
    *       8: goto
    *           |
    *       12: goto
@@ -247,7 +247,7 @@
   liveness.Analyze();
 
   // Test for the 0 constant.
-  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  LiveInterval* interval = graph->GetIntConstant(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the loop phi so instruction is live until
@@ -256,18 +256,18 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 4 constant.
-  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  interval = graph->GetIntConstant(4)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the end of the loop.
-  ASSERT_EQ(4u, range->GetStart());
+  ASSERT_EQ(6u, range->GetStart());
   ASSERT_EQ(24u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 5 constant.
-  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  interval = graph->GetIntConstant(5)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
-  ASSERT_EQ(6u, range->GetStart());
+  ASSERT_EQ(4u, range->GetStart());
   ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 92a987c..bd74368 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -154,7 +154,7 @@
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (0000)\n"
@@ -165,11 +165,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"  // then block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
@@ -291,7 +291,7 @@
   // }
   // return 5;
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -310,7 +310,7 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 5\n"  // exit block
@@ -386,7 +386,7 @@
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 8)
+  // (constant0, constant5, constant4, phi in block 8)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -397,11 +397,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // loop header
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 9f448af..1086cbf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -318,21 +318,11 @@
     }
   }
 
-  // Place the suspend check at the beginning of the header, so that live registers
-  // will be known when allocating registers. Note that code generation can still
-  // generate the suspend check at the back edge, but needs to be careful with
-  // loop phi spill slots (which are not written to at back edge).
   HInstruction* first_instruction = header->GetFirstInstruction();
-  if (first_instruction == nullptr) {
-    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
-    header->AddInstruction(check);
-    first_instruction = check;
-  } else if (!first_instruction->IsSuspendCheck()) {
-    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
-    header->InsertInstructionBefore(check, first_instruction);
-    first_instruction = check;
+  if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
+    // Called from DeadBlockElimination. Update SuspendCheck pointer.
+    info->SetSuspendCheck(first_instruction->AsSuspendCheck());
   }
-  info->SetSuspendCheck(first_instruction->AsSuspendCheck());
 }
 
 void HGraph::ComputeTryBlockInformation() {
@@ -1882,6 +1872,7 @@
            instr_it.Advance()) {
         HInstruction* current = instr_it.Current();
         if (current->NeedsEnvironment()) {
+          DCHECK(current->HasEnvironment());
           current->GetEnvironment()->SetAndCopyParentChain(
               outer_graph->GetArena(), invoke->GetEnvironment());
         }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a2e83a..0088fed 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -427,6 +427,10 @@
     number_of_in_vregs_ = value;
   }
 
+  uint16_t GetNumberOfInVRegs() const {
+    return number_of_in_vregs_;
+  }
+
   uint16_t GetNumberOfLocalVRegs() const {
     DCHECK(!in_ssa_form_);
     return number_of_vregs_ - number_of_in_vregs_;
@@ -1211,9 +1215,7 @@
   M(LessThanOrEqual, Condition)                                         \
   M(LoadClass, Instruction)                                             \
   M(LoadException, Instruction)                                         \
-  M(LoadLocal, Instruction)                                             \
   M(LoadString, Instruction)                                            \
-  M(Local, Instruction)                                                 \
   M(LongConstant, Constant)                                             \
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
@@ -1244,7 +1246,6 @@
   M(UnresolvedStaticFieldGet, Instruction)                              \
   M(UnresolvedStaticFieldSet, Instruction)                              \
   M(Select, Instruction)                                                \
-  M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
   M(Throw, Instruction)                                                 \
@@ -2383,6 +2384,107 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
+class HPhi : public HInstruction {
+ public:
+  HPhi(ArenaAllocator* arena,
+       uint32_t reg_number,
+       size_t number_of_inputs,
+       Primitive::Type type,
+       uint32_t dex_pc = kNoDexPc)
+      : HInstruction(SideEffects::None(), dex_pc),
+        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
+        reg_number_(reg_number) {
+    SetPackedField<TypeField>(ToPhiType(type));
+    DCHECK_NE(GetType(), Primitive::kPrimVoid);
+    // Phis are constructed live and marked dead if conflicting or unused.
+    // Individual steps of SsaBuilder should assume that if a phi has been
+    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+    SetPackedFlag<kFlagIsLive>(true);
+    SetPackedFlag<kFlagCanBeNull>(true);
+  }
+
+  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
+  static Primitive::Type ToPhiType(Primitive::Type type) {
+    return Primitive::PrimitiveKind(type);
+  }
+
+  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
+
+  size_t InputCount() const OVERRIDE { return inputs_.size(); }
+
+  void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
+
+  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(GetType() == new_type ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    SetPackedField<TypeField>(new_type);
+  }
+
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
+
+  uint32_t GetRegNumber() const { return reg_number_; }
+
+  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
+  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
+  bool IsDead() const { return !IsLive(); }
+  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
+
+  bool IsVRegEquivalentOf(HInstruction* other) const {
+    return other != nullptr
+        && other->IsPhi()
+        && other->AsPhi()->GetBlock() == GetBlock()
+        && other->AsPhi()->GetRegNumber() == GetRegNumber();
+  }
+
+  // Returns the next equivalent phi (starting from the current one) or null if there is none.
+  // An equivalent phi is a phi having the same dex register and type.
+  // It assumes that phis with the same dex register are adjacent.
+  HPhi* GetNextEquivalentPhiWithSameType() {
+    HInstruction* next = GetNext();
+    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
+      if (next->GetType() == GetType()) {
+        return next->AsPhi();
+      }
+      next = next->GetNext();
+    }
+    return nullptr;
+  }
+
+  DECLARE_INSTRUCTION(Phi);
+
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
+    return inputs_[index];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+    inputs_[index] = input;
+  }
+
+ private:
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
+  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
+  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+  ArenaVector<HUserRecord<HInstruction*> > inputs_;
+  const uint32_t reg_number_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPhi);
+};
+
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
@@ -3543,57 +3645,6 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-// A local in the graph. Corresponds to a Dex register.
-class HLocal : public HTemplateInstruction<0> {
- public:
-  explicit HLocal(uint16_t reg_number)
-      : HTemplateInstruction(SideEffects::None(), kNoDexPc), reg_number_(reg_number) {}
-
-  DECLARE_INSTRUCTION(Local);
-
-  uint16_t GetRegNumber() const { return reg_number_; }
-
- private:
-  // The Dex register number.
-  const uint16_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLocal);
-};
-
-// Load a given local. The local is an input of this instruction.
-class HLoadLocal : public HExpression<1> {
- public:
-  HLoadLocal(HLocal* local, Primitive::Type type, uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(LoadLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
-};
-
-// Store a value in a given local. This instruction has two inputs: the value
-// and the local.
-class HStoreLocal : public HTemplateInstruction<2> {
- public:
-  HStoreLocal(HLocal* local, HInstruction* value, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-    SetRawInputAt(1, value);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(StoreLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
-};
-
 class HNewInstance : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
@@ -3914,8 +3965,7 @@
                 // potentially one other if the clinit check is explicit, and potentially
                 // one other if the method is a string factory.
                 (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
-                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
-                    (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
@@ -4043,15 +4093,6 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    HInstruction* input = InputAt(index);
-    RemoveAsUserOfInput(index);
-    inputs_.pop_back();
-    return input;
-  }
-
   // Is this a call to a static method whose declaring class has an
   // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
@@ -4894,7 +4935,6 @@
                     SideEffectsForArchRuntimeCalls(input->GetType(), result_type),
                     dex_pc) {
     SetRawInputAt(0, input);
-    DCHECK_NE(input->GetType(), result_type);
     // Invariant: We should never generate a conversion to a Boolean value.
     DCHECK_NE(Primitive::kPrimBoolean, result_type);
   }
@@ -4930,115 +4970,6 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HPhi : public HInstruction {
- public:
-  HPhi(ArenaAllocator* arena,
-       uint32_t reg_number,
-       size_t number_of_inputs,
-       Primitive::Type type,
-       uint32_t dex_pc = kNoDexPc)
-      : HInstruction(SideEffects::None(), dex_pc),
-        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
-        reg_number_(reg_number) {
-    SetPackedField<TypeField>(ToPhiType(type));
-    DCHECK_NE(GetType(), Primitive::kPrimVoid);
-    // Phis are constructed live and marked dead if conflicting or unused.
-    // Individual steps of SsaBuilder should assume that if a phi has been
-    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
-    SetPackedFlag<kFlagIsLive>(true);
-    SetPackedFlag<kFlagCanBeNull>(true);
-  }
-
-  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
-  static Primitive::Type ToPhiType(Primitive::Type type) {
-    switch (type) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimChar:
-        return Primitive::kPrimInt;
-      default:
-        return type;
-    }
-  }
-
-  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
-
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
-  void AddInput(HInstruction* input);
-  void RemoveInputAt(size_t index);
-
-  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
-  void SetType(Primitive::Type new_type) {
-    // Make sure that only valid type changes occur. The following are allowed:
-    //  (1) int  -> float/ref (primitive type propagation),
-    //  (2) long -> double (primitive type propagation).
-    DCHECK(GetType() == new_type ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
-           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
-    SetPackedField<TypeField>(new_type);
-  }
-
-  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
-  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
-
-  uint32_t GetRegNumber() const { return reg_number_; }
-
-  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
-  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
-  bool IsDead() const { return !IsLive(); }
-  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
-
-  bool IsVRegEquivalentOf(HInstruction* other) const {
-    return other != nullptr
-        && other->IsPhi()
-        && other->AsPhi()->GetBlock() == GetBlock()
-        && other->AsPhi()->GetRegNumber() == GetRegNumber();
-  }
-
-  // Returns the next equivalent phi (starting from the current one) or null if there is none.
-  // An equivalent phi is a phi having the same dex register and type.
-  // It assumes that phis with the same dex register are adjacent.
-  HPhi* GetNextEquivalentPhiWithSameType() {
-    HInstruction* next = GetNext();
-    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
-      if (next->GetType() == GetType()) {
-        return next->AsPhi();
-      }
-      next = next->GetNext();
-    }
-    return nullptr;
-  }
-
-  DECLARE_INSTRUCTION(Phi);
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
- private:
-  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeSize =
-      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
-  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
-  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
-  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
-  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
-
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
-  const uint32_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPhi);
-};
-
 class HNullCheck : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
@@ -5382,7 +5313,7 @@
   // constructor.
   HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
       : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
-    DCHECK(index->GetType() == Primitive::kPrimInt);
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
@@ -5913,7 +5844,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, obj);
     SetRawInputAt(1, value);
   }
@@ -5973,7 +5904,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, value);
   }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 830aae7..cad94c7 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -729,8 +729,9 @@
                             compiler_driver,
                             compilation_stats_.get(),
                             interpreter_metadata,
-                            dex_cache);
-      GraphAnalysisResult result = builder.BuildGraph(&handles);
+                            dex_cache,
+                            &handles);
+      GraphAnalysisResult result = builder.BuildGraph();
       if (result != kAnalysisSuccess) {
         switch (result) {
           case kAnalysisSkipped:
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index b140125..dd5cb1c 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -91,8 +91,8 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScopeCollection handles(soa.Self());
-    HGraphBuilder builder(graph, *item, return_type);
-    bool graph_built = (builder.BuildGraph(&handles) == kAnalysisSuccess);
+    HGraphBuilder builder(graph, *item, &handles, return_type);
+    bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
     return graph_built ? graph : nullptr;
   }
 }
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index a444688..951cdfb 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -104,9 +104,9 @@
       "BasicBlock 1, pred: 0, succ: 3\n"
       "  2: Goto 3\n"
       "BasicBlock 2, pred: 3, succ: 4\n"
-      "  3: ReturnVoid\n"
+      "  4: ReturnVoid\n"
       "BasicBlock 3, pred: 1, succ: 2\n"
-      "  4: Goto 2\n"
+      "  3: Goto 2\n"
       "BasicBlock 4, pred: 2\n"
       "  5: Exit\n";
 
@@ -135,10 +135,10 @@
 TEST_F(PrettyPrinterTest, CFG4) {
   const char* expected =
       "BasicBlock 0, succ: 3\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 3\n"
-      "BasicBlock 1, pred: 3, 1, succ: 1\n"
       "  1: SuspendCheck\n"
+      "  2: Goto 3\n"
+      "BasicBlock 1, pred: 3, 1, succ: 1\n"
+      "  3: SuspendCheck\n"
       "  4: Goto 1\n"
       "BasicBlock 3, pred: 0, succ: 1\n"
       "  0: Goto 1\n";
@@ -176,18 +176,18 @@
 TEST_F(PrettyPrinterTest, CFG6) {
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  4: IntConstant [8, 8]\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 1\n"
+      "  3: IntConstant [4, 4]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 5, 2\n"
-      "  8: Equal(4, 4) [9]\n"
-      "  9: If(8)\n"
+      "  4: Equal(3, 3) [5]\n"
+      "  5: If(4)\n"
       "BasicBlock 2, pred: 1, succ: 3\n"
-      "  10: Goto 3\n"
+      "  6: Goto 3\n"
       "BasicBlock 3, pred: 5, 2, succ: 4\n"
-      "  11: ReturnVoid\n"
+      "  7: ReturnVoid\n"
       "BasicBlock 4, pred: 3\n"
-      "  12: Exit\n"
+      "  8: Exit\n"
       "BasicBlock 5, pred: 1, succ: 3\n"
       "  0: Goto 3\n";
 
@@ -203,17 +203,17 @@
 TEST_F(PrettyPrinterTest, CFG7) {
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  6: IntConstant [10, 10]\n"
-      "  4: SuspendCheck\n"
-      "  5: Goto 1\n"
-      "BasicBlock 1, pred: 0, succ: 5, 6\n"
-      "  10: Equal(6, 6) [11]\n"
-      "  11: If(10)\n"
-      "BasicBlock 2, pred: 6, 3, succ: 3\n"
-      "  12: Goto 3\n"
-      "BasicBlock 3, pred: 5, 2, succ: 2\n"
+      "  4: IntConstant [5, 5]\n"
       "  2: SuspendCheck\n"
-      "  13: Goto 2\n"
+      "  3: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 6\n"
+      "  5: Equal(4, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 6, 3, succ: 3\n"
+      "  11: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 2\n"
+      "  8: SuspendCheck\n"
+      "  9: Goto 2\n"
       "BasicBlock 5, pred: 1, succ: 3\n"
       "  0: Goto 3\n"
       "BasicBlock 6, pred: 1, succ: 2\n"
@@ -231,13 +231,13 @@
 TEST_F(PrettyPrinterTest, IntConstant) {
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  3: IntConstant\n"
-      "  1: SuspendCheck\n"
-      "  2: Goto 1\n"
+      "  2: IntConstant\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  5: ReturnVoid\n"
+      "  3: ReturnVoid\n"
       "BasicBlock 2, pred: 1\n"
-      "  6: Exit\n";
+      "  4: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 5a05256..eeadbeb 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -23,30 +23,9 @@
 
 namespace art {
 
-void SsaBuilder::SetLoopHeaderPhiInputs() {
-  for (size_t i = loop_headers_.size(); i > 0; --i) {
-    HBasicBlock* block = loop_headers_[i - 1];
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      size_t vreg = phi->GetRegNumber();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* value = ValueOfLocal(predecessor, vreg);
-        if (value == nullptr) {
-          // Vreg is undefined at this predecessor. Mark it dead and leave with
-          // fewer inputs than predecessors. SsaChecker will fail if not removed.
-          phi->SetDead();
-          break;
-        } else {
-          phi->AddInput(value);
-        }
-      }
-    }
-  }
-}
-
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* equality_instr = it.Current();
       if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
@@ -71,14 +50,14 @@
       // can only be the 0 constant.
       DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
-      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
+      equality_instr->ReplaceInput(graph_->GetNullConstant(), int_operand == right ? 1 : 0);
     }
   }
 }
 
 void SsaBuilder::EquivalentPhisCleanup() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
@@ -100,7 +79,7 @@
 }
 
 void SsaBuilder::FixEnvironmentPhis() {
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
       HPhi* phi = it_phis.Current()->AsPhi();
@@ -254,9 +233,9 @@
 }
 
 void SsaBuilder::RunPrimitiveTypePropagation() {
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter());
 
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
@@ -300,8 +279,14 @@
 static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
   Primitive::Type type = aget->GetType();
   DCHECK(Primitive::IsIntOrLongType(type));
-  HArrayGet* next = aget->GetNext()->AsArrayGet();
-  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+  HInstruction* next = aget->GetNext();
+  if (next != nullptr && next->IsArrayGet()) {
+    HArrayGet* next_aget = next->AsArrayGet();
+    if (next_aget->IsEquivalentOf(aget)) {
+      return next_aget;
+    }
+  }
+  return nullptr;
 }
 
 static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
@@ -334,7 +319,7 @@
   // uses (because they are untyped) and environment uses (if --debuggable).
   // After resolving all ambiguous ArrayGets, we will re-run primitive type
   // propagation on the Phis which need to be updated.
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter());
 
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -452,7 +437,7 @@
 }
 
 void SsaBuilder::RemoveRedundantUninitializedStrings() {
-  if (GetGraph()->IsDebuggable()) {
+  if (graph_->IsDebuggable()) {
     // Do not perform the optimization for consistency with the interpreter
     // which always allocates an object for new-instance of String.
     return;
@@ -460,11 +445,13 @@
 
   for (HNewInstance* new_instance : uninitialized_strings_) {
     DCHECK(new_instance->IsInBlock());
+    DCHECK(new_instance->IsStringAlloc());
+
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
     if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
-      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->ReplaceWith(graph_->GetNullConstant());
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
       // Remove LoadClass if not needed any more.
@@ -495,57 +482,47 @@
 }
 
 GraphAnalysisResult SsaBuilder::BuildSsa() {
-  DCHECK(!GetGraph()->IsInSsaForm());
+  DCHECK(!graph_->IsInSsaForm());
 
-  // 1) Visit in reverse post order. We need to have all predecessors of a block
-  // visited (with the exception of loops) in order to create the right environment
-  // for that block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop header phis.
-  SetLoopHeaderPhiInputs();
-
-  // 3) Propagate types of phis. At this point, phis are typed void in the general
+  // 1) Propagate types of phis. At this point, phis are typed void in the general
   // case, or float/double/reference if we created an equivalent phi. So we need
   // to propagate the types across phis to give them a correct type. If a type
   // conflict is detected in this stage, the phi is marked dead.
   RunPrimitiveTypePropagation();
 
-  // 4) Now that the correct primitive types have been assigned, we can get rid
+  // 2) Now that the correct primitive types have been assigned, we can get rid
   // of redundant phis. Note that we cannot do this phase before type propagation,
   // otherwise we could get rid of phi equivalents, whose presence is a requirement
   // for the type propagation phase. Note that this is to satisfy statement (a)
   // of the SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination(GetGraph()).Run();
+  SsaRedundantPhiElimination(graph_).Run();
 
-  // 5) Fix the type for null constants which are part of an equality comparison.
+  // 3) Fix the type for null constants which are part of an equality comparison.
   // We need to do this after redundant phi elimination, to ensure the only cases
   // that we can see are reference comparison against 0. The redundant phi
   // elimination ensures we do not see a phi taking two 0 constants in a HEqual
   // or HNotEqual.
   FixNullConstantType();
 
-  // 6) Compute type of reference type instructions. The pass assumes that
+  // 4) Compute type of reference type instructions. The pass assumes that
   // NullConstant has been fixed up.
-  ReferenceTypePropagation(GetGraph(), handles_, /* is_first_run */ true).Run();
+  ReferenceTypePropagation(graph_, handles_, /* is_first_run */ true).Run();
 
-  // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
-  // or long/double) and marked ArraySets with ambiguous input type. Now that RTP
-  // computed the type of the array input, the ambiguity can be resolved and the
-  // correct equivalents kept.
+  // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+  // (int/float or long/double) and marked ArraySets with ambiguous input type.
+  // Now that RTP computed the type of the array input, the ambiguity can be
+  // resolved and the correct equivalents kept.
   if (!FixAmbiguousArrayOps()) {
     return kAnalysisFailAmbiguousArrayOp;
   }
 
-  // 8) Mark dead phis. This will mark phis which are not used by instructions
+  // 6) Mark dead phis. This will mark phis which are not used by instructions
   // or other live phis. If compiling as debuggable code, phis will also be kept
   // live if they have an environment use.
-  SsaDeadPhiElimination dead_phi_elimimation(GetGraph());
+  SsaDeadPhiElimination dead_phi_elimimation(graph_);
   dead_phi_elimimation.MarkDeadPhis();
 
-  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // 7) Make sure environments use the right phi equivalent: a phi marked dead
   // can have a phi equivalent that is not dead. In that case we have to replace
   // it with the live equivalent because deoptimization and try/catch rely on
   // environments containing values of all live vregs at that point. Note that
@@ -554,165 +531,26 @@
   // environments to just reference one.
   FixEnvironmentPhis();
 
-  // 10) Now that the right phis are used for the environments, we can eliminate
+  // 8) Now that the right phis are used for the environments, we can eliminate
   // phis we do not need. Regardless of the debuggable status, this phase is
   /// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
   // as for the code generation, which does not deal with phis of conflicting
   // input types.
   dead_phi_elimimation.EliminateDeadPhis();
 
-  // 11) Step 1) replaced uses of NewInstances of String with the results of
-  // their corresponding StringFactory calls. Unless the String objects are used
-  // before they are initialized, they can be replaced with NullConstant.
-  // Note that this optimization is valid only if unsimplified code does not use
-  // the uninitialized value because we assume execution can be deoptimized at
-  // any safepoint. We must therefore perform it before any other optimizations.
+  // 9) HInstructionBuidler replaced uses of NewInstances of String with the
+  // results of their corresponding StringFactory calls. Unless the String
+  // objects are used before they are initialized, they can be replaced with
+  // NullConstant. Note that this optimization is valid only if unsimplified
+  // code does not use the uninitialized value because we assume execution can
+  // be deoptimized at any safepoint. We must therefore perform it before any
+  // other optimizations.
   RemoveRedundantUninitializedStrings();
 
-  // 12) Clear locals.
-  for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
-       !it.Done();
-       it.Advance()) {
-    HInstruction* current = it.Current();
-    if (current->IsLocal()) {
-      current->GetBlock()->RemoveInstruction(current);
-    }
-  }
-
-  GetGraph()->SetInSsaForm();
+  graph_->SetInSsaForm();
   return kAnalysisSuccess;
 }
 
-ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
-  const size_t vregs = GetGraph()->GetNumberOfVRegs();
-  if (locals->empty() && vregs != 0u) {
-    locals->resize(vregs, nullptr);
-
-    if (block->IsCatchBlock()) {
-      ArenaAllocator* arena = GetGraph()->GetArena();
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        HInstruction* current_local_value = (*current_locals_)[i];
-        if (current_local_value != nullptr) {
-          HPhi* phi = new (arena) HPhi(
-              arena,
-              i,
-              0,
-              current_local_value->GetType());
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
-      }
-    }
-  }
-  return locals;
-}
-
-HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  return (*locals)[local];
-}
-
-void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
-  current_locals_ = GetLocalsFor(block);
-
-  if (block->IsCatchBlock()) {
-    // Catch phis were already created and inputs collected from throwing sites.
-    if (kIsDebugBuild) {
-      // Make sure there was at least one throwing instruction which initialized
-      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
-      // visited already (from HTryBoundary scoping and reverse post order).
-      bool catch_block_visited = false;
-      for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-        HBasicBlock* current = it.Current();
-        if (current == block) {
-          catch_block_visited = true;
-        } else if (current->IsTryBlock() &&
-                   current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) {
-          DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
-        }
-      }
-      DCHECK_EQ(current_locals_->size(), GetGraph()->GetNumberOfVRegs())
-          << "No instructions throwing into a live catch block.";
-    }
-  } else if (block->IsLoopHeader()) {
-    // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in reverse post order. We create phis for all initialized
-    // locals from the pre header. Their inputs will be populated at the end of
-    // the analysis.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
-      if (incoming != nullptr) {
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            0,
-            incoming->GetType());
-        block->AddPhi(phi);
-        (*current_locals_)[local] = phi;
-      }
-    }
-    // Save the loop header so that the last phase of the analysis knows which
-    // blocks need to be updated.
-    loop_headers_.push_back(block);
-  } else if (block->GetPredecessors().size() > 0) {
-    // All predecessors have already been visited because we are visiting in reverse post order.
-    // We merge the values of all locals, creating phis if those values differ.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      bool one_predecessor_has_no_value = false;
-      bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
-
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* current = ValueOfLocal(predecessor, local);
-        if (current == nullptr) {
-          one_predecessor_has_no_value = true;
-          break;
-        } else if (current != value) {
-          is_different = true;
-        }
-      }
-
-      if (one_predecessor_has_no_value) {
-        // If one predecessor has no value for this local, we trust the verifier has
-        // successfully checked that there is a store dominating any read after this block.
-        continue;
-      }
-
-      if (is_different) {
-        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            block->GetPredecessors().size(),
-            first_input->GetType());
-        for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
-          phi->SetRawInputAt(i, pred_value);
-        }
-        block->AddPhi(phi);
-        value = phi;
-      }
-      (*current_locals_)[local] = value;
-    }
-  }
-
-  // Visit all instructions. The instructions of interest are:
-  // - HLoadLocal: replace them with the current value of the local.
-  // - HStoreLocal: update current value of the local and remove the instruction.
-  // - Instructions that require an environment: populate their environment
-  //   with the current values of the locals.
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    it.Current()->Accept(this);
-  }
-}
-
 /**
  * Constants in the Dex format are not typed. So the builder types them as
  * integers, but when doing the SSA form, we might realize the constant
@@ -723,11 +561,10 @@
   // We place the floating point constant next to this constant.
   HFloatConstant* result = constant->GetNext()->AsFloatConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
+    float value = bit_cast<float, int32_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HFloatConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheFloatConstant(result);
+    graph_->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -746,11 +583,10 @@
   // We place the floating point constant next to this constant.
   HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
+    double value = bit_cast<double, int64_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HDoubleConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheDoubleConstant(result);
+    graph_->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -781,7 +617,7 @@
   if (next == nullptr
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
-    ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
+    ArenaAllocator* allocator = graph_->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
       // Copy the inputs. Note that the graph may not be correctly typed
@@ -841,7 +677,7 @@
 
 HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
   if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) {
-    return value->GetBlock()->GetGraph()->GetNullConstant();
+    return graph_->GetNullConstant();
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
   } else {
@@ -849,171 +685,4 @@
   }
 }
 
-void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  Primitive::Type load_type = load->GetType();
-  HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
-  // If the operation requests a specific type, we make sure its input is of that type.
-  if (load_type != value->GetType()) {
-    if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(value, load_type);
-    } else if (load_type == Primitive::kPrimNot) {
-      value = GetReferenceTypeEquivalent(value);
-    }
-  }
-
-  load->ReplaceWith(value);
-  load->GetBlock()->RemoveInstruction(load);
-}
-
-void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  uint32_t reg_number = store->GetLocal()->GetRegNumber();
-  HInstruction* stored_value = store->InputAt(1);
-  Primitive::Type stored_type = stored_value->GetType();
-  DCHECK_NE(stored_type, Primitive::kPrimVoid);
-
-  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
-  // registers. Consider the following cases:
-  // (1) Storing a wide value must overwrite previous values in both `reg_number`
-  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
-  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
-  //     must invalidate it. We store `nullptr` in `reg_number-1`.
-  // Consequently, storing a wide value into the high vreg of another wide value
-  // will invalidate both `reg_number-1` and `reg_number+1`.
-
-  if (reg_number != 0) {
-    HInstruction* local_low = (*current_locals_)[reg_number - 1];
-    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
-      // The vreg we are storing into was previously the high vreg of a pair.
-      // We need to invalidate its low vreg.
-      DCHECK((*current_locals_)[reg_number] == nullptr);
-      (*current_locals_)[reg_number - 1] = nullptr;
-    }
-  }
-
-  (*current_locals_)[reg_number] = stored_value;
-  if (Primitive::Is64BitType(stored_type)) {
-    // We are storing a pair. Invalidate the instruction in the high vreg.
-    (*current_locals_)[reg_number + 1] = nullptr;
-  }
-
-  store->GetBlock()->RemoveInstruction(store);
-}
-
-bool SsaBuilder::IsFirstAtThrowingDexPc(HInstruction* instruction) const {
-  uint32_t dex_pc = instruction->GetDexPc();
-  if (dex_pc == kNoDexPc) {
-    return false;
-  }
-
-  // Needs to be the first HInstruction with this dex_pc.
-  HInstruction* previous = instruction->GetPrevious();
-  if (previous != nullptr && previous->GetDexPc() == dex_pc) {
-    return false;
-  }
-
-  if (instruction->IsControlFlow() && !instruction->IsThrow()) {
-    // Special-case non-throwing control-flow HInstruction because artifically
-    // created ones are given dex_pc of the nearest bytecode instructions.
-    return false;
-  }
-
-  return IsThrowingDexInstruction(GetDexInstructionAt(code_item_, dex_pc));
-}
-
-void SsaBuilder::VisitInstruction(HInstruction* instruction) {
-  if (instruction->NeedsEnvironment()) {
-    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-        GetGraph()->GetArena(),
-        current_locals_->size(),
-        GetGraph()->GetDexFile(),
-        GetGraph()->GetMethodIdx(),
-        instruction->GetDexPc(),
-        GetGraph()->GetInvokeType(),
-        instruction);
-    environment->CopyFrom(*current_locals_);
-    instruction->SetRawEnvironment(environment);
-  }
-
-  // If in a try block, propagate values of locals into catch blocks.
-  if (instruction->GetBlock()->IsTryBlock() && IsFirstAtThrowingDexPc(instruction)) {
-    const HTryBoundary& try_entry =
-        instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
-      ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
-      DCHECK_EQ(handler_locals->size(), current_locals_->size());
-      for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-        HInstruction* handler_value = (*handler_locals)[vreg];
-        if (handler_value == nullptr) {
-          // Vreg was undefined at a previously encountered throwing instruction
-          // and the catch phi was deleted. Do not record the local value.
-          continue;
-        }
-        DCHECK(handler_value->IsPhi());
-
-        HInstruction* local_value = (*current_locals_)[vreg];
-        if (local_value == nullptr) {
-          // This is the first instruction throwing into `catch_block` where
-          // `vreg` is undefined. Delete the catch phi.
-          catch_block->RemovePhi(handler_value->AsPhi());
-          (*handler_locals)[vreg] = nullptr;
-        } else {
-          // Vreg has been defined at all instructions throwing into `catch_block`
-          // encountered so far. Record the local value in the catch phi.
-          handler_value->AsPhi()->AddInput(local_value);
-        }
-      }
-    }
-  }
-}
-
-void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
-  Primitive::Type type = aget->GetType();
-  DCHECK(!Primitive::IsFloatingPointType(type));
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_agets_.push_back(aget);
-  }
-  VisitInstruction(aget);
-}
-
-void SsaBuilder::VisitArraySet(HArraySet* aset) {
-  Primitive::Type type = aset->GetValue()->GetType();
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_asets_.push_back(aset);
-  }
-  VisitInstruction(aset);
-}
-
-void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  VisitInstruction(invoke);
-
-  if (invoke->IsStringInit()) {
-    // This is a StringFactory call which acts as a String constructor. Its
-    // result replaces the empty String pre-allocated by NewInstance.
-    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
-
-    // Replacing the NewInstance might render it redundant. Keep a list of these
-    // to be visited once it is clear whether it is has remaining uses.
-    if (arg_this->IsNewInstance()) {
-      HNewInstance* new_instance = arg_this->AsNewInstance();
-      // Note that in some rare cases (b/27847265), the same NewInstance may be seen
-      // multiple times. We should only consider it once for removal, so we
-      // ensure it is not added more than once.
-      if (!ContainsElement(uninitialized_strings_, new_instance)) {
-        uninitialized_strings_.push_back(new_instance);
-      }
-    } else {
-      DCHECK(arg_this->IsPhi());
-      // NewInstance is not the direct input of the StringFactory call. It might
-      // be redundant but optimizing this case is not worth the effort.
-    }
-
-    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
-    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == arg_this) {
-        (*current_locals_)[vreg] = invoke;
-      }
-    }
-  }
-}
-
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 83da378..c37c28c 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -23,8 +23,6 @@
 
 namespace art {
 
-static constexpr int kDefaultNumberOfLoops = 2;
-
 /**
  * Transforms a graph into SSA form. The liveness guarantees of
  * this transformation are listed below. A DEX register
@@ -47,38 +45,48 @@
  *     is not set, values of Dex registers only used by environments
  *     are killed.
  */
-class SsaBuilder : public HGraphVisitor {
+class SsaBuilder : public ValueObject {
  public:
-  SsaBuilder(HGraph* graph, const DexFile::CodeItem& code_item, StackHandleScopeCollection* handles)
-      : HGraphVisitor(graph),
-        code_item_(code_item),
+  SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
+      : graph_(graph),
         handles_(handles),
         agets_fixed_(false),
-        current_locals_(nullptr),
-        loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        locals_for_(graph->GetBlocks().size(),
-                    ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-                    graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    loop_headers_.reserve(kDefaultNumberOfLoops);
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    graph_->InitializeInexactObjectRTI(handles);
   }
 
   GraphAnalysisResult BuildSsa();
 
-  // Returns locals vector for `block`. If it is a catch block, the vector will be
-  // prepopulated with catch phis for vregs which are defined in `current_locals_`.
-  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
-  HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
 
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-  void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
-  void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
-  void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitArrayGet(HArrayGet* aget) OVERRIDE;
-  void VisitArraySet(HArraySet* aset) OVERRIDE;
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void MaybeAddAmbiguousArrayGet(HArrayGet* aget) {
+    Primitive::Type type = aget->GetType();
+    DCHECK(!Primitive::IsFloatingPointType(type));
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_agets_.push_back(aget);
+    }
+  }
+
+  void MaybeAddAmbiguousArraySet(HArraySet* aset) {
+    Primitive::Type type = aset->GetValue()->GetType();
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_asets_.push_back(aset);
+    }
+  }
+
+  void AddUninitializedString(HNewInstance* string) {
+    // In some rare cases (b/27847265), the same NewInstance may be seen
+    // multiple times. We should only consider it once for removal, so we
+    // ensure it is not added more than once.
+    // Note that we cannot check whether this really is a NewInstance of String
+    // before RTP. We DCHECK that in RemoveRedundantUninitializedStrings.
+    if (!ContainsElement(uninitialized_strings_, string)) {
+      uninitialized_strings_.push_back(string);
+    }
+  }
 
  private:
   void SetLoopHeaderPhiInputs();
@@ -96,9 +104,6 @@
   bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
   void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
 
-  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
-  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
-
   HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
   HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
@@ -106,30 +111,16 @@
 
   void RemoveRedundantUninitializedStrings();
 
-  // Returns whether `instruction` is the first generated HInstruction for its
-  // dex_pc position.
-  bool IsFirstAtThrowingDexPc(HInstruction* instruction) const;
-
-  const DexFile::CodeItem& code_item_;
+  HGraph* graph_;
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
   bool agets_fixed_;
 
-  // Locals for the current block being visited.
-  ArenaVector<HInstruction*>* current_locals_;
-
-  // Keep track of loop headers found. The last phase of the analysis iterates
-  // over these blocks to set the inputs of their phis.
-  ArenaVector<HBasicBlock*> loop_headers_;
-
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
   ArenaVector<HNewInstance*> uninitialized_strings_;
 
-  // HEnvironment for each block.
-  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
-
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
 
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index a688092..218bd53 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -163,8 +163,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [8]\n"
-    "  2: IntConstant 5 [8]\n"
+    "  1: IntConstant 5 [8]\n"
+    "  2: IntConstant 4 [8]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -174,7 +174,7 @@
     "BasicBlock 3, pred: 1, succ: 4\n"
     "  7: Goto\n"
     "BasicBlock 4, pred: 2, 3, succ: 5\n"
-    "  8: Phi(1, 2) [9]\n"
+    "  8: Phi(2, 1) [9]\n"
     "  9: Return(8)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
@@ -258,19 +258,19 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [5]\n"
-    "  2: IntConstant 5 [9]\n"
+    "  1: IntConstant 5 [9]\n"
+    "  2: IntConstant 4 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
     "BasicBlock 2, pred: 1, 3, succ: 4, 3\n"
-    "  5: Phi(0, 1) [6, 6]\n"
+    "  5: Phi(0, 2) [6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 2\n"
     "  8: Goto\n"
     "BasicBlock 4, pred: 2, succ: 5\n"
-    "  9: Return(2)\n"
+    "  9: Return(1)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
 
@@ -326,8 +326,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [13]\n"
-    "  2: IntConstant 5 [13]\n"
+    "  1: IntConstant 5 [13]\n"
+    "  2: IntConstant 4 [13]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -346,7 +346,7 @@
     "BasicBlock 7, pred: 6\n"
     "  12: Exit\n"
     "BasicBlock 8, pred: 2, 3, succ: 4\n"
-    "  13: Phi(1, 2) [8, 8, 11]\n"
+    "  13: Phi(2, 1) [8, 8, 11]\n"
     "  14: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -496,7 +496,7 @@
   // does not update the local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n"
+    "  0: IntConstant 0 [4, 4, 8, 8, 6, 6, 2, 2]\n"
     "  1: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  2: Equal(0, 0) [3]\n"
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 0889098..370583e 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -68,6 +68,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
 #include "runtime.h"
@@ -1325,7 +1326,7 @@
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
+        image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum();
         image_file_location_oat_data_begin_ =
             reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
         image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 0e709eb..77efb6b 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -776,7 +776,7 @@
               if (imm5 == 0) {
                 args << "rrx";
               } else {
-                args << "ror";
+                args << "ror #" << imm5;
               }
               break;
           }
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 811d9fd..cbd0c40 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 
 #include <fstream>
+#include <functional>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -189,6 +190,28 @@
     return oss.str();
   }
 
+  // Aggregate and detail class data from an image diff.
+  struct ClassData {
+    int dirty_object_count = 0;
+
+    // Track only the byte-per-byte dirtiness (in bytes)
+    int dirty_object_byte_count = 0;
+
+    // Track the object-by-object dirtiness (in bytes)
+    int dirty_object_size_in_bytes = 0;
+
+    int clean_object_count = 0;
+
+    std::string descriptor;
+
+    int false_dirty_byte_count = 0;
+    int false_dirty_object_count = 0;
+    std::vector<mirror::Object*> false_dirty_objects;
+
+    // Remote pointers to dirty objects
+    std::vector<mirror::Object*> dirty_objects;
+  };
+
   // Look at /proc/$pid/mem and only diff the things from there
   bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -373,16 +396,10 @@
       }
     }
 
+    std::map<mirror::Class*, ClassData> class_data;
+
     // Walk each object in the remote image space and compare it against ours
     size_t different_objects = 0;
-    std::map<mirror::Class*, int /*count*/> dirty_object_class_map;
-    // Track only the byte-per-byte dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_byte_count;
-    // Track the object-by-object dirtiness (in bytes)
-    std::map<mirror::Class*, int /*byte_count*/> dirty_object_size_in_bytes;
-    std::map<mirror::Class*, int /*count*/> clean_object_class_map;
-
-    std::map<mirror::Class*, std::string> class_to_descriptor_map;
 
     std::map<off_t /* field offset */, int /* count */> art_method_field_dirty_count;
     std::vector<ArtMethod*> art_method_dirty_objects;
@@ -392,19 +409,15 @@
 
     // List of local objects that are clean, but located on dirty pages.
     std::vector<mirror::Object*> false_dirty_objects;
-    std::map<mirror::Class*, int /*byte_count*/> false_dirty_byte_count;
-    std::map<mirror::Class*, int /*object_count*/> false_dirty_object_count;
-    std::map<mirror::Class*, std::vector<mirror::Object*>> false_dirty_objects_map;
     size_t false_dirty_object_bytes = 0;
 
-    // Remote pointers to dirty objects
-    std::map<mirror::Class*, std::vector<mirror::Object*>> dirty_objects_by_class;
     // Look up remote classes by their descriptor
     std::map<std::string, mirror::Class*> remote_class_map;
     // Look up local classes by their descriptor
     std::map<std::string, mirror::Class*> local_class_map;
 
-    std::unordered_set<mirror::Object*> dirty_objects;
+    // Use set to have sorted output.
+    std::set<mirror::Object*> dirty_objects;
 
     size_t dirty_object_bytes = 0;
     const uint8_t* begin_image_ptr = image_begin_unaligned;
@@ -453,7 +466,7 @@
         dirty_object_bytes += obj->SizeOf();
         dirty_objects.insert(obj);
 
-        ++dirty_object_class_map[klass];
+        ++class_data[klass].dirty_object_count;
 
         // Go byte-by-byte and figure out what exactly got dirtied
         size_t dirty_byte_count_per_object = 0;
@@ -462,14 +475,14 @@
             dirty_byte_count_per_object++;
           }
         }
-        dirty_object_byte_count[klass] += dirty_byte_count_per_object;
-        dirty_object_size_in_bytes[klass] += obj->SizeOf();
+        class_data[klass].dirty_object_byte_count += dirty_byte_count_per_object;
+        class_data[klass].dirty_object_size_in_bytes += obj->SizeOf();
 
         different_object = true;
 
-        dirty_objects_by_class[klass].push_back(remote_obj);
+        class_data[klass].dirty_objects.push_back(remote_obj);
       } else {
-        ++clean_object_class_map[klass];
+        ++class_data[klass].clean_object_count;
       }
 
       std::string descriptor = GetClassDescriptor(klass);
@@ -503,10 +516,10 @@
         // This object was either never mutated or got mutated back to the same value.
         // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
         false_dirty_objects.push_back(obj);
-        false_dirty_objects_map[klass].push_back(obj);
+        class_data[klass].false_dirty_objects.push_back(obj);
         false_dirty_object_bytes += obj->SizeOf();
-        false_dirty_byte_count[obj->GetClass()] += obj->SizeOf();
-        false_dirty_object_count[obj->GetClass()] += 1;
+        class_data[obj->GetClass()].false_dirty_byte_count += obj->SizeOf();
+        class_data[obj->GetClass()].false_dirty_object_count += 1;
       }
 
       if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
@@ -515,7 +528,7 @@
       }
 
       // Unconditionally store the class descriptor in case we need it later
-      class_to_descriptor_map[klass] = descriptor;
+      class_data[klass].descriptor = descriptor;
       current += RoundUp(obj->SizeOf(), kObjectAlignment);
     }
 
@@ -540,8 +553,10 @@
        << "";
 
     // vector of pairs (int count, Class*)
-    auto dirty_object_class_values = SortByValueDesc(dirty_object_class_map);
-    auto clean_object_class_values = SortByValueDesc(clean_object_class_map);
+    auto dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.dirty_object_count; });
+    auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.clean_object_count; });
 
     os << "\n" << "  Dirty objects: " << dirty_objects.size() << "\n";
     for (mirror::Object* obj : dirty_objects) {
@@ -612,10 +627,11 @@
     for (const auto& vk_pair : dirty_object_class_values) {
       int dirty_object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = dirty_object_size_in_bytes[klass];
-      float avg_dirty_bytes_per_class = dirty_object_byte_count[klass] * 1.0f / object_sizes;
+      int object_sizes = class_data[klass].dirty_object_size_in_bytes;
+      float avg_dirty_bytes_per_class =
+          class_data[klass].dirty_object_byte_count * 1.0f / object_sizes;
       float avg_object_size = object_sizes * 1.0f / dirty_object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << dirty_object_count << ", "
          << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", "
@@ -634,7 +650,8 @@
         os << "\n";
 
         os << "      dirty byte +offset:count list = ";
-        auto art_method_field_dirty_count_sorted = SortByValueDesc(art_method_field_dirty_count);
+        auto art_method_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(art_method_field_dirty_count);
         for (auto pair : art_method_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -645,7 +662,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote method
           auto art_method = reinterpret_cast<ArtMethod*>(obj);
@@ -684,7 +701,8 @@
         os << "\n";
 
         os << "       dirty byte +offset:count list = ";
-        auto class_field_dirty_count_sorted = SortByValueDesc(class_field_dirty_count);
+        auto class_field_dirty_count_sorted =
+            SortByValueDesc<off_t, int, int>(class_field_dirty_count);
         for (auto pair : class_field_dirty_count_sorted) {
           off_t offset = pair.second;
           int count = pair.first;
@@ -694,7 +712,7 @@
         os << "\n";
 
         os << "      field contents:\n";
-        const auto& dirty_objects_list = dirty_objects_by_class[klass];
+        const auto& dirty_objects_list = class_data[klass].dirty_objects;
         for (mirror::Object* obj : dirty_objects_list) {
           // remote class object
           auto remote_klass = reinterpret_cast<mirror::Class*>(obj);
@@ -712,15 +730,16 @@
       }
     }
 
-    auto false_dirty_object_class_values = SortByValueDesc(false_dirty_object_count);
+    auto false_dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
+        class_data, [](const ClassData& d) { return d.false_dirty_object_count; });
 
     os << "\n" << "  False-dirty object count by class:\n";
     for (const auto& vk_pair : false_dirty_object_class_values) {
       int object_count = vk_pair.first;
       mirror::Class* klass = vk_pair.second;
-      int object_sizes = false_dirty_byte_count[klass];
+      int object_sizes = class_data[klass].false_dirty_byte_count;
       float avg_object_size = object_sizes * 1.0f / object_count;
-      const std::string& descriptor = class_to_descriptor_map[klass];
+      const std::string& descriptor = class_data[klass].descriptor;
       os << "    " << PrettyClass(klass) << " ("
          << "objects: " << object_count << ", "
          << "avg object size: " << avg_object_size << ", "
@@ -729,7 +748,7 @@
          << ")\n";
 
       if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-        auto& art_method_false_dirty_objects = false_dirty_objects_map[klass];
+        auto& art_method_false_dirty_objects = class_data[klass].false_dirty_objects;
 
         os << "      field contents:\n";
         for (mirror::Object* obj : art_method_false_dirty_objects) {
@@ -808,14 +827,16 @@
     return std::string(descriptor_str);
   }
 
-  template <typename K, typename V>
-  static std::vector<std::pair<V, K>> SortByValueDesc(const std::map<K, V> map) {
+  template <typename K, typename V, typename D>
+  static std::vector<std::pair<V, K>> SortByValueDesc(
+      const std::map<K, D> map,
+      std::function<V(const D&)> value_mapper = [](const D& d) { return static_cast<V>(d); }) {
     // Store value->key so that we can use the default sort from pair which
     // sorts by value first and then key
     std::vector<std::pair<V, K>> value_key_vector;
 
     for (const auto& kv_pair : map) {
-      value_key_vector.push_back(std::make_pair(kv_pair.second, kv_pair.first));
+      value_key_vector.push_back(std::make_pair(value_mapper(kv_pair.second), kv_pair.first));
     }
 
     // Sort in reverse (descending order)
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b673eff..3c6a05d 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1052,22 +1052,6 @@
     }
   }
 
-  void DumpInformationAtOffset(VariableIndentationOutputStream* vios,
-                               const OatFile::OatMethod& oat_method,
-                               const DexFile::CodeItem* code_item,
-                               size_t offset,
-                               bool suspend_point_mapping) {
-    if (!IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
-      // Native method.
-      return;
-    }
-    if (suspend_point_mapping) {
-      ScopedIndentation indent1(vios);
-      DumpDexRegisterMapAtOffset(vios, oat_method, code_item, offset);
-    }
-  }
-
-
   void DumpDexCode(std::ostream& os, const DexFile& dex_file, const DexFile::CodeItem* code_item) {
     if (code_item != nullptr) {
       size_t i = 0;
@@ -1104,27 +1088,6 @@
            code_item != nullptr;
   }
 
-  void DumpDexRegisterMapAtOffset(VariableIndentationOutputStream* vios,
-                                  const OatFile::OatMethod& oat_method,
-                                  const DexFile::CodeItem* code_item,
-                                  size_t offset) {
-    // This method is only relevant for oat methods compiled with the
-    // optimizing compiler.
-    DCHECK(IsMethodGeneratedByOptimizingCompiler(oat_method, code_item));
-
-    // The optimizing compiler outputs its CodeInfo data in the vmap table.
-    const void* raw_code_info = oat_method.GetVmapTable();
-    if (raw_code_info != nullptr) {
-      CodeInfo code_info(raw_code_info);
-      CodeInfoEncoding encoding = code_info.ExtractEncoding();
-      StackMap stack_map = code_info.GetStackMapForNativePcOffset(offset, encoding);
-      if (stack_map.IsValid()) {
-        stack_map.Dump(vios, code_info, encoding, oat_method.GetCodeOffset(),
-                       code_item->registers_size_);
-      }
-    }
-  }
-
   verifier::MethodVerifier* DumpVerifier(VariableIndentationOutputStream* vios,
                                          StackHandleScope<1>* hs,
                                          uint32_t dex_method_idx,
@@ -1147,6 +1110,91 @@
     return nullptr;
   }
 
+  // The StackMapsHelper provides the stack maps in the native PC order.
+  // For identical native PCs, the order from the CodeInfo is preserved.
+  class StackMapsHelper {
+   public:
+    explicit StackMapsHelper(const uint8_t* raw_code_info)
+        : code_info_(raw_code_info),
+          encoding_(code_info_.ExtractEncoding()),
+          number_of_stack_maps_(code_info_.GetNumberOfStackMaps(encoding_)),
+          indexes_(),
+          offset_(static_cast<size_t>(-1)),
+          stack_map_index_(0u) {
+      if (number_of_stack_maps_ != 0u) {
+        // Check if native PCs are ordered.
+        bool ordered = true;
+        StackMap last = code_info_.GetStackMapAt(0u, encoding_);
+        for (size_t i = 1; i != number_of_stack_maps_; ++i) {
+          StackMap current = code_info_.GetStackMapAt(i, encoding_);
+          if (last.GetNativePcOffset(encoding_.stack_map_encoding) >
+              current.GetNativePcOffset(encoding_.stack_map_encoding)) {
+            ordered = false;
+            break;
+          }
+          last = current;
+        }
+        if (!ordered) {
+          // Create indirection indexes for access in native PC order. We do not optimize
+          // for the fact that there can currently be only two separately ordered ranges,
+          // namely normal stack maps and catch-point stack maps.
+          indexes_.resize(number_of_stack_maps_);
+          std::iota(indexes_.begin(), indexes_.end(), 0u);
+          std::sort(indexes_.begin(),
+                    indexes_.end(),
+                    [this](size_t lhs, size_t rhs) {
+                      StackMap left = code_info_.GetStackMapAt(lhs, encoding_);
+                      uint32_t left_pc = left.GetNativePcOffset(encoding_.stack_map_encoding);
+                      StackMap right = code_info_.GetStackMapAt(rhs, encoding_);
+                      uint32_t right_pc = right.GetNativePcOffset(encoding_.stack_map_encoding);
+                      // If the PCs are the same, compare indexes to preserve the original order.
+                      return (left_pc < right_pc) || (left_pc == right_pc && lhs < rhs);
+                    });
+        }
+        offset_ = GetStackMapAt(0).GetNativePcOffset(encoding_.stack_map_encoding);
+      }
+    }
+
+    const CodeInfo& GetCodeInfo() const {
+      return code_info_;
+    }
+
+    const CodeInfoEncoding& GetEncoding() const {
+      return encoding_;
+    }
+
+    size_t GetOffset() const {
+      return offset_;
+    }
+
+    StackMap GetStackMap() const {
+      return GetStackMapAt(stack_map_index_);
+    }
+
+    void Next() {
+      ++stack_map_index_;
+      offset_ = (stack_map_index_ == number_of_stack_maps_)
+          ? static_cast<size_t>(-1)
+          : GetStackMapAt(stack_map_index_).GetNativePcOffset(encoding_.stack_map_encoding);
+    }
+
+   private:
+    StackMap GetStackMapAt(size_t i) const {
+      if (!indexes_.empty()) {
+        i = indexes_[i];
+      }
+      DCHECK_LT(i, number_of_stack_maps_);
+      return code_info_.GetStackMapAt(i, encoding_);
+    }
+
+    const CodeInfo code_info_;
+    const CodeInfoEncoding encoding_;
+    const size_t number_of_stack_maps_;
+    dchecked_vector<size_t> indexes_;  // Used if stack map native PCs are not ordered.
+    size_t offset_;
+    size_t stack_map_index_;
+  };
+
   void DumpCode(VariableIndentationOutputStream* vios,
                 const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item,
                 bool bad_input, size_t code_size) {
@@ -1158,17 +1206,34 @@
     if (code_size == 0 || quick_code == nullptr) {
       vios->Stream() << "NO CODE!\n";
       return;
+    } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
+      // The optimizing compiler outputs its CodeInfo data in the vmap table.
+      StackMapsHelper helper(oat_method.GetVmapTable());
+      const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
+      size_t offset = 0;
+      while (offset < code_size) {
+        offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
+        if (offset == helper.GetOffset()) {
+          ScopedIndentation indent1(vios);
+          StackMap stack_map = helper.GetStackMap();
+          DCHECK(stack_map.IsValid());
+          stack_map.Dump(vios,
+                         helper.GetCodeInfo(),
+                         helper.GetEncoding(),
+                         oat_method.GetCodeOffset(),
+                         code_item->registers_size_);
+          do {
+            helper.Next();
+            // There may be multiple stack maps at a given PC. We display only the first one.
+          } while (offset == helper.GetOffset());
+        }
+        DCHECK_LT(offset, helper.GetOffset());
+      }
     } else {
       const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
       size_t offset = 0;
       while (offset < code_size) {
-        if (!bad_input) {
-          DumpInformationAtOffset(vios, oat_method, code_item, offset, false);
-        }
         offset += disassembler_->Dump(vios->Stream(), quick_native_pc + offset);
-        if (!bad_input) {
-          DumpInformationAtOffset(vios, oat_method, code_item, offset, true);
-        }
       }
     }
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 542a2c4..c859079 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -149,6 +149,7 @@
   native/java_lang_VMClassLoader.cc \
   native/java_lang_ref_FinalizerReference.cc \
   native/java_lang_ref_Reference.cc \
+  native/java_lang_reflect_AbstractMethod.cc \
   native/java_lang_reflect_Array.cc \
   native/java_lang_reflect_Constructor.cc \
   native/java_lang_reflect_Field.cc \
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5a901f1..da7db1d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1247,7 +1247,7 @@
     ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]!
     b .Limt_table_iterate
 .Limt_table_found:
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     ldr r0, [r0, #__SIZEOF_POINTER__]
     ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b497fe..506316e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1780,7 +1780,7 @@
     ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
     b .Limt_table_iterate
 .Limt_table_found:
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     ldr x0, [xIP1, #__SIZEOF_POINTER__]
     ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index fd1851f..8939a48 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -319,6 +319,111 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   [sp + 16] = shorty
+     *   [sp + 20] = thread
+     */
+ENTRY art_quick_osr_stub
+    // Save callee general purpose registers, RA and GP.
+    addiu  $sp, $sp, -48
+    .cfi_adjust_cfa_offset 48
+    sw     $ra, 44($sp)
+    .cfi_rel_offset 31, 44
+    sw     $s8, 40($sp)
+    .cfi_rel_offset 30, 40
+    sw     $gp, 36($sp)
+    .cfi_rel_offset 28, 36
+    sw     $s7, 32($sp)
+    .cfi_rel_offset 23, 32
+    sw     $s6, 28($sp)
+    .cfi_rel_offset 22, 28
+    sw     $s5, 24($sp)
+    .cfi_rel_offset 21, 24
+    sw     $s4, 20($sp)
+    .cfi_rel_offset 20, 20
+    sw     $s3, 16($sp)
+    .cfi_rel_offset 19, 16
+    sw     $s2, 12($sp)
+    .cfi_rel_offset 18, 12
+    sw     $s1, 8($sp)
+    .cfi_rel_offset 17, 8
+    sw     $s0, 4($sp)
+    .cfi_rel_offset 16, 4
+
+    move   $s8, $sp                        # Save the stack pointer
+    move   $s7, $a1                        # Save size of stack
+    move   $s6, $a2                        # Save the pc to call
+    lw     rSELF, 48+20($sp)               # Save managed thread pointer into rSELF
+    addiu  $t0, $sp, -12                   # Reserve space for stack pointer,
+                                           #    JValue* result, and ArtMethod* slot.
+    srl    $t0, $t0, 4                     # Align stack pointer to 16 bytes
+    sll    $sp, $t0, 4                     # Update stack pointer
+    sw     $s8, 4($sp)                     # Save old stack pointer
+    sw     $a3, 8($sp)                     # Save JValue* result
+    sw     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    subu   $sp, $a1                        # Reserve space for callee stack
+    move   $a2, $a1
+    move   $a1, $a0
+    move   $a0, $sp
+    la     $t9, memcpy
+    jalr   $t9                             # memcpy (dest a0, src a1, bytes a2)
+    addiu  $sp, $sp, -16                   # make space for argument slots for memcpy
+    bal    .Losr_entry                     # Call the method
+    addiu  $sp, $sp, 16                    # restore stack after memcpy
+    lw     $a2, 8($sp)                     # Restore JValue* result
+    lw     $sp, 4($sp)                     # Restore saved stack pointer
+    lw     $a0, 48+16($sp)                 # load shorty
+    lbu    $a0, 0($a0)                     # load return type
+    li     $a1, 'D'                        # put char 'D' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'D'
+    li     $a1, 'F'                        # put char 'F' into a1
+    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'F'
+    nop
+    sw     $v0, 0($a2)
+    b      .Losr_exit
+    sw     $v1, 4($a2)                     # store v0/v1 into result
+.Losr_fp_result:
+    SDu    $f0, $f1, 0, $a2, $t0           # store f0/f1 into result
+.Losr_exit:
+    lw     $ra, 44($sp)
+    .cfi_restore 31
+    lw     $s8, 40($sp)
+    .cfi_restore 30
+    lw     $gp, 36($sp)
+    .cfi_restore 28
+    lw     $s7, 32($sp)
+    .cfi_restore 23
+    lw     $s6, 28($sp)
+    .cfi_restore 22
+    lw     $s5, 24($sp)
+    .cfi_restore 21
+    lw     $s4, 20($sp)
+    .cfi_restore 20
+    lw     $s3, 16($sp)
+    .cfi_restore 19
+    lw     $s2, 12($sp)
+    .cfi_restore 18
+    lw     $s1, 8($sp)
+    .cfi_restore 17
+    lw     $s0, 4($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    addiu  $sp, $sp, 48
+    .cfi_adjust_cfa_offset -48
+.Losr_entry:
+    addiu  $s7, $s7, -4
+    addu   $t0, $s7, $sp
+    move   $t9, $s6
+    jalr   $zero, $t9
+    sw     $ra, 0($t0)                     # Store RA per the compiler ABI
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
@@ -1540,11 +1645,41 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Note that this stub writes to a0, t0 and t1.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    lw      $t1, 0($sp)                                      # Load referrer.
+    lw      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array.
+    sll     $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    addu    $t0, $t1, $t0                                    # Add offset to base.
+    lw      $t0, 0($t0)                                      # Load interface method.
+    lw      $a0, ART_METHOD_JNI_OFFSET_32($a0)               # Load ImtConflictTable.
+
+.Limt_table_iterate:
+    lw      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqz    $t1, .Lconflict_trampoline
+    nop
+    # Iterate over the entries of the ImtConflictTable.
+    b       .Limt_table_iterate
+    addiu   $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    lw      $a0, __SIZEOF_POINTER__($a0)
+    lw      $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
+    jr      $t9
+    nop
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f1e605a..5d0c94c 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -358,6 +358,138 @@
 .endm
 
     /*
+     * On stack replacement stub.
+     * On entry:
+     *   a0 = stack to copy
+     *   a1 = size of stack
+     *   a2 = pc to call
+     *   a3 = JValue* result
+     *   a4 = shorty
+     *   a5 = thread
+     */
+ENTRY art_quick_osr_stub
+    move   $t0, $sp               # save stack pointer
+    daddiu $t1, $sp, -112         # reserve stack space
+    dsrl   $t1, $t1, 4            # enforce 16 byte stack alignment
+    dsll   $sp, $t1, 4            # update stack pointer
+
+    // Save callee general purpose registers, SP, T8(GP), RA, A3, and A4 (8x14 bytes)
+    sd     $ra, 104($sp)
+    .cfi_rel_offset 31, 104
+    sd     $s8, 96($sp)
+    .cfi_rel_offset 30, 96
+    sd     $t0, 88($sp)           # save original stack pointer stored in t0
+    .cfi_rel_offset 29, 88
+    sd     $t8, 80($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 80        # Value from gp is pushed, so set the cfi offset accordingly.
+    sd     $s7, 72($sp)
+    .cfi_rel_offset 23, 72
+    sd     $s6, 64($sp)
+    .cfi_rel_offset 22, 64
+    sd     $s5, 56($sp)
+    .cfi_rel_offset 21, 56
+    sd     $s4, 48($sp)
+    .cfi_rel_offset 20, 48
+    sd     $s3, 40($sp)
+    .cfi_rel_offset 19, 40
+    sd     $s2, 32($sp)
+    .cfi_rel_offset 18, 32
+    sd     $s1, 24($sp)
+    .cfi_rel_offset 17, 24
+    sd     $s0, 16($sp)
+    .cfi_rel_offset 16, 16
+    sd     $a4, 8($sp)
+    .cfi_rel_offset 8, 8
+    sd     $a3, 0($sp)
+    .cfi_rel_offset 7, 0
+    move   rSELF, $a5                      # Save managed thread pointer into rSELF
+
+    daddiu $sp, $sp, -16
+    jal    .Losr_entry
+    sd     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
+    daddiu $sp, $sp, 16
+
+    // Restore return value address and shorty address
+    ld     $a4, 8($sp)                     # shorty address
+    .cfi_restore 8
+    ld     $a3, 0($sp)                     # result value address
+    .cfi_restore 7
+
+    lbu    $t1, 0($a4)                     # load return type
+    li     $t2, 'D'                        # put char 'D' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'D'
+    li     $t2, 'F'                        # put char 'F' into t2
+    beq    $t1, $t2, .Losr_fp_result       # branch if result type char == 'F'
+    nop
+    b      .Losr_exit
+    dsrl   $v1, $v0, 32                    # put high half of result in v1
+.Losr_fp_result:
+    mfc1   $v0, $f0
+    mfhc1  $v1, $f0                        # put high half of FP result in v1
+.Losr_exit:
+    sw     $v0, 0($a3)                     # store low half of result
+    sw     $v1, 4($a3)                     # store high half of result
+
+    // Restore callee registers
+    ld     $ra, 104($sp)
+    .cfi_restore 31
+    ld     $s8, 96($sp)
+    .cfi_restore 30
+    ld     $t0, 88($sp)                    # save SP into t0 for now
+    .cfi_restore 29
+    ld     $t8, 80($sp)                    # Restore gp back to it's temp storage.
+    .cfi_restore 28
+    ld     $s7, 72($sp)
+    .cfi_restore 23
+    ld     $s6, 64($sp)
+    .cfi_restore 22
+    ld     $s5, 56($sp)
+    .cfi_restore 21
+    ld     $s4, 48($sp)
+    .cfi_restore 20
+    ld     $s3, 40($sp)
+    .cfi_restore 19
+    ld     $s2, 32($sp)
+    .cfi_restore 18
+    ld     $s1, 24($sp)
+    .cfi_restore 17
+    ld     $s0, 16($sp)
+    .cfi_restore 16
+    jalr   $zero, $ra
+    move   $sp, $t0
+
+.Losr_entry:
+    dsubu  $sp, $sp, $a1                   # Reserve space for callee stack
+    daddiu $a1, $a1, -8
+    daddu  $t0, $a1, $sp
+    sw     $ra, 0($t0)                     # Store low half of RA per compiler ABI
+    dsrl   $t1, $ra, 32
+    sw     $t1, 4($t0)                     # Store high half of RA per compiler ABI
+
+    // Copy arguments into callee stack
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // a0 = source address
+    // a1 = args length in bytes (does not include 8 bytes for RA)
+    // sp = destination address
+    beqz   $a1, .Losr_loop_exit
+    daddiu $a1, $a1, -4
+    daddu  $t1, $a0, $a1
+    daddu  $t2, $sp, $a1
+.Losr_loop_entry:
+    lw     $t0, 0($t1)
+    daddiu $t1, $t1, -4
+    sw     $t0, 0($t2)
+    bne    $sp, $t2, .Losr_loop_entry
+    daddiu $t2, $t2, -4
+
+.Losr_loop_exit:
+    move   $t9, $a2
+    jalr   $zero, $t9                      # Jump to the OSR entry point.
+    nop
+END art_quick_osr_stub
+
+    /*
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
@@ -1534,11 +1666,40 @@
 END art_quick_proxy_invoke_handler
 
     /*
-     * Called to resolve an imt conflict. t0 is a hidden argument that holds the target method's
-     * dex method index.
+     * Called to resolve an imt conflict.
+     * a0 is the conflict ArtMethod.
+     * t0 is a hidden argument that holds the target interface method's dex method index.
+     *
+     * Mote that this stub writes to a0, t0 and t1.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    move    $a0, $t0
+    ld      $t1, 0($sp)                                      # Load referrer.
+    ld      $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_64($t1) # Load dex cache methods array.
+    dsll    $t0, $t0, POINTER_SIZE_SHIFT                     # Calculate offset.
+    daddu   $t0, $t1, $t0                                    # Add offset to base.
+    ld      $t0, 0($t0)                                      # Load interface method.
+    ld      $a0, ART_METHOD_JNI_OFFSET_64($a0)               # Load ImtConflictTable.
+
+.Limt_table_iterate:
+    ld      $t1, 0($a0)                                      # Load next entry in ImtConflictTable.
+    # Branch if found.
+    beq     $t1, $t0, .Limt_table_found
+    nop
+    # If the entry is null, the interface method is not in the ImtConflictTable.
+    beqzc   $t1, .Lconflict_trampoline
+    # Iterate over the entries of the ImtConflictTable.
+    daddiu  $a0, $a0, 2 * __SIZEOF_POINTER__                 # Iterate to the next entry.
+    bc       .Limt_table_iterate
+
+.Limt_table_found:
+    # We successfully hit an entry in the table. Load the target method and jump to it.
+    ld      $a0, __SIZEOF_POINTER__($a0)
+    ld      $t9, ART_METHOD_QUICK_CODE_OFFSET_64($a0)
+    jr      $t9
+    .cpreturn                      # Restore gp from t8 in branch delay slot.
+
+.Lconflict_trampoline:
+    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 82ac574..551ec68 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1422,7 +1422,7 @@
 .Limt_table_iterate:
     cmpl %edi, 0(%eax)
     jne .Limt_table_next_entry
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     POP EDI
     movl __SIZEOF_POINTER__(%eax), %eax
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 90049cc..26e668e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1331,7 +1331,7 @@
 .Limt_table_iterate:
     cmpq %r10, 0(%rdi)
     jne .Limt_table_next_entry
-    // We successuflly hit an entry in the table. Load the target method
+    // We successfully hit an entry in the table. Load the target method
     // and jump to it.
     movq __SIZEOF_POINTER__(%rdi), %rdi
     jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 3dbcd58..d1ef019 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -545,6 +545,9 @@
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Note, hotness_counter_ updates are non-atomic but it doesn't need to be precise.  Also,
+  // given that the counter is only 16 bits wide we can expect wrap-around in some
+  // situations.  Consumers of hotness_count_ must be able to deal with that.
   uint16_t IncrementCounter() {
     return ++hotness_count_;
   }
@@ -553,6 +556,14 @@
     hotness_count_ = 0;
   }
 
+  void SetCounter(int16_t hotness_count) {
+    hotness_count_ = hotness_count;
+  }
+
+  uint16_t GetCounter() const {
+    return hotness_count_;
+  }
+
   const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the method header for the compiled code containing 'pc'. Note that runtime
@@ -597,7 +608,7 @@
   // ifTable.
   uint16_t method_index_;
 
-  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
+  // The hotness we measure for this method. Managed by the interpreter. Not atomic, as we allow
   // missing increments: if the method is hot, we will see it eventually.
   uint16_t hotness_count_;
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 942f9de..d27d2f6 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,6 +20,7 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
+#include "jit/jit_instrumentation.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -188,7 +189,13 @@
 #define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
 ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
-#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+#define SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
+ADD_TEST_EQ(SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::CachedHotnessCountdownOffset()))
+#define SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 10)
+ADD_TEST_EQ(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET,
+            static_cast<int32_t>(art::ShadowFrame::HotnessCountdownOffset()))
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 12)
 ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
 
@@ -389,6 +396,12 @@
 #define THREAD_CHECKPOINT_REQUEST 2
 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
 
+#define JIT_CHECK_OSR -1
+ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
+
+#define JIT_HOTNESS_DISABLE -2
+ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
+
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index fc1a52f..12d3be7 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -140,7 +140,7 @@
 
   HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
 
-  HashSet(double min_load_factor, double max_load_factor)
+  HashSet(double min_load_factor, double max_load_factor) noexcept
       : num_elements_(0u),
         num_buckets_(0u),
         elements_until_expand_(0u),
@@ -152,7 +152,7 @@
     DCHECK_LT(max_load_factor, 1.0);
   }
 
-  explicit HashSet(const allocator_type& alloc)
+  explicit HashSet(const allocator_type& alloc) noexcept
       : allocfn_(alloc),
         hashfn_(),
         emptyfn_(),
@@ -166,7 +166,7 @@
         max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(const HashSet& other)
+  HashSet(const HashSet& other) noexcept
       : allocfn_(other.allocfn_),
         hashfn_(other.hashfn_),
         emptyfn_(other.emptyfn_),
@@ -184,7 +184,9 @@
     }
   }
 
-  HashSet(HashSet&& other)
+  // noexcept required so that the move constructor is used instead of copy constructor.
+  // b/27860101
+  HashSet(HashSet&& other) noexcept
       : allocfn_(std::move(other.allocfn_)),
         hashfn_(std::move(other.hashfn_)),
         emptyfn_(std::move(other.emptyfn_)),
@@ -206,7 +208,7 @@
   // Construct from existing data.
   // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
   // passed in ptr_.
-  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) {
+  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
     uint64_t temp;
     size_t offset = 0;
     offset = ReadFromBytes(ptr, offset, &temp);
@@ -256,12 +258,12 @@
     DeallocateStorage();
   }
 
-  HashSet& operator=(HashSet&& other) {
+  HashSet& operator=(HashSet&& other) noexcept {
     HashSet(std::move(other)).swap(*this);
     return *this;
   }
 
-  HashSet& operator=(const HashSet& other) {
+  HashSet& operator=(const HashSet& other) noexcept {
     HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
     return *this;
   }
@@ -298,6 +300,11 @@
     return Size() == 0;
   }
 
+  // Return true if the hash set has ownership of the underlying data.
+  bool OwnsData() const {
+    return owns_data_;
+  }
+
   // Erase algorithm:
   // Make an empty slot where the iterator is pointing.
   // Scan forwards until we hit another empty slot.
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 293451c..17e0339 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -88,6 +88,8 @@
   kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
+  kDeoptimizedMethodsLock,
+  kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
   kPinTableLock,
@@ -96,7 +98,7 @@
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
   kMethodVerifiersLock,
-  kClassLinkerClassesLock,
+  kClassLinkerClassesLock,  // TODO rename.
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d73f205..18def2d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -5876,9 +5876,14 @@
           !target_name_comparator.HasSameNameAndSignature(
               current_method->GetInterfaceMethodIfProxy(image_pointer_size_))) {
         continue;
+      } else if (!current_method->IsPublic()) {
+        // The verifier should have caught the non-public method for dex version 37. Just warn and
+        // skip it since this is from before default-methods so we don't really need to care that it
+        // has code.
+        LOG(WARNING) << "Interface method " << PrettyMethod(current_method) << " is not public! "
+                     << "This will be a fatal error in subsequent versions of android. "
+                     << "Continuing anyway.";
       }
-      // The verifier should have caught the non-public method.
-      DCHECK(current_method->IsPublic()) << "Interface method is not public!";
       if (UNLIKELY(chosen_iface.Get() != nullptr)) {
         // We have multiple default impls of the same method. This is a potential default conflict.
         // We need to check if this possibly conflicting method is either a superclass of the chosen
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index e512906..42e320a 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -23,6 +23,7 @@
 
 template<class Visitor>
 void ClassTable::VisitRoots(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
@@ -35,6 +36,7 @@
 
 template<class Visitor>
 void ClassTable::VisitRoots(const Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       visitor.VisitRoot(root.AddressWithoutBarrier());
@@ -47,6 +49,7 @@
 
 template <typename Visitor>
 bool ClassTable::Visit(Visitor& visitor) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     for (GcRoot<mirror::Class>& root : class_set) {
       if (!visitor(root.Read())) {
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index d815b1a..8267c68 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -20,17 +20,19 @@
 
 namespace art {
 
-ClassTable::ClassTable() {
+ClassTable::ClassTable() : lock_("Class loader classes", kClassLoaderClassesLock) {
   Runtime* const runtime = Runtime::Current();
   classes_.push_back(ClassSet(runtime->GetHashTableMinLoadFactor(),
                               runtime->GetHashTableMaxLoadFactor()));
 }
 
 void ClassTable::FreezeSnapshot() {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.push_back(ClassSet());
 }
 
 bool ClassTable::Contains(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
     if (it != class_set.end()) {
@@ -41,6 +43,7 @@
 }
 
 mirror::Class* ClassTable::LookupByDescriptor(mirror::Class* klass) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
     if (it != class_set.end()) {
@@ -51,6 +54,7 @@
 }
 
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
   auto existing_it = classes_.back().FindWithHash(descriptor, hash);
   if (kIsDebugBuild && existing_it == classes_.back().end()) {
@@ -74,6 +78,7 @@
 }
 
 size_t ClassTable::NumZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   size_t sum = 0;
   for (size_t i = 0; i < classes_.size() - 1; ++i) {
     sum += classes_[i].Size();
@@ -82,10 +87,12 @@
 }
 
 size_t ClassTable::NumNonZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   return classes_.back().Size();
 }
 
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.FindWithHash(descriptor, hash);
     if (it != class_set.end()) {
@@ -96,14 +103,17 @@
 }
 
 void ClassTable::Insert(mirror::Class* klass) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().Insert(GcRoot<mirror::Class>(klass));
 }
 
 void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
 }
 
 bool ClassTable::Remove(const char* descriptor) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(descriptor);
     if (it != class_set.end()) {
@@ -137,6 +147,7 @@
 }
 
 bool ClassTable::InsertDexFile(mirror::Object* dex_file) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   DCHECK(dex_file != nullptr);
   for (GcRoot<mirror::Object>& root : dex_files_) {
     if (root.Read() == dex_file) {
@@ -148,6 +159,7 @@
 }
 
 size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
   ClassSet combined;
   // Combine all the class sets in case there are multiple, also adjusts load factor back to
   // default in case classes were pruned.
@@ -173,6 +185,7 @@
 }
 
 void ClassTable::AddClassSet(ClassSet&& set) {
+  WriterMutexLock mu(Thread::Current(), lock_);
   classes_.insert(classes_.begin(), std::move(set));
 }
 
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 0e0e860..eb784b5 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -71,87 +71,96 @@
 
   // Used by image writer for checking.
   bool Contains(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Freeze the current class tables by allocating a new table and never updating or modifying the
   // existing table. This helps prevents dirty pages after caused by inserting after zygote fork.
   void FreezeSnapshot()
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the number of classes in previous snapshots.
-  size_t NumZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
+  size_t NumZygoteClasses() const REQUIRES(!lock_);
 
   // Returns all off the classes in the lastest snapshot.
-  size_t NumNonZygoteClasses() const SHARED_REQUIRES(Locks::classlinker_classes_lock_);
+  size_t NumNonZygoteClasses() const REQUIRES(!lock_);
 
   // Update a class in the table with the new class. Returns the existing class which was replaced.
   mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock.
   template<class Visitor>
   void VisitRoots(Visitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   template<class Visitor>
   void VisitRoots(const Visitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Stops visit if the visitor returns false.
   template <typename Visitor>
   bool Visit(Visitor& visitor)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
   mirror::Class* Lookup(const char* descriptor, size_t hash)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor of klass. Returns null if there are none.
   mirror::Class* LookupByDescriptor(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void Insert(mirror::Class* klass)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
   void InsertWithHash(mirror::Class* klass, size_t hash)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the class was found and removed, false otherwise.
   bool Remove(const char* descriptor)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Return true if we inserted the dex file, false if it already exists.
   bool InsertDexFile(mirror::Object* dex_file)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Combines all of the tables into one class set.
   size_t WriteToMemory(uint8_t* ptr) const
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Read a table from ptr and put it at the front of the class set.
   size_t ReadFromMemory(uint8_t* ptr)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add a class set to the front of classes.
   void AddClassSet(ClassSet&& set)
-      REQUIRES(Locks::classlinker_classes_lock_)
+      REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // TODO: shard lock to have one per class loader.
+  // Lock to guard inserting and removing.
+  mutable ReaderWriterMutex lock_;
   // We have a vector to help prevent dirty pages after the zygote forks by calling FreezeSnapshot.
-  std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<ClassSet> classes_ GUARDED_BY(lock_);
   // Dex files used by the class loader which may not be owned by the class loader. We keep these
   // live so that we do not have issues closing any of the dex files.
-  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(lock_);
 };
 
 }  // namespace art
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index e63eaa2..63f3f08 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1351,6 +1351,17 @@
   return ProcessAnnotationSetRefList(method_class, set_ref_list, size);
 }
 
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForMethod(ArtMethod* method)
+    const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
+  return GetSignatureValue(method_class, annotation_set);
+}
+
 bool DexFile::IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class)
     const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
@@ -1548,6 +1559,15 @@
   return true;
 }
 
+mirror::ObjectArray<mirror::String>* DexFile::GetSignatureAnnotationForClass(
+    Handle<mirror::Class> klass) const {
+  const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  if (annotation_set == nullptr) {
+    return nullptr;
+  }
+  return GetSignatureValue(klass, annotation_set);
+}
+
 bool DexFile::IsClassAnnotationPresent(Handle<mirror::Class> klass,
                                        Handle<mirror::Class> annotation_class) const {
   const AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1456636..3a28422 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -991,6 +991,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForMethod(ArtMethod* method) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1013,6 +1015,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass)
+      const SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class)
       const SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 811e763..681c5f9 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -2626,6 +2626,23 @@
   // From here on out it is easier to mask out the bits we're supposed to ignore.
   method_access_flags &= kMethodAccessFlags;
 
+  // Interfaces are special.
+  if ((class_access_flags & kAccInterface) != 0) {
+    // Non-static interface methods must be public.
+    if ((method_access_flags & (kAccPublic | kAccStatic)) == 0) {
+      *error_msg = StringPrintf("Interface virtual method %" PRIu32 "(%s) is not public",
+          method_index,
+          GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
+      if (header_->GetVersion() >= 37) {
+        return false;
+      } else {
+        // Allow in older versions, but warn.
+        LOG(WARNING) << "This dex file is invalid and will be rejected in the future. Error is: "
+                      << *error_msg;
+      }
+    }
+  }
+
   // If there aren't any instructions, make sure that's expected.
   if (!has_code) {
     // Only native or abstract methods may not have code.
@@ -2664,7 +2681,7 @@
     }
     // Interfaces are special.
     if ((class_access_flags & kAccInterface) != 0) {
-      // Interface methods must be public and abstract.
+      // Interface methods without code must be abstract.
       if ((method_access_flags & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
         *error_msg = StringPrintf("Interface method %" PRIu32 "(%s) is not public and abstract",
             method_index,
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 4a5ed5d..344d186 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -767,7 +767,7 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -817,7 +817,7 @@
 
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
 
   VerifyModification(
       kMethodFlagsInterface,
@@ -839,7 +839,7 @@
         ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
         OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
       },
-      "Interface method 1(LInterfaceMethodFlags;.foo) is not public and abstract");
+      "Interface virtual method 1(LInterfaceMethodFlags;.foo) is not public");
 
   constexpr uint32_t kAllMethodFlags =
       kAccPublic |
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index d16afd9..4e40aea 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -210,7 +210,11 @@
     if (mod_union_table_->ShouldAddReference(root->AsMirrorPtr())) {
       *has_target_reference_ = true;
       // TODO: Add MarkCompressedReference callback here.
-      root->Assign(visitor_->MarkObject(root->AsMirrorPtr()));
+      mirror::Object* old_ref = root->AsMirrorPtr();
+      mirror::Object* new_ref = visitor_->MarkObject(old_ref);
+      if (old_ref != new_ref) {
+        root->Assign(new_ref);
+      }
     }
   }
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index b61bef7..c19107a 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -231,7 +231,7 @@
  protected:
   // Returns object if the object is marked in the heap bitmap, otherwise null.
   virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_);
+      SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void MarkObjectNonNull(mirror::Object* obj,
                          mirror::Object* holder = nullptr,
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 01db90a..c2f772f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1304,6 +1304,13 @@
 }
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
+  // If we're in a stack overflow, do not create a new exception. It would require running the
+  // constructor, which will of course still be in a stack overflow.
+  if (self->IsHandlingStackOverflow()) {
+    self->SetException(Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    return;
+  }
+
   std::ostringstream oss;
   size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 22bf5f9..7a2b8d7 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -407,7 +407,7 @@
                                     &is_global_cache);
   }
 
-  if (Runtime::Current()->IsZygote() && !secondary_image) {
+  if (is_zygote && !secondary_image) {
     MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
   }
 
@@ -444,7 +444,7 @@
             // Whether we can write to the cache.
             success = false;
           } else if (secondary_image) {
-            if (Runtime::Current()->IsZygote()) {
+            if (is_zygote) {
               // Secondary image is out of date. Clear cache and exit to let it retry from scratch.
               LOG(ERROR) << "Cannot patch secondary image '" << image_location
                          << "', clearing dalvik_cache and restarting zygote.";
@@ -503,7 +503,16 @@
       // descriptor (and the associated exclusive lock) to be released when
       // we leave Create.
       ScopedFlock image_lock;
-      image_lock.Init(image_filename->c_str(), error_msg);
+      // Should this be a RDWR lock? This is only a defensive measure, as at
+      // this point the image should exist.
+      // However, only the zygote can write into the global dalvik-cache, so
+      // restrict to zygote processes, or any process that isn't using
+      // /data/dalvik-cache (which we assume to be allowed to write there).
+      const bool rw_lock = is_zygote || !is_global_cache;
+      image_lock.Init(image_filename->c_str(),
+                      rw_lock ? (O_CREAT | O_RDWR) : O_RDONLY /* flags */,
+                      true /* block */,
+                      error_msg);
       VLOG(startup) << "Using image file " << image_filename->c_str() << " for image location "
                     << image_location;
       // If we are in /system we can assume the image is good. We can also
@@ -694,6 +703,11 @@
     return src;
   }
 
+  // Must be called on pointers that already have been relocated to the destination relocation.
+  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
+    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
+  }
+
  protected:
   // Source section.
   const RelocationRange boot_image_;
@@ -708,36 +722,12 @@
   template<typename... Args>
   explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
 
-  // Must be called on pointers that already have been relocated to the destination relocation.
-  ALWAYS_INLINE bool IsInAppImage(mirror::Object* object) const {
-    return app_image_.InDest(reinterpret_cast<uintptr_t>(object));
-  }
-
   template <typename T>
   T* operator()(T* obj) const {
     return ForwardObject(obj);
   }
 };
 
-class FixupClassVisitor : public FixupVisitor {
- public:
-  template<typename... Args>
-  explicit FixupClassVisitor(Args... args) : FixupVisitor(args...) {}
-
-  // The image space is contained so the GC doesn't need to know about it. Avoid requiring mutator
-  // lock to prevent possible pauses.
-  ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
-    DCHECK(klass != nullptr) << "Null class in image";
-    // No AsClass since our fields aren't quite fixed up yet.
-    mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
-    // Keep clean if possible.
-    if (klass != new_klass) {
-      obj->SetClass<kVerifyNone>(new_klass);
-    }
-  }
-};
-
 class FixupRootVisitor : public FixupVisitor {
  public:
   template<typename... Args>
@@ -763,12 +753,12 @@
 class FixupObjectVisitor : public FixupVisitor {
  public:
   template<typename... Args>
-  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* pointer_array_visited,
+  explicit FixupObjectVisitor(gc::accounting::ContinuousSpaceBitmap* visited,
                               const size_t pointer_size,
                               Args... args)
       : FixupVisitor(args...),
         pointer_size_(pointer_size),
-        pointer_array_visited_(pointer_array_visited) {}
+        visited_(visited) {}
 
   // Fix up separately since we also need to fix up method entrypoints.
   ALWAYS_INLINE void VisitRootIfNonNull(
@@ -796,13 +786,20 @@
   // Visit a pointer array and forward corresponding native data. Ignores pointer arrays in the
   // boot image. Uses the bitmap to ensure the same array is not visited multiple times.
   template <typename Visitor>
-  void VisitPointerArray(mirror::PointerArray* array, const Visitor& visitor) const
+  void UpdatePointerArrayContents(mirror::PointerArray* array, const Visitor& visitor) const
       NO_THREAD_SAFETY_ANALYSIS {
-    if (array != nullptr &&
-        visitor.IsInAppImage(array) &&
-        !pointer_array_visited_->Test(array)) {
+    DCHECK(array != nullptr);
+    DCHECK(visitor.IsInAppImage(array));
+    // The bit for the array contents is different than the bit for the array. Since we may have
+    // already visited the array as a long / int array from walking the bitmap without knowing it
+    // was a pointer array.
+    static_assert(kObjectAlignment == 8u, "array bit may be in another object");
+    mirror::Object* const contents_bit = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<uintptr_t>(array) + kObjectAlignment);
+    // If the bit is not set then the contents have not yet been updated.
+    if (!visited_->Test(contents_bit)) {
       array->Fixup<kVerifyNone, kWithoutReadBarrier>(array, pointer_size_, visitor);
-      pointer_array_visited_->Set(array);
+      visited_->Set(contents_bit);
     }
   }
 
@@ -816,25 +813,60 @@
   }
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    if (visited_->Test(obj)) {
+      // Already visited.
+      return;
+    }
+    visited_->Set(obj);
+
+    // Handle class specially first since we need it to be updated to properly visit the rest of
+    // the instance fields.
+    {
+      mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+      DCHECK(klass != nullptr) << "Null class in image";
+      // No AsClass since our fields aren't quite fixed up yet.
+      mirror::Class* new_klass = down_cast<mirror::Class*>(ForwardObject(klass));
+      if (klass != new_klass) {
+        obj->SetClass<kVerifyNone>(new_klass);
+      }
+      if (new_klass != klass && IsInAppImage(new_klass)) {
+        // Make sure the klass contents are fixed up since we depend on it to walk the fields.
+        operator()(new_klass);
+      }
+    }
+
     obj->VisitReferences</*visit native roots*/false, kVerifyNone, kWithoutReadBarrier>(
         *this,
         *this);
+    // Note that this code relies on no circular dependencies.
     // We want to use our own class loader and not the one in the image.
     if (obj->IsClass<kVerifyNone, kWithoutReadBarrier>()) {
-      mirror::Class* klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
+      mirror::Class* as_klass = obj->AsClass<kVerifyNone, kWithoutReadBarrier>();
       FixupObjectAdapter visitor(boot_image_, boot_oat_, app_image_, app_oat_);
-      klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(klass, pointer_size_, visitor);
+      as_klass->FixupNativePointers<kVerifyNone, kWithoutReadBarrier>(as_klass,
+                                                                      pointer_size_,
+                                                                      visitor);
       // Deal with the pointer arrays. Use the helper function since multiple classes can reference
       // the same arrays.
-      VisitPointerArray(klass->GetVTable<kVerifyNone, kWithoutReadBarrier>(), visitor);
-      mirror::IfTable* iftable = klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
-      if (iftable != nullptr) {
+      mirror::PointerArray* const vtable = as_klass->GetVTable<kVerifyNone, kWithoutReadBarrier>();
+      if (vtable != nullptr && IsInAppImage(vtable)) {
+        operator()(vtable);
+        UpdatePointerArrayContents(vtable, visitor);
+      }
+      mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
+      // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
+      // contents.
+      if (iftable != nullptr && IsInAppImage(iftable)) {
+        operator()(iftable);
         for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
           if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
             mirror::PointerArray* methods =
                 iftable->GetMethodArray<kVerifyNone, kWithoutReadBarrier>(i);
-            DCHECK(methods != nullptr);
-            VisitPointerArray(methods, visitor);
+            if (visitor.IsInAppImage(methods)) {
+              operator()(methods);
+              DCHECK(methods != nullptr);
+              UpdatePointerArrayContents(methods, visitor);
+            }
           }
         }
       }
@@ -843,7 +875,7 @@
 
  private:
   const size_t pointer_size_;
-  gc::accounting::ContinuousSpaceBitmap* const pointer_array_visited_;
+  gc::accounting::ContinuousSpaceBitmap* const visited_;
 };
 
 class ForwardObjectAdapter {
@@ -929,9 +961,14 @@
   const size_t pointer_size = image_header.GetPointerSize();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   heap->GetBootImagesSize(&boot_image_begin, &boot_image_end, &boot_oat_begin, &boot_oat_end);
-  CHECK_NE(boot_image_begin, boot_image_end)
-      << "Can not relocate app image without boot image space";
-  CHECK_NE(boot_oat_begin, boot_oat_end) << "Can not relocate app image without boot oat file";
+  if (boot_image_begin == boot_image_end) {
+    *error_msg = "Can not relocate app image without boot image space";
+    return false;
+  }
+  if (boot_oat_begin == boot_oat_end) {
+    *error_msg = "Can not relocate app image without boot oat file";
+    return false;
+  }
   const uint32_t boot_image_size = boot_image_end - boot_image_begin;
   const uint32_t boot_oat_size = boot_oat_end - boot_oat_begin;
   const uint32_t image_header_boot_image_size = image_header.GetBootImageSize();
@@ -985,7 +1022,7 @@
     // Two pass approach, fix up all classes first, then fix up non class-objects.
     // The visited bitmap is used to ensure that pointer arrays are not forwarded twice.
     std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> visited_bitmap(
-        gc::accounting::ContinuousSpaceBitmap::Create("Pointer array bitmap",
+        gc::accounting::ContinuousSpaceBitmap::Create("Relocate bitmap",
                                                       target_base,
                                                       image_header.GetImageSize()));
     FixupObjectVisitor fixup_object_visitor(visited_bitmap.get(),
@@ -995,10 +1032,6 @@
                                             app_image,
                                             app_oat);
     TimingLogger::ScopedTiming timing("Fixup classes", &logger);
-    // Fixup class only touches app image classes, don't need the mutator lock since the space is
-    // not yet visible to the GC.
-    FixupClassVisitor fixup_class_visitor(boot_image, boot_oat, app_image, app_oat);
-    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_class_visitor);
     // Fixup objects may read fields in the boot image, use the mutator lock here for sanity. Though
     // its probably not required.
     ScopedObjectAccess soa(Thread::Current());
diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h
index 5237466..eac52f7 100644
--- a/runtime/gc/space/image_space_fs.h
+++ b/runtime/gc/space/image_space_fs.h
@@ -62,7 +62,7 @@
         if (recurse) {
           DeleteDirectoryContents(file, recurse);
           // Try to rmdir the directory.
-          if (TEMP_FAILURE_RETRY(rmdir(file.c_str())) != 0) {
+          if (rmdir(file.c_str()) != 0) {
             PLOG(ERROR) << "Unable to rmdir " << file;
           }
         }
@@ -71,12 +71,12 @@
       }
     } else {
       // Try to unlink the file.
-      if (TEMP_FAILURE_RETRY(unlink(file.c_str())) != 0) {
+      if (unlink(file.c_str()) != 0) {
         PLOG(ERROR) << "Unable to unlink " << file;
       }
     }
   }
-  CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+  CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
 }
 
 static bool HasContent(const char* dir) {
@@ -95,10 +95,10 @@
       continue;
     }
     // Something here.
-    CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+    CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
     return true;
   }
-  CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(c_dir))) << "Unable to close directory.";
+  CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory.";
   return false;
 }
 
@@ -115,7 +115,7 @@
     }
   }
   if (OS::DirectoryExists(dir.c_str())) {
-    if (TEMP_FAILURE_RETRY(rmdir(dir.c_str())) != 0) {
+    if (rmdir(dir.c_str()) != 0) {
       PLOG(ERROR) << "Unable to rmdir " << dir;
       return;
     }
@@ -136,7 +136,7 @@
     return;
   }
 
-  if (TEMP_FAILURE_RETRY(rename(src, trg)) != 0) {
+  if (rename(src, trg) != 0) {
     PLOG(ERROR) << "Could not rename OTA cache " << src << " to target " << trg;
   }
 }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index a0c6bfb..34bc458 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -80,7 +80,7 @@
       have_exception_caught_listeners_(false),
       have_branch_listeners_(false),
       have_invoke_virtual_or_interface_listeners_(false),
-      deoptimized_methods_lock_("deoptimized methods lock"),
+      deoptimized_methods_lock_("deoptimized methods lock", kDeoptimizedMethodsLock),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
       quick_alloc_entry_points_instrumentation_counter_(0),
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index d07f47b..a4c3d41 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -303,7 +303,8 @@
   bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
-        have_exception_caught_listeners_ || have_method_unwind_listeners_;
+        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
+        have_branch_listeners_;
   }
 
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 79f24a8..eceb593 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -393,6 +393,10 @@
 size_t InternTable::Table::AddTableFromMemory(const uint8_t* ptr) {
   size_t read_count = 0;
   UnorderedSet set(ptr, /*make copy*/false, &read_count);
+  if (set.Empty()) {
+    // Avoid inserting empty sets.
+    return read_count;
+  }
   // TODO: Disable this for app images if app images have intern tables.
   static constexpr bool kCheckDuplicates = true;
   if (kCheckDuplicates) {
@@ -400,7 +404,7 @@
       CHECK(Find(string.Read()) == nullptr) << "Already found " << string.Read()->ToModifiedUtf8();
     }
   }
-  // Insert at the front since we insert into the back.
+  // Insert at the front since we add new interns into the back.
   tables_.insert(tables_.begin(), std::move(set));
   return read_count;
 }
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 12d6fdc..ce698fb 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -22,6 +22,7 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -64,15 +65,20 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BRANCH_INSTRUMENTATION(offset)                                                            \
-  do {                                                                                            \
-    ArtMethod* method = shadow_frame.GetMethod();                                                 \
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
-    instrumentation->Branch(self, method, dex_pc, offset);                                        \
-    JValue result;                                                                                \
-    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {             \
-      return result;                                                                              \
-    }                                                                                             \
+#define BRANCH_INSTRUMENTATION(offset)                                                          \
+  do {                                                                                          \
+    instrumentation->Branch(self, method, dex_pc, offset);                                      \
+    JValue result;                                                                              \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {           \
+      return result;                                                                            \
+    }                                                                                           \
+  } while (false)
+
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit_instrumentation_cache != nullptr) {                                                \
+      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
+    }                                                                                          \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -186,6 +192,13 @@
   UPDATE_HANDLER_TABLE();
   std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
   size_t lambda_captured_variable_index = 0;
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+  if (jit != nullptr) {
+    jit_instrumentation_cache = jit->GetInstrumentationCache();
+  }
 
   // Jump to first instruction.
   ADVANCE(0);
@@ -277,7 +290,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -292,7 +304,6 @@
     JValue result;
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -308,7 +319,6 @@
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -323,7 +333,6 @@
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
     HANDLE_MONITOR_CHECKS();
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -359,7 +368,6 @@
       }
     }
     result.SetL(obj_result);
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
@@ -630,6 +638,7 @@
     int8_t offset = inst->VRegA_10t(inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -643,6 +652,7 @@
     int16_t offset = inst->VRegA_20t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -656,6 +666,7 @@
     int32_t offset = inst->VRegA_30t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -669,6 +680,7 @@
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -682,6 +694,7 @@
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
+      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -785,6 +798,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -804,6 +818,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -823,6 +838,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -842,6 +858,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -861,6 +878,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -880,6 +898,7 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -898,6 +917,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -916,6 +936,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -934,6 +955,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -952,6 +974,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -970,6 +993,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -988,6 +1012,7 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
+        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -2558,7 +2583,6 @@
       self->CheckSuspend();
       UPDATE_HANDLER_TABLE();
     }
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
@@ -2579,8 +2603,6 @@
 // a constant condition that would remove the "if" statement so the test is free.
 #define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
   alt_op_##code: {                                                                            \
-    Runtime* const runtime = Runtime::Current();                                              \
-    const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
     if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
       Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
       instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0488dbf..442e191 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,6 +18,7 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -37,6 +38,7 @@
       shadow_frame.GetLockCountData().                                                          \
           CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
       if (interpret_one_instruction) {                                                          \
+        /* Signal mterp to return to caller */                                                  \
         shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                            \
       }                                                                                         \
       return JValue(); /* Handled in caller. */                                                 \
@@ -72,14 +74,24 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                         \
   do {                                                                                         \
-    ArtMethod* method = shadow_frame.GetMethod();                                              \
     instrumentation->Branch(self, method, dex_pc, offset);                                     \
     JValue result;                                                                             \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
+      if (interpret_one_instruction) {                                                         \
+        /* OSR has completed execution of the method.  Signal mterp to return to caller */     \
+        shadow_frame.SetDexPC(DexFile::kDexNoIndex);                                           \
+      }                                                                                        \
       return result;                                                                           \
     }                                                                                          \
   } while (false)
 
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit_instrumentation_cache != nullptr) {                                                \
+      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
+    }                                                                                          \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -101,6 +113,12 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+  if (jit != nullptr) {
+    jit_instrumentation_cache = jit->GetInstrumentationCache();
+  }
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
@@ -205,6 +223,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -221,6 +240,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -238,6 +258,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -254,6 +275,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -292,6 +314,7 @@
                                            result);
         }
         if (interpret_one_instruction) {
+          /* Signal mterp to return to caller */
           shadow_frame.SetDexPC(DexFile::kDexNoIndex);
         }
         return result;
@@ -564,6 +587,7 @@
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -574,6 +598,7 @@
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -584,6 +609,7 @@
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -594,6 +620,7 @@
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -604,6 +631,7 @@
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -708,6 +736,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,6 +753,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -740,6 +770,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -756,6 +787,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -772,6 +804,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -788,6 +821,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +837,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -818,6 +853,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -833,6 +869,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -848,6 +885,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -863,6 +901,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -878,6 +917,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index cfad714..8fad42f 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -9,23 +8,12 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index 981c036..a6b131d 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -33,10 +33,8 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r4-r10,fp,lr}
-    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
-    .pad    #4
-    sub     sp, sp, #4                  @ align 64
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -57,6 +55,12 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 3456a75..62e573a 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -114,21 +114,117 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
-    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     1f
+    bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -176,9 +272,27 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-    add     sp, sp, #4                              @ un-align 64
-    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 298af8a..039bcbe 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -72,7 +72,8 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r11 rREFS	base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -90,12 +91,13 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     r4
-#define rFP     r5
-#define rSELF   r6
-#define rINST   r7
-#define rIBASE  r8
-#define rREFS   r11
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -109,7 +111,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
index e57b19c..6626ff0 100644
--- a/runtime/interpreter/mterp/arm/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -1,22 +1,6 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
-     *
-     * We load the full values with LDM, but in practice many values could
-     * be resolved by only looking at the high word.  This could be made
-     * faster or slower by splitting the LDM into a pair of LDRs.
-     *
-     * If we just wanted to set condition flags, we could do this:
-     *  subs    ip, r0, r2
-     *  sbcs    ip, r1, r3
-     *  subeqs  ip, r0, r2
-     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
-     * integer value, which we can do with 2 conditional mov/mvn instructions
-     * (set 1, set -1; if they're equal we already have 0 in ip), giving
-     * us a constant 5-cycle path plus a branch at the end to the
-     * instruction epilogue code.  The multi-compare approach below needs
-     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
-     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -27,30 +11,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
-    blt     .L${opcode}_less            @ signed compare on high part
-    bgt     .L${opcode}_greater
-    subs    r1, r0, r2                  @ r1<- r0 - r2
-    bhi     .L${opcode}_greater         @ unsigned compare on low part
-    bne     .L${opcode}_less
-    b       .L${opcode}_finish          @ equal; r1 already holds 0
-%break
-
-.L${opcode}_less:
-    mvn     r1, #0                      @ r1<- -1
-    @ Want to cond code the next mov so we can avoid branch, but don't see it;
-    @ instead, we just replicate the tail end.
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L${opcode}_greater:
-    mov     r1, #1                      @ r1<- 1
-    @ fall through to _finish
-
-.L${opcode}_finish:
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
+    SET_VREG ip, r9                     @ vAA<- ip
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index 6861950..aa42dfd 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -5,32 +5,5 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 91639ca..12a6bc0 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,27 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index e730b52..7325a1c 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,31 +10,7 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    FETCH r3, 2                         @ r1<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
index 5685686..b8b6a6e 100644
--- a/runtime/interpreter/mterp/arm/op_if_eq.S
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"ne" }
+%include "arm/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
index 2a9c0f9..7012f61 100644
--- a/runtime/interpreter/mterp/arm/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"ne" }
+%include "arm/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
index 60a0307..eb29e63 100644
--- a/runtime/interpreter/mterp/arm/op_if_ge.S
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"lt" }
+%include "arm/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
index 981cdec..d9da374 100644
--- a/runtime/interpreter/mterp/arm/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"lt" }
+%include "arm/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
index ca50cd7..a35eab8 100644
--- a/runtime/interpreter/mterp/arm/op_if_gt.S
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"le" }
+%include "arm/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
index c621812..4ef4d8e 100644
--- a/runtime/interpreter/mterp/arm/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"le" }
+%include "arm/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
index 7e060f2..c7c31bc 100644
--- a/runtime/interpreter/mterp/arm/op_if_le.S
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"gt" }
+%include "arm/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
index f92be23..9fbf6c9 100644
--- a/runtime/interpreter/mterp/arm/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"gt" }
+%include "arm/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
index 213344d..9469fbb 100644
--- a/runtime/interpreter/mterp/arm/op_if_lt.S
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"ge" }
+%include "arm/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
index dfd4e44..a4fc1b8 100644
--- a/runtime/interpreter/mterp/arm/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"ge" }
+%include "arm/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
index 4a58b4a..c945331 100644
--- a/runtime/interpreter/mterp/arm/op_if_ne.S
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "revcmp":"eq" }
+%include "arm/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
index d864ef4..2d81fda 100644
--- a/runtime/interpreter/mterp/arm/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "revcmp":"eq" }
+%include "arm/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
index 8f40f19..a13c803 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -24,13 +24,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
index 7ef24c5..4c1f058 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -13,12 +13,12 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 4c369cb..412c58f 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,33 +16,5 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 3d7dec0..5db8b6c 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -1,29 +1,17 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    b${condition} MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index 2356ecb..8dd4fed 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -10,22 +9,11 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.${condition} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 23e656e..9fbbbd3 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,11 +31,12 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xIBASE, xREFS, [sp, #-64]!
-    stp     xSELF, xINST, [sp, #16]
-    stp     xPC, xFP, [sp, #32]
-    stp     fp, lr, [sp, #48]
-    add     fp, sp, #48
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -56,6 +57,12 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index aae78de..2d3a11e 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -107,6 +107,107 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
@@ -175,10 +276,36 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-    ldp     fp, lr, [sp, #48]
-    ldp     xPC, xFP, [sp, #32]
-    ldp     xSELF, xINST, [sp, #16]
-    ldp     xIBASE, xREFS, [sp], #64
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 7101ba9..4257200 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -74,6 +74,7 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -92,15 +93,17 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC     x20
-#define xFP     x21
-#define xSELF   x22
-#define xINST   x23
-#define wINST   w23
-#define xIBASE  x24
-#define xREFS   x25
-#define ip      x16
-#define ip2     x17
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -114,7 +117,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index 7e2f6a9..6381e94 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -5,21 +5,5 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
-    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
-       // If backwards branch refresh rIBASE
-    b.mi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index b2b9924..fb9a80a 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -6,17 +6,4 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    b.mi    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index b785857..b13cb41 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -13,17 +13,4 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index e8b4f04..1456f1a 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -17,17 +17,4 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index 3f1e1b1..b303e6a 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,29 +1,17 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.${condition} MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 10b19c5..1da1181 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -21,6 +21,7 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
 #include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
 #include "debugger.h"
 
 namespace art {
@@ -432,7 +433,7 @@
 }
 
 extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
   if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
@@ -444,7 +445,7 @@
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -452,7 +453,7 @@
 }
 
 extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -460,7 +461,7 @@
 }
 
 extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -468,7 +469,7 @@
 }
 
 extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -476,7 +477,7 @@
 }
 
 extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -484,7 +485,7 @@
 }
 
 extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -492,7 +493,7 @@
 }
 
 extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -501,7 +502,7 @@
 }
 
 extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -509,7 +510,7 @@
 }
 
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -521,7 +522,7 @@
 }
 
 extern "C" bool MterpSuspendCheck(Thread* self)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
   return MterpShouldSwitchInterpreters();
 }
@@ -617,7 +618,7 @@
 }
 
 extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(arr == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -631,7 +632,7 @@
 }
 
 extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -639,13 +640,90 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
+/*
+ * Create a hotness_countdown based on the current method hotness_count and profiling
+ * mode.  In short, determine how many hotness events we hit before reporting back
+ * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
+ * and regenerated following batch updates.
+ */
+extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  uint16_t hotness_count = method->GetCounter();
+  int32_t countdown_value = jit::kJitHotnessDisabled;
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit::JitInstrumentationCache* cache = jit->GetInstrumentationCache();
+    int32_t warm_threshold = cache->WarmMethodThreshold();
+    int32_t hot_threshold = cache->HotMethodThreshold();
+    int32_t osr_threshold = cache->OSRMethodThreshold();
+    if (hotness_count < warm_threshold) {
+      countdown_value = warm_threshold - hotness_count;
+    } else if (hotness_count < hot_threshold) {
+      countdown_value = hot_threshold - hotness_count;
+    } else if (hotness_count < osr_threshold) {
+      countdown_value = osr_threshold - hotness_count;
+    } else {
+      countdown_value = jit::kJitCheckForOSR;
+    }
+  }
+  /*
+   * The actual hotness threshold may exceed the range of our int16_t countdown value.  This is
+   * not a problem, though.  We can just break it down into smaller chunks.
+   */
+  countdown_value = std::min(countdown_value,
+                             static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
+  shadow_frame->SetCachedHotnessCountdown(countdown_value);
+  shadow_frame->SetHotnessCountdown(countdown_value);
+  return countdown_value;
+}
+
+/*
+ * Report a batch of hotness events to the instrumentation and then return the new
+ * countdown value to the next time we should report.
+ */
+extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
+                                        ShadowFrame* shadow_frame,
+                                        Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown();
+    jit->GetInstrumentationCache()->AddSamples(self, method, count);
+  }
+  return MterpSetUpHotnessCountdown(method, shadow_frame);
+}
+
+// TUNING: Unused by arm/arm64.  Remove when x86/x86_64/mips/mips64 mterps support batch updates.
 extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
   uint32_t dex_pc = shadow_frame->GetDexPC();
-  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
-  instrumentation->Branch(self, method, dex_pc, offset);
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if ((jit != nullptr) && (offset <= 0)) {
+    jit->GetInstrumentationCache()->AddSamples(self, method, 1);
+  }
+  int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
+  if (countdown_value == jit::kJitCheckForOSR) {
+    return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
+  } else {
+    return false;
+  }
+}
+
+extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
+                                               ShadowFrame* shadow_frame,
+                                               int32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  JValue* result = shadow_frame->GetResultRegister();
+  uint32_t dex_pc = shadow_frame->GetDexPC();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (offset <= 0) {
+    // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
+    jit->GetInstrumentationCache()->AddSamples(self, method, 1);
+  }
+  // Assumes caller has already determined that an OSR check is appropriate.
   return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
 }
 
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 092474d..a38a87b 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -79,7 +79,8 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r11 rREFS	base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r10 rPROFILE  branch profiling countdown
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -97,12 +98,13 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     r4
-#define rFP     r5
-#define rSELF   r6
-#define rINST   r7
-#define rIBASE  r8
-#define rREFS   r11
+#define rPC      r4
+#define rFP      r5
+#define rSELF    r6
+#define rINST    r7
+#define rIBASE   r8
+#define rPROFILE r10
+#define rREFS    r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -116,7 +118,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -329,10 +331,8 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r4-r10,fp,lr}
-    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
-    .pad    #4
-    sub     sp, sp, #4                  @ align 64
+    .save {r3-r10,fp,lr}
+    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -353,6 +353,12 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1103,35 +1109,8 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-       @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1144,30 +1123,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1185,34 +1142,10 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    FETCH r3, 2                         @ r1<- AAAA (hi)
+    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
+    b       MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1228,7 +1161,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1236,36 +1168,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
 
 /* ------------------------------ */
     .balign 128
@@ -1282,7 +1186,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1290,36 +1193,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#else
-    FETCH r0, 1                         @ r0<- bbbb (lo)
-    FETCH r1, 2                         @ r1<- BBBB (hi)
-    mov     r3, rINST, lsr #8           @ r3<- AA
-    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
-    GET_VREG r1, r3                     @ r1<- vAA
-    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
-    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-#endif
+    movs    rINST, r0
+    b       MterpCommonTakenBranch
 
 
 /* ------------------------------ */
@@ -1485,22 +1360,6 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
-     *
-     * We load the full values with LDM, but in practice many values could
-     * be resolved by only looking at the high word.  This could be made
-     * faster or slower by splitting the LDM into a pair of LDRs.
-     *
-     * If we just wanted to set condition flags, we could do this:
-     *  subs    ip, r0, r2
-     *  sbcs    ip, r1, r3
-     *  subeqs  ip, r0, r2
-     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
-     * integer value, which we can do with 2 conditional mov/mvn instructions
-     * (set 1, set -1; if they're equal we already have 0 in ip), giving
-     * us a constant 5-cycle path plus a branch at the end to the
-     * instruction epilogue code.  The multi-compare approach below needs
-     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
-     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -1511,13 +1370,16 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
-    blt     .Lop_cmp_long_less            @ signed compare on high part
-    bgt     .Lop_cmp_long_greater
-    subs    r1, r0, r2                  @ r1<- r0 - r2
-    bhi     .Lop_cmp_long_greater         @ unsigned compare on low part
-    bne     .Lop_cmp_long_less
-    b       .Lop_cmp_long_finish          @ equal; r1 already holds 0
+    cmp     r0, r2
+    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
+    mov     ip, #0
+    mvnlt   ip, #0                      @ -1
+    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
+    orrne   ip, #1
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG ip, r9                     @ vAA<- ip
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -1525,9 +1387,8 @@
 /* File: arm/op_if_eq.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1535,24 +1396,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movne rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1563,9 +1413,8 @@
 /* File: arm/op_if_ne.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1573,24 +1422,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    moveq rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1601,9 +1439,8 @@
 /* File: arm/op_if_lt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1611,24 +1448,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movge rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1639,9 +1465,8 @@
 /* File: arm/op_if_ge.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1649,24 +1474,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movlt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1677,9 +1491,8 @@
 /* File: arm/op_if_gt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1687,24 +1500,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movle rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1715,9 +1517,8 @@
 /* File: arm/op_if_le.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1725,24 +1526,13 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r2, r0                     @ r2<- vA
+    GET_VREG r0, r0                     @ r0<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r2, r3                      @ compare (vA, vB)
-    movgt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, r3                      @ compare (vA, vB)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1753,32 +1543,20 @@
 /* File: arm/op_if_eqz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movne rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    beq MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1789,32 +1567,20 @@
 /* File: arm/op_if_nez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    moveq rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bne MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1825,32 +1591,20 @@
 /* File: arm/op_if_ltz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movge rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    blt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1861,32 +1615,20 @@
 /* File: arm/op_if_gez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movlt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bge MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1897,32 +1639,20 @@
 /* File: arm/op_if_gtz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movle rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    bgt MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1933,32 +1663,20 @@
 /* File: arm/op_if_lez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r2, r0                     @ r2<- vAA
+    GET_VREG r0, r0                     @ r0<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    cmp     r2, #0                      @ compare (vA, 0)
-    movgt rINST, #2
-#if MTERP_PROFILE_BRANCHES
-    @ TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-#endif
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    cmp     r0, #0                      @ compare (vA, 0)
+    ble MterpCommonTakenBranchNoFlags
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    beq     .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -4711,15 +4429,15 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
@@ -5877,14 +5595,14 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @  ip<- ZxW
-    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
-    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @ ip<- ZxW
+    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
+    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
+    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
@@ -7616,27 +7334,6 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_cmp_long */
-
-.Lop_cmp_long_less:
-    mvn     r1, #0                      @ r1<- -1
-    @ Want to cond code the next mov so we can avoid branch, but don't see it;
-    @ instead, we just replicate the tail end.
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.Lop_cmp_long_greater:
-    mov     r1, #1                      @ r1<- 1
-    @ fall through to _finish
-
-.Lop_cmp_long_finish:
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG r1, r9                     @ vAA<- r1
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
 /* continuation for op_float_to_long */
 /*
  * Convert the float in r0 to a long in r0/r1.
@@ -12207,21 +11904,117 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-MterpCheckSuspendAndContinue:
-    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    cmp     rINST, #0
+MterpCommonTakenBranch:
+    bgt     .L_forward_branch           @ don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    cmp     rPROFILE, #JIT_CHECK_OSR
+    beq     .L_osr_check
+    subgts  rPROFILE, #1
+    beq     .L_add_batch                @ counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    REFRESH_IBASE
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     1f
+    bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
+    REFRESH_IBASE                       @ might have changed during suspend
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_no_count_backwards:
+    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
+    bne     .L_resume_backward_branch
+.L_osr_check:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
+    beq     .L_check_osr_forward
+.L_resume_forward_branch:
+    add     r2, rINST, rINST            @ r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L_check_osr_forward:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    mov     r2, rSELF
+    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
+    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -12269,9 +12062,27 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-    add     sp, sp, #4                              @ un-align 64
-    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     rPROFILE, #0
+    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
+MterpProfileActive:
+    mov     rINST, r0                               @ stash return value
+    /* Report cached hotness counts */
+    ldr     r0, [rFP, #OFF_FP_METHOD]
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rSELF
+    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
+    mov     r0, rINST                               @ restore return value
+    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 6ae59d8..55797e6 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -81,6 +81,7 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -99,15 +100,17 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC     x20
-#define xFP     x21
-#define xSELF   x22
-#define xINST   x23
-#define wINST   w23
-#define xIBASE  x24
-#define xREFS   x25
-#define ip      x16
-#define ip2     x17
+#define xPC      x20
+#define xFP      x21
+#define xSELF    x22
+#define xINST    x23
+#define wINST    w23
+#define xIBASE   x24
+#define xREFS    x25
+#define wPROFILE w26
+#define xPROFILE x26
+#define ip       x16
+#define ip2      x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -121,7 +124,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -323,11 +326,12 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xIBASE, xREFS, [sp, #-64]!
-    stp     xSELF, xINST, [sp, #16]
-    stp     xPC, xFP, [sp, #32]
-    stp     fp, lr, [sp, #48]
-    add     fp, sp, #48
+    stp     xPROFILE, x27, [sp, #-80]!
+    stp     xIBASE, xREFS, [sp, #16]
+    stp     xSELF, xINST, [sp, #32]
+    stp     xPC, xFP, [sp, #48]
+    stp     fp, lr, [sp, #64]
+    add     fp, sp, #64
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -348,6 +352,12 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
+    /* Set up for backwards branches & osr profiling */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    bl      MterpSetUpHotnessCountdown
+    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1081,24 +1091,8 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
-    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
-       // If backwards branch refresh rIBASE
-    b.mi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1112,20 +1106,7 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
-    b.mi    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from rINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1146,20 +1127,7 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from xINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1183,20 +1151,7 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1221,20 +1176,7 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
-    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
-    b.le    MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1365,9 +1307,8 @@
 /* File: arm64/op_if_eq.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1376,23 +1317,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.eq MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1403,9 +1333,8 @@
 /* File: arm64/op_if_ne.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1414,23 +1343,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.ne MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1441,9 +1359,8 @@
 /* File: arm64/op_if_lt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1452,23 +1369,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.lt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1479,9 +1385,8 @@
 /* File: arm64/op_if_ge.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1490,23 +1395,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.ge MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1517,9 +1411,8 @@
 /* File: arm64/op_if_gt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1528,23 +1421,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.gt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1555,9 +1437,8 @@
 /* File: arm64/op_if_le.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1566,23 +1447,12 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Offset if branch not taken
+    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
-#if MTERP_PROFILE_BRANCHES
-    // TUINING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    b.le MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1593,32 +1463,20 @@
 /* File: arm64/op_if_eqz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.eq MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1629,32 +1487,20 @@
 /* File: arm64/op_if_nez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.ne MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1665,32 +1511,20 @@
 /* File: arm64/op_if_ltz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.lt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1701,32 +1535,20 @@
 /* File: arm64/op_if_gez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.ge MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1737,32 +1559,20 @@
 /* File: arm64/op_if_gtz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.gt MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1773,32 +1583,20 @@
 /* File: arm64/op_if_lez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S w1, 1                       // w1<- branch offset, in code units
-    mov     w0, #2                      // Branch offset if not taken
+    FETCH_S wINST, 1                    // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
-#if MTERP_PROFILE_BRANCHES
-    // TUNING: once measurements are complete, remove #if and hand-schedule.
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    b.le MterpCommonTakenBranchNoFlags
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -11596,6 +11394,107 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    mov     x2, xSELF
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, #2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
@@ -11664,10 +11563,36 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-    ldp     fp, lr, [sp, #48]
-    ldp     xPC, xFP, [sp, #32]
-    ldp     xSELF, xINST, [sp, #16]
-    ldp     xIBASE, xREFS, [sp], #64
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xSELF
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]
+    ldp     xPC, xFP, [sp, #48]
+    ldp     xSELF, xINST, [sp, #32]
+    ldp     xIBASE, xREFS, [sp, #16]
+    ldp     xPROFILE, x27, [sp], #80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index a75f878..80ffedc 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -16,7 +16,11 @@
 
 #include "unstarted_runtime.h"
 
+#include <errno.h>
+#include <stdlib.h>
+
 #include <cmath>
+#include <limits>
 #include <unordered_map>
 
 #include "ScopedLocalRef.h"
@@ -282,6 +286,23 @@
   }
 }
 
+// Special managed code cut-out to allow constructor lookup in a un-started runtime.
+void UnstartedRuntime::UnstartedClassGetDeclaredConstructor(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  mirror::Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
+  if (klass == nullptr) {
+    ThrowNullPointerExceptionForMethodAccess(shadow_frame->GetMethod(), InvokeType::kVirtual);
+    return;
+  }
+  mirror::ObjectArray<mirror::Class>* args =
+      shadow_frame->GetVRegReference(arg_offset + 1)->AsObjectArray<mirror::Class>();
+  if (Runtime::Current()->IsActiveTransaction()) {
+    result->SetL(mirror::Class::GetDeclaredConstructorInternal<true>(self, klass, args));
+  } else {
+    result->SetL(mirror::Class::GetDeclaredConstructorInternal<false>(self, klass, args));
+  }
+}
+
 void UnstartedRuntime::UnstartedClassGetEnclosingClass(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   StackHandleScope<1> hs(self);
@@ -427,6 +448,8 @@
                     dst_pos, src, src_pos, length, true /* throw_exception */);
       }
     }
+  } else if (src_type->IsPrimitiveByte()) {
+    PrimitiveArrayCopy<uint8_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveChar()) {
     PrimitiveArrayCopy<uint16_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else if (src_type->IsPrimitiveInt()) {
@@ -437,6 +460,12 @@
   }
 }
 
+void UnstartedRuntime::UnstartedSystemArraycopyByte(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
+  // Just forward.
+  UnstartedRuntime::UnstartedSystemArraycopy(self, shadow_frame, result, arg_offset);
+}
+
 void UnstartedRuntime::UnstartedSystemArraycopyChar(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   // Just forward.
@@ -1037,6 +1066,24 @@
   result->SetL(value);
 }
 
+void UnstartedRuntime::UnstartedUnsafePutObjectVolatile(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // Argument 0 is the Unsafe instance, skip.
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset + 1);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot access null object, retry at runtime.");
+    return;
+  }
+  int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
+  mirror::Object* value = shadow_frame->GetVRegReference(arg_offset + 4);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    obj->SetFieldObjectVolatile<true>(MemberOffset(offset), value);
+  } else {
+    obj->SetFieldObjectVolatile<false>(MemberOffset(offset), value);
+  }
+}
+
 void UnstartedRuntime::UnstartedUnsafePutOrderedObject(
     Thread* self, ShadowFrame* shadow_frame, JValue* result ATTRIBUTE_UNUSED, size_t arg_offset)
     SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1056,6 +1103,93 @@
   }
 }
 
+// A cutout for Integer.parseInt(String). Note: this code is conservative and will bail instead
+// of correctly handling the corner cases.
+void UnstartedRuntime::UnstartedIntegerParseInt(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char *end;
+  // Can we set errno to 0? Is this always a variable, and not a macro?
+  // Worst case, we'll incorrectly fail a transaction. Seems OK.
+  int64_t l = strtol(c_str, &end, 10);
+
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // Check whether the string wasn't exactly zero.
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetI(static_cast<int32_t>(l));
+}
+
+// A cutout for Long.parseLong.
+//
+// Note: for now use code equivalent to Integer.parseInt, as the full range may not be supported
+//       well.
+void UnstartedRuntime::UnstartedLongParseLong(
+    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  if (obj == nullptr) {
+    AbortTransactionOrFail(self, "Cannot parse null string, retry at runtime.");
+    return;
+  }
+
+  std::string string_value = obj->AsString()->ToModifiedUtf8();
+  if (string_value.empty()) {
+    AbortTransactionOrFail(self, "Cannot parse empty string, retry at runtime.");
+    return;
+  }
+
+  const char* c_str = string_value.c_str();
+  char *end;
+  // Can we set errno to 0? Is this always a variable, and not a macro?
+  // Worst case, we'll incorrectly fail a transaction. Seems OK.
+  int64_t l = strtol(c_str, &end, 10);
+
+  // Note: comparing against int32_t min/max is intentional here.
+  if ((errno == ERANGE && l == LONG_MAX) || l > std::numeric_limits<int32_t>::max() ||
+      (errno == ERANGE && l == LONG_MIN) || l < std::numeric_limits<int32_t>::min()) {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+  if (l == 0) {
+    // Check whether the string wasn't exactly zero.
+    if (string_value != "0") {
+      AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+      return;
+    }
+  } else if (*end != '\0') {
+    AbortTransactionOrFail(self, "Cannot parse string %s, retry at runtime.", c_str);
+    return;
+  }
+
+  result->SetJ(l);
+}
+
 
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index c76470a..3312701 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -25,10 +25,12 @@
   V(ClassNewInstance, "java.lang.Object java.lang.Class.newInstance()") \
   V(ClassGetDeclaredField, "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") \
   V(ClassGetDeclaredMethod, "java.lang.reflect.Method java.lang.Class.getDeclaredMethodInternal(java.lang.String, java.lang.Class[])") \
+  V(ClassGetDeclaredConstructor, "java.lang.reflect.Constructor java.lang.Class.getDeclaredConstructorInternal(java.lang.Class[])") \
   V(ClassGetEnclosingClass, "java.lang.Class java.lang.Class.getEnclosingClass()") \
   V(VmClassLoaderFindLoadedClass, "java.lang.Class java.lang.VMClassLoader.findLoadedClass(java.lang.ClassLoader, java.lang.String)") \
   V(VoidLookupType, "java.lang.Class java.lang.Void.lookupType()") \
   V(SystemArraycopy, "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") \
+  V(SystemArraycopyByte, "void java.lang.System.arraycopy(byte[], int, byte[], int, int)") \
   V(SystemArraycopyChar, "void java.lang.System.arraycopy(char[], int, char[], int, int)") \
   V(SystemArraycopyInt, "void java.lang.System.arraycopy(int[], int, int[], int, int)") \
   V(SystemGetSecurityManager, "java.lang.SecurityManager java.lang.System.getSecurityManager()") \
@@ -56,7 +58,10 @@
   V(UnsafeCompareAndSwapLong, "boolean sun.misc.Unsafe.compareAndSwapLong(java.lang.Object, long, long, long)") \
   V(UnsafeCompareAndSwapObject, "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") \
   V(UnsafeGetObjectVolatile, "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") \
-  V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)")
+  V(UnsafePutObjectVolatile, "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") \
+  V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") \
+  V(IntegerParseInt, "int java.lang.Integer.parseInt(java.lang.String)") \
+  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)")
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index fb53b1d..f40e4e3 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -508,5 +508,100 @@
   ShadowFrame::DeleteDeoptimizedFrame(tmp);
 }
 
+TEST_F(UnstartedRuntimeTest, IntegerParseIntTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Should be valid, and between minimal values of LONG_MIN and LONG_MAX (for all
+  // suffixes).
+  constexpr const char* test_string = "-2147483646";
+  constexpr int32_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedIntegerParseInt(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetI(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+// Right now the same as Integer.Parse
+TEST_F(UnstartedRuntimeTest, LongParseLongTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Test string. Should be valid, and between minimal values of LONG_MIN and LONG_MAX (for all
+  // suffixes).
+  constexpr const char* test_string = "-2147483646";
+  constexpr int64_t test_values[] = {
+                6,
+               46,
+              646,
+             3646,
+            83646,
+           483646,
+          7483646,
+         47483646,
+        147483646,
+       2147483646,
+      -2147483646
+  };
+
+  static_assert(arraysize(test_values) == 11U, "test_values");
+  CHECK_EQ(strlen(test_string), 11U);
+
+  for (size_t i = 0; i <= 10; ++i) {
+    const char* test_value = &test_string[10 - i];
+
+    StackHandleScope<1> hs_str(self);
+    Handle<mirror::String> h_str(
+        hs_str.NewHandle(mirror::String::AllocFromModifiedUtf8(self, test_value)));
+    ASSERT_NE(h_str.Get(), nullptr);
+    ASSERT_FALSE(self->IsExceptionPending());
+
+    tmp->SetVRegReference(0, h_str.Get());
+
+    JValue result;
+    UnstartedLongParseLong(self, tmp, &result, 0);
+
+    ASSERT_FALSE(self->IsExceptionPending());
+    EXPECT_EQ(result.GetJ(), test_values[i]);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 73aaf04..4db9f71 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -208,7 +208,7 @@
     return false;
   }
   bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
-  code_cache_->DoneCompiling(method_to_compile, self);
+  code_cache_->DoneCompiling(method_to_compile, self, osr);
   return success;
 }
 
@@ -319,11 +319,6 @@
     return false;
   }
 
-  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
-    VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
-    return false;
-  }
-
   if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
     // Don't attempt to do an OSR if we are close to the stack limit. Since
     // the interpreter frames are still on stack, OSR has the potential
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 37ff6a5..820ae6a 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -927,12 +927,12 @@
     return false;
   }
 
-  if (info->IsMethodBeingCompiled()) {
+  if (info->IsMethodBeingCompiled(osr)) {
     VLOG(jit) << PrettyMethod(method) << " is already being compiled";
     return false;
   }
 
-  info->SetIsMethodBeingCompiled(true);
+  info->SetIsMethodBeingCompiled(true, osr);
   return true;
 }
 
@@ -952,10 +952,10 @@
   info->DecrementInlineUse();
 }
 
-void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) {
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
-  DCHECK(info->IsMethodBeingCompiled());
-  info->SetIsMethodBeingCompiled(false);
+  DCHECK(info->IsMethodBeingCompiled(osr));
+  info->SetIsMethodBeingCompiled(false, osr);
 }
 
 size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 6faa8f1..9f18c70 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -80,7 +80,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
-  void DoneCompiling(ArtMethod* method, Thread* self)
+  void DoneCompiling(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d751e5a..d2180c7 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -80,9 +80,9 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold,
-                                                 size_t osr_method_threshold)
+JitInstrumentationCache::JitInstrumentationCache(uint16_t hot_method_threshold,
+                                                 uint16_t warm_method_threshold,
+                                                 uint16_t osr_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
       warm_method_threshold_(warm_method_threshold),
       osr_method_threshold_(osr_method_threshold),
@@ -130,44 +130,61 @@
   }
 }
 
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, uint16_t count) {
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() || method->IsNative()) {
+  // than we want resulting in samples even after the method is compiled.  Also, if the
+  // jit is no longer interested in hotness samples because we're shutting down, just return.
+  if (method->IsClassInitializer() || method->IsNative() || (thread_pool_ == nullptr)) {
+    if (thread_pool_ == nullptr) {
+      // Should only see this when shutting down.
+      DCHECK(Runtime::Current()->IsShuttingDown(self));
+    }
     return;
   }
   DCHECK(thread_pool_ != nullptr);
+  DCHECK_GT(warm_method_threshold_, 0);
+  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
+  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
 
-  uint16_t sample_count = method->IncrementCounter();
-  if (sample_count == warm_method_threshold_) {
-    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
-    if (success) {
-      VLOG(jit) << "Start profiling " << PrettyMethod(method);
+  int32_t starting_count = method->GetCounter();
+  int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
+  if (starting_count < warm_method_threshold_) {
+    if (new_count >= warm_method_threshold_) {
+      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+      if (success) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+      }
+
+      if (thread_pool_ == nullptr) {
+        // Calling ProfilingInfo::Create might put us in a suspended state, which could
+        // lead to the thread pool being deleted when we are shutting down.
+        DCHECK(Runtime::Current()->IsShuttingDown(self));
+        return;
+      }
+
+      if (!success) {
+        // We failed allocating. Instead of doing the collection on the Java thread, we push
+        // an allocation to a compiler thread, that will do the collection.
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      }
     }
-
-    if (thread_pool_ == nullptr) {
-      // Calling ProfilingInfo::Create might put us in a suspended state, which could
-      // lead to the thread pool being deleted when we are shutting down.
-      DCHECK(Runtime::Current()->IsShuttingDown(self));
-      return;
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, hot_method_threshold_ - 1);
+  } else if (starting_count < hot_method_threshold_) {
+    if (new_count >= hot_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
     }
-
-    if (!success) {
-      // We failed allocating. Instead of doing the collection on the Java thread, we push
-      // an allocation to a compiler thread, that will do the collection.
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, osr_method_threshold_ - 1);
+  } else if (starting_count < osr_method_threshold_) {
+    if (new_count >= osr_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
     }
   }
-
-  if (sample_count == hot_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-  }
-
-  if (sample_count == osr_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
-  }
+  // Update hotness counter
+  method->SetCounter(new_count);
 }
 
 JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index d1c5c44..7ffd4eb 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -40,6 +40,8 @@
 class Thread;
 
 namespace jit {
+static constexpr int16_t kJitCheckForOSR = -1;
+static constexpr int16_t kJitHotnessDisabled = -2;
 
 class JitInstrumentationCache;
 
@@ -84,7 +86,6 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
@@ -96,25 +97,33 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  JitInstrumentationCache(size_t hot_method_threshold,
-                          size_t warm_method_threshold,
-                          size_t osr_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
+  JitInstrumentationCache(uint16_t hot_method_threshold,
+                          uint16_t warm_method_threshold,
+                          uint16_t osr_method_threshold);
+  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool(Thread* self);
 
+  size_t OSRMethodThreshold() const {
+    return osr_method_threshold_;
+  }
+
   size_t HotMethodThreshold() const {
     return hot_method_threshold_;
   }
 
+  size_t WarmMethodThreshold() const {
+    return warm_method_threshold_;
+  }
+
   // Wait until there is no more pending compilation tasks.
   void WaitForCompilationToFinish(Thread* self);
 
  private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  size_t osr_method_threshold_;
+  uint16_t hot_method_threshold_;
+  uint16_t warm_method_threshold_;
+  uint16_t osr_method_threshold_;
   JitInstrumentationListener listener_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index 55d627a..3a71bba 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -119,12 +119,18 @@
 
   InlineCache* GetInlineCache(uint32_t dex_pc);
 
-  bool IsMethodBeingCompiled() const {
-    return is_method_being_compiled_;
+  bool IsMethodBeingCompiled(bool osr) const {
+    return osr
+        ? is_osr_method_being_compiled_
+        : is_method_being_compiled_;
   }
 
-  void SetIsMethodBeingCompiled(bool value) {
-    is_method_being_compiled_ = value;
+  void SetIsMethodBeingCompiled(bool value, bool osr) {
+    if (osr) {
+      is_osr_method_being_compiled_ = value;
+    } else {
+      is_method_being_compiled_ = value;
+    }
   }
 
   void SetSavedEntryPoint(const void* entry_point) {
@@ -155,7 +161,8 @@
   }
 
   bool IsInUseByCompiler() const {
-    return IsMethodBeingCompiled() || (current_inline_uses_ > 0);
+    return IsMethodBeingCompiled(/*osr*/ true) || IsMethodBeingCompiled(/*osr*/ false) ||
+        (current_inline_uses_ > 0);
   }
 
  private:
@@ -181,6 +188,7 @@
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.
   bool is_method_being_compiled_;
+  bool is_osr_method_being_compiled_;
 
   // When the compiler inlines the method associated to this ProfilingInfo,
   // it updates this counter so that the GC does not try to clear the inline caches.
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index a36091f..42f003d 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1024,8 +1024,8 @@
 
 // TODO: Move this to java_lang_Class.cc?
 ArtMethod* Class::GetDeclaredConstructor(
-    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args) {
-  for (auto& m : GetDirectMethods(sizeof(void*))) {
+    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size) {
+  for (auto& m : GetDirectMethods(pointer_size)) {
     // Skip <clinit> which is a static constructor, as well as non constructors.
     if (m.IsStatic() || !m.IsConstructor()) {
       continue;
@@ -1139,5 +1139,31 @@
                                                        mirror::String* name,
                                                        mirror::ObjectArray<mirror::Class>* args);
 
+template <bool kTransactionActive>
+mirror::Constructor* Class::GetDeclaredConstructorInternal(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args) {
+  StackHandleScope<1> hs(self);
+  const size_t pointer_size = kTransactionActive
+                                  ? Runtime::Current()->GetClassLinker()->GetImagePointerSize()
+                                  : sizeof(void*);
+  ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), pointer_size);
+  return result != nullptr
+      ? mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+      : nullptr;
+}
+
+// mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<false>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+template mirror::Constructor* Class::GetDeclaredConstructorInternal<true>(
+    Thread* self,
+    mirror::Class* klass,
+    mirror::ObjectArray<mirror::Class>* args);
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 9808c3e..57c3590 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -768,6 +768,11 @@
                                            mirror::String* name,
                                            mirror::ObjectArray<mirror::Class>* args)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template <bool kTransactionActive = false>
+  static Constructor* GetDeclaredConstructorInternal(Thread* self,
+                                                     mirror::Class* klass,
+                                                     mirror::ObjectArray<mirror::Class>* args)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(size_t pointer_size)
@@ -1215,7 +1220,7 @@
 
   // May cause thread suspension due to EqualParameters.
   ArtMethod* GetDeclaredConstructor(
-      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args)
+      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Used to initialize a class in the allocation code path to ensure it is guarded by a StoreStore
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index 84fa80f..cc910b0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -34,7 +34,6 @@
   VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   if (kVisitClasses) {
     // Visit classes loaded after.
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
     ClassTable* const class_table = GetClassTable();
     if (class_table != nullptr) {
       class_table->VisitRoots(visitor);
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 97973e6..9838b71 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -96,14 +96,18 @@
   array_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+template <bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
   DCHECK(method->IsConstructor()) << PrettyMethod(method);
   auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod(method);
+    static_cast<AbstractMethod*>(ret)->CreateFromArtMethod<kTransactionActive>(method);
   }
   return ret;
 }
 
+template Constructor* Constructor::CreateFromArtMethod<false>(Thread* self, ArtMethod* method);
+template Constructor* Constructor::CreateFromArtMethod<true>(Thread* self, ArtMethod* method);
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/method.h b/runtime/mirror/method.h
index 12a72fe..0b56964 100644
--- a/runtime/mirror/method.h
+++ b/runtime/mirror/method.h
@@ -60,6 +60,7 @@
 // C++ mirror of java.lang.reflect.Constructor.
 class MANAGED Constructor: public AbstractMethod {
  public:
+  template <bool kTransactionActive = false>
   static Constructor* CreateFromArtMethod(Thread* self, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index bf24de5..c1899af 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -322,15 +322,11 @@
 static jobject Class_getDeclaredConstructorInternal(
     JNIEnv* env, jobject javaThis, jobjectArray args) {
   ScopedFastNativeObjectAccess soa(env);
-  auto* klass = DecodeClass(soa, javaThis);
-  auto* params = soa.Decode<mirror::ObjectArray<mirror::Class>*>(args);
-  StackHandleScope<1> hs(soa.Self());
-  auto* declared_constructor = klass->GetDeclaredConstructor(soa.Self(), hs.NewHandle(params));
-  if (declared_constructor != nullptr) {
-    return soa.AddLocalReference<jobject>(
-        mirror::Constructor::CreateFromArtMethod(soa.Self(), declared_constructor));
-  }
-  return nullptr;
+  mirror::Constructor* result = mirror::Class::GetDeclaredConstructorInternal(
+      soa.Self(),
+      DecodeClass(soa, javaThis),
+      soa.Decode<mirror::ObjectArray<mirror::Class>*>(args));
+  return soa.AddLocalReference<jobject>(result);
 }
 
 static ALWAYS_INLINE inline bool MethodMatchesConstructor(ArtMethod* m, bool public_only)
@@ -545,6 +541,17 @@
   return soa.AddLocalReference<jstring>(class_name);
 }
 
+static jobjectArray Class_getSignatureAnnotation(JNIEnv* env, jobject javaThis) {
+  ScopedFastNativeObjectAccess soa(env);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(DecodeClass(soa, javaThis)));
+  if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
+    return nullptr;
+  }
+  return soa.AddLocalReference<jobjectArray>(
+      klass->GetDexFile().GetSignatureAnnotationForClass(klass));
+}
+
 static jboolean Class_isAnonymousClass(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
@@ -608,7 +615,8 @@
   }
   auto* constructor = klass->GetDeclaredConstructor(
       soa.Self(),
-      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>());
+      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>(),
+      sizeof(void*));
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
@@ -692,6 +700,7 @@
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getPublicDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
+  NATIVE_METHOD(Class, getSignatureAnnotation, "!()[Ljava/lang/String;"),
   NATIVE_METHOD(Class, isAnonymousClass, "!()Z"),
   NATIVE_METHOD(Class, isDeclaredAnnotationPresent, "!(Ljava/lang/Class;)Z"),
   NATIVE_METHOD(Class, newInstance, "!()Ljava/lang/Object;"),
diff --git a/runtime/native/java_lang_reflect_AbstractMethod.cc b/runtime/native/java_lang_reflect_AbstractMethod.cc
new file mode 100644
index 0000000..7e11c11
--- /dev/null
+++ b/runtime/native/java_lang_reflect_AbstractMethod.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_reflect_AbstractMethod.h"
+
+#include "art_method-inl.h"
+#include "jni_internal.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "reflection.h"
+#include "scoped_fast_native_object_access.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+static jobjectArray AbstractMethod_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    // Return an empty array instead of a null pointer.
+    mirror::Class* annotation_array_class =
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
+    mirror::ObjectArray<mirror::Object>* empty_array =
+        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
+    return soa.AddLocalReference<jobjectArray>(empty_array);
+  }
+  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
+}
+
+static jobjectArray AbstractMethod_getSignatureAnnotation(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return nullptr;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  return soa.AddLocalReference<jobjectArray>(
+      method->GetDexFile()->GetSignatureAnnotationForMethod(method));
+}
+
+
+static jboolean AbstractMethod_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
+                                                         jclass annotationType) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  if (method->GetDeclaringClass()->IsProxyClass()) {
+    return false;
+  }
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
+  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
+}
+
+static JNINativeMethod gMethods[] = {
+  NATIVE_METHOD(AbstractMethod, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
+  NATIVE_METHOD(AbstractMethod, getSignatureAnnotation, "!()[Ljava/lang/String;"),
+  NATIVE_METHOD(AbstractMethod, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
+};
+
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/reflect/AbstractMethod");
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_reflect_AbstractMethod.h b/runtime/native/java_lang_reflect_AbstractMethod.h
new file mode 100644
index 0000000..222e5a0
--- /dev/null
+++ b/runtime/native/java_lang_reflect_AbstractMethod.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
+
+#include <jni.h>
+
+namespace art {
+
+void register_java_lang_reflect_AbstractMethod(JNIEnv* env);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_REFLECT_ABSTRACTMETHOD_H_
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index d7cf62e..78999c2 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -41,20 +41,6 @@
       method->GetDexFile()->GetAnnotationForMethod(method, klass));
 }
 
-static jobjectArray Method_getDeclaredAnnotations(JNIEnv* env, jobject javaMethod) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    // Return an empty array instead of a null pointer.
-    mirror::Class* annotation_array_class =
-        soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_annotation_Annotation__array);
-    mirror::ObjectArray<mirror::Object>* empty_array =
-        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
-    return soa.AddLocalReference<jobjectArray>(empty_array);
-  }
-  return soa.AddLocalReference<jobjectArray>(method->GetDexFile()->GetAnnotationsForMethod(method));
-}
-
 static jobject Method_getDefaultValue(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
@@ -116,27 +102,13 @@
   return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
 }
 
-static jboolean Method_isAnnotationPresentNative(JNIEnv* env, jobject javaMethod,
-                                                 jclass annotationType) {
-  ScopedFastNativeObjectAccess soa(env);
-  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
-  if (method->GetDeclaringClass()->IsProxyClass()) {
-    return false;
-  }
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::Class> klass(hs.NewHandle(soa.Decode<mirror::Class*>(annotationType)));
-  return method->GetDexFile()->IsMethodAnnotationPresent(method, klass);
-}
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Method, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  NATIVE_METHOD(Method, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, getDefaultValue, "!()Ljava/lang/Object;"),
   NATIVE_METHOD(Method, getExceptionTypes, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Method, getParameterAnnotationsNative, "!()[[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Method, isAnnotationPresentNative, "!(Ljava/lang/Class;)Z"),
 };
 
 void register_java_lang_reflect_Method(JNIEnv* env) {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index ce892f3..78e372a 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -77,7 +77,7 @@
 OatFileAssistant::~OatFileAssistant() {
   // Clean up the lock file.
   if (flock_.HasFile()) {
-    TEMP_FAILURE_RETRY(unlink(flock_.GetFile()->GetPath().c_str()));
+    unlink(flock_.GetFile()->GetPath().c_str());
   }
 }
 
@@ -109,7 +109,7 @@
   std::string lock_file_name = *OatFileName() + ".flock";
 
   if (!flock_.Init(lock_file_name.c_str(), error_msg)) {
-    TEMP_FAILURE_RETRY(unlink(lock_file_name.c_str()));
+    unlink(lock_file_name.c_str());
     return false;
   }
   return true;
@@ -495,7 +495,7 @@
       return true;
     }
 
-    if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) {
+    if (file.GetOatHeader().GetImageFileLocationOatChecksum() != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
       return true;
     }
@@ -613,7 +613,7 @@
   if (!Exec(argv, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
+    unlink(oat_file_name.c_str());
     return kUpdateFailed;
   }
 
@@ -673,13 +673,13 @@
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
     oat_file->Erase();
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
+    unlink(oat_file_name.c_str());
     return kUpdateFailed;
   }
 
   if (oat_file->FlushCloseOrErase() != 0) {
     *error_msg = "Unable to close oat file " + oat_file_name;
-    TEMP_FAILURE_RETRY(unlink(oat_file_name.c_str()));
+    unlink(oat_file_name.c_str());
     return kUpdateFailed;
   }
 
@@ -931,8 +931,7 @@
         cached_image_info_.patch_delta = image_header.GetPatchDelta();
       } else {
         std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeaderOrDie(
-                cached_image_info_.location.c_str(), isa_));
+            gc::space::ImageSpace::ReadImageHeaderOrDie(cached_image_info_.location.c_str(), isa_));
         cached_image_info_.oat_checksum = image_header->GetOatChecksum();
         cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
             image_header->GetOatDataBegin());
@@ -940,10 +939,39 @@
       }
     }
     image_info_load_succeeded_ = (!image_spaces.empty());
+
+    combined_image_checksum_ = CalculateCombinedImageChecksum(isa_);
   }
   return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
 }
 
+// TODO: Use something better than xor.
+uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) {
+  uint32_t checksum = 0;
+  std::vector<gc::space::ImageSpace*> image_spaces =
+      Runtime::Current()->GetHeap()->GetBootImageSpaces();
+  if (isa == kRuntimeISA) {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      checksum ^= image_space->GetImageHeader().GetOatChecksum();
+    }
+  } else {
+    for (gc::space::ImageSpace* image_space : image_spaces) {
+      std::string location = image_space->GetImageLocation();
+      std::unique_ptr<ImageHeader> image_header(
+          gc::space::ImageSpace::ReadImageHeaderOrDie(location.c_str(), isa));
+      checksum ^= image_header->GetOatChecksum();
+    }
+  }
+  return checksum;
+}
+
+uint32_t OatFileAssistant::GetCombinedImageChecksum() {
+  if (!image_info_load_attempted_) {
+    GetImageInfo();
+  }
+  return combined_image_checksum_;
+}
+
 gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
   DCHECK(oat_file != nullptr);
   std::string art_file = ArtFileName(oat_file);
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 17f72fe..d3228de 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -279,6 +279,8 @@
   static bool DexFilenameToOdexFilename(const std::string& location,
       InstructionSet isa, std::string* odex_filename, std::string* error_msg);
 
+  static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
+
  private:
   struct ImageInfo {
     uint32_t oat_checksum = 0;
@@ -352,6 +354,8 @@
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
+  uint32_t GetCombinedImageChecksum();
+
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
@@ -423,6 +427,7 @@
   bool image_info_load_attempted_ = false;
   bool image_info_load_succeeded_ = false;
   ImageInfo cached_image_info_;
+  uint32_t combined_image_checksum_ = 0;
 
   // For debugging only.
   // If this flag is set, the oat or odex file has been released to the user
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index bddfa4f..f50d1cb 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -239,7 +239,8 @@
       ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
       const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
       const OatHeader& oat_header = odex_file->GetOatHeader();
-      EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum());
+      uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+      EXPECT_EQ(combined_checksum, oat_header.GetImageFileLocationOatChecksum());
       EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()),
           oat_header.GetImageFileLocationOatDataBegin());
       EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta());
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 6234720..a098bf0 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -506,8 +506,6 @@
 };
 
 TEST_F(ReflectionTest, StaticMainMethod) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 630d101..d3454e8 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -103,6 +103,7 @@
 #include "native/java_lang_VMClassLoader.h"
 #include "native/java_lang_ref_FinalizerReference.h"
 #include "native/java_lang_ref_Reference.h"
+#include "native/java_lang_reflect_AbstractMethod.h"
 #include "native/java_lang_reflect_Array.h"
 #include "native/java_lang_reflect_Constructor.h"
 #include "native/java_lang_reflect_Field.h"
@@ -1349,6 +1350,7 @@
   register_java_lang_DexCache(env);
   register_java_lang_Object(env);
   register_java_lang_ref_FinalizerReference(env);
+  register_java_lang_reflect_AbstractMethod(env);
   register_java_lang_reflect_Array(env);
   register_java_lang_reflect_Constructor(env);
   register_java_lang_reflect_Field(env);
diff --git a/runtime/stack.h b/runtime/stack.h
index 51f7d63..7301184 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -187,6 +187,22 @@
     return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
   }
 
+  int16_t GetCachedHotnessCountdown() const {
+    return cached_hotness_countdown_;
+  }
+
+  void SetCachedHotnessCountdown(int16_t cached_hotness_countdown) {
+    cached_hotness_countdown_ = cached_hotness_countdown;
+  }
+
+  int16_t GetHotnessCountdown() const {
+    return hotness_countdown_;
+  }
+
+  void SetHotnessCountdown(int16_t hotness_countdown) {
+    hotness_countdown_ = hotness_countdown;
+  }
+
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
     dex_pc_ptr_ = nullptr;
@@ -397,6 +413,14 @@
     return OFFSETOF_MEMBER(ShadowFrame, code_item_);
   }
 
+  static size_t CachedHotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, cached_hotness_countdown_);
+  }
+
+  static size_t HotnessCountdownOffset() {
+    return OFFSETOF_MEMBER(ShadowFrame, hotness_countdown_);
+  }
+
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -406,7 +430,7 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
-  uint16_t* GetDexPCPtr() {
+  const uint16_t* GetDexPCPtr() {
     return dex_pc_ptr_;
   }
 
@@ -443,11 +467,13 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   JValue* result_register_;
-  uint16_t* dex_pc_ptr_;
+  const uint16_t* dex_pc_ptr_;
   const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
   const uint32_t number_of_vregs_;
   uint32_t dex_pc_;
+  int16_t cached_hotness_countdown_;
+  int16_t hotness_countdown_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index 3d9fd8b..ee5b0c7 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -93,6 +93,7 @@
     --> nombre is 'fubar'
 
 SimplyNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
+SimplyNoted.get(AnnoSimpleTypeInvis) = null
 SubNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
 
 Package annotations:
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index bc89f16..d36d43e 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -185,6 +185,9 @@
         // this is expected to be non-null
         Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SimplyNoted.get(AnnoSimpleType) = " + anno);
+        // this is expected to be null
+        anno = SimplyNoted.class.getAnnotation(AnnoSimpleTypeInvis.class);
+        System.out.println("SimplyNoted.get(AnnoSimpleTypeInvis) = " + anno);
         // this is non-null if the @Inherited tag is present
         anno = SubNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SubNoted.get(AnnoSimpleType) = " + anno);
diff --git a/test/031-class-attributes/expected.txt b/test/031-class-attributes/expected.txt
index 72656ae..de99872 100644
--- a/test/031-class-attributes/expected.txt
+++ b/test/031-class-attributes/expected.txt
@@ -84,7 +84,7 @@
   enclosingCon: null
   enclosingMeth: null
   modifiers: 1
-  package: package otherpackage, Unknown, version 0.0
+  package: package otherpackage
   declaredClasses: [0]
   member classes: [0]
   isAnnotation: false
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index c2e41c5..38bd525 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -1,6 +1,7 @@
 import otherpackage.OtherPackageClass;
 
 import java.io.Serializable;
+import java.lang.reflect.AbstractMethod;
 import java.lang.reflect.AccessibleObject;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
@@ -221,8 +222,11 @@
     public static String getSignatureAttribute(Object obj) {
         Method method;
         try {
-            Class c = Class.forName("libcore.reflect.AnnotationAccess");
-            method = c.getDeclaredMethod("getSignature", java.lang.reflect.AnnotatedElement.class);
+            Class c = obj.getClass();
+            if (c == Method.class || c == Constructor.class) {
+              c = AbstractMethod.class;
+            }
+            method = c.getDeclaredMethod("getSignatureAttribute");
             method.setAccessible(true);
         } catch (Exception ex) {
             ex.printStackTrace();
@@ -230,7 +234,7 @@
         }
 
         try {
-            return (String) method.invoke(null, obj);
+            return (String) method.invoke(obj);
         } catch (IllegalAccessException ex) {
             throw new RuntimeException(ex);
         } catch (InvocationTargetException ex) {
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 3d06cdb..733a1dd 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -21,11 +21,11 @@
 
 ## CHECK-START: int Builder.testMultipleTryCatch(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
-## CHECK:  <<Minus3:i\d+>>  IntConstant -3
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK-DAG:  <<Minus3:i\d+>>  IntConstant -3
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
@@ -236,10 +236,10 @@
 
 ## CHECK-START: int Builder.testMultipleExits(int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
@@ -312,10 +312,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnter1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnter1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1>>"
@@ -403,10 +403,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary_Reverse(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BGoto:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BGoto:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BGoto>>"
 ## CHECK:  successors       "<<BEnter2:B\d+>>"
@@ -504,9 +504,9 @@
 
 ## CHECK-START: int Builder.testNestedTry(int, int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 1142d49..6514334 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -16,8 +16,28 @@
 
 public class Main {
   public static void main(String[] args) {
-    new SubMain();
     System.loadLibrary(args[0]);
+    Thread testThread = new Thread() {
+      public void run() {
+        performTest();
+      }
+    };
+    testThread.start();
+    try {
+      testThread.join(20 * 1000);  // 20s timeout.
+    } catch (InterruptedException ie) {
+      System.out.println("Interrupted.");
+      System.exit(1);
+    }
+    Thread.State state = testThread.getState();
+    if (state != Thread.State.TERMINATED) {
+      System.out.println("Test timed out, current state: " + state);
+      System.exit(1);
+    }
+  }
+
+  public static void performTest() {
+    new SubMain();
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
     }
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index d13d990..28a99de 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -358,7 +358,7 @@
         # in 512 byte blocks and set it as the ulimit. This should be more than enough
         # room.
         if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework | tail -1 | cut -f1) || exit 1
+          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1
         fi
     fi
 else
@@ -381,14 +381,16 @@
 dex2oat_cmdline="true"
 mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
 
-app_image="--app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g")"
+# Pick a base that will force the app image to get relocated.
+app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
 
 if [ "$PREBUILD" = "y" ]; then
+  mkdir_cmdline="${mkdir_cmdline} && mkdir -p ${DEX_LOCATION}/oat/$ISA"
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
-                      --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --oat-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.odex \
                       ${app_image} \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 2533ce2..38b6ea6 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -267,6 +267,24 @@
           "libcore.util.NativeAllocationRegistryTest#testNullArguments"]
 },
 {
+  description: "libnativehelper_compat_libc++.so not found by dlopen on ARM64",
+  result: EXEC_FAILED,
+  modes: [device],
+  bug: 28082914,
+  names: ["libcore.java.lang.ThreadTest#testContextClassLoaderIsInherited",
+          "libcore.java.lang.ThreadTest#testContextClassLoaderIsNotNull",
+          "libcore.java.lang.ThreadTest#testGetAllStackTracesIncludesAllGroups",
+          "libcore.java.lang.ThreadTest#testGetStackTrace",
+          "libcore.java.lang.ThreadTest#testJavaContextClassLoader",
+          "libcore.java.lang.ThreadTest#testLeakingStartedThreads",
+          "libcore.java.lang.ThreadTest#testLeakingUnstartedThreads",
+          "libcore.java.lang.ThreadTest#testNativeThreadNames",
+          "libcore.java.lang.ThreadTest#testThreadInterrupted",
+          "libcore.java.lang.ThreadTest#testThreadSleep",
+          "libcore.java.lang.ThreadTest#testThreadSleepIllegalArguments",
+          "libcore.java.lang.ThreadTest#testThreadWakeup"]
+},
+{
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]