Merge "MIPS32: peek*/poke*, and String.charAt intrinsics."
diff --git a/Android.mk b/Android.mk
index 4f73127..2e05d33 100644
--- a/Android.mk
+++ b/Android.mk
@@ -547,3 +547,5 @@
 art_dont_bother :=
 art_test_bother :=
 TEST_ART_TARGET_SYNC_DEPS :=
+
+include $(art_path)/runtime/openjdkjvm/Android.mk
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b164942..159e9cf 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -90,7 +90,6 @@
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
-	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
@@ -182,6 +181,7 @@
 	linker/x86/relative_patcher_x86_base.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/intrinsics_x86.cc \
+	optimizing/pc_relative_fixups_x86.cc \
 	utils/x86/assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
 
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index ac0f4ca..d3859ca 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -24,7 +24,6 @@
 #include "debug/dwarf/headers.h"
 #include "debug/elf_compilation_unit.h"
 #include "dex_file-inl.h"
-#include "dex_file.h"
 #include "elf_builder.h"
 #include "stack_map.h"
 
@@ -90,8 +89,9 @@
         continue;
       }
 
-      ArrayRef<const SrcMapElem> src_mapping_table;
-      std::vector<SrcMapElem> src_mapping_table_from_stack_maps;
+      uint32_t prologue_end = std::numeric_limits<uint32_t>::max();
+      ArrayRef<const SrcMapElem> pc2dex_map;
+      std::vector<SrcMapElem> pc2dex_map_from_stack_maps;
       if (mi->IsFromOptimizingCompiler()) {
         // Use stack maps to create mapping table from pc to dex.
         const CodeInfo code_info(mi->compiled_method->GetVmapTable().data());
@@ -99,35 +99,36 @@
         for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
           StackMap stack_map = code_info.GetStackMapAt(s, encoding);
           DCHECK(stack_map.IsValid());
-          // Emit only locations where we have local-variable information.
-          // In particular, skip mappings inside the prologue.
+          const uint32_t pc = stack_map.GetNativePcOffset(encoding);
+          const int32_t dex = stack_map.GetDexPc(encoding);
+          pc2dex_map_from_stack_maps.push_back({pc, dex});
           if (stack_map.HasDexRegisterMap(encoding)) {
-            const uint32_t pc = stack_map.GetNativePcOffset(encoding);
-            const int32_t dex = stack_map.GetDexPc(encoding);
-            src_mapping_table_from_stack_maps.push_back({pc, dex});
+            // Guess that the first map with local variables is the end of prologue.
+            prologue_end = std::min(prologue_end, pc);
           }
         }
-        std::sort(src_mapping_table_from_stack_maps.begin(),
-                  src_mapping_table_from_stack_maps.end());
-        src_mapping_table = ArrayRef<const SrcMapElem>(src_mapping_table_from_stack_maps);
+        std::sort(pc2dex_map_from_stack_maps.begin(),
+                  pc2dex_map_from_stack_maps.end());
+        pc2dex_map = ArrayRef<const SrcMapElem>(pc2dex_map_from_stack_maps);
       } else {
         // Use the mapping table provided by the quick compiler.
-        src_mapping_table = mi->compiled_method->GetSrcMappingTable();
+        pc2dex_map = mi->compiled_method->GetSrcMappingTable();
+        prologue_end = 0;
       }
 
-      if (src_mapping_table.empty()) {
+      if (pc2dex_map.empty()) {
         continue;
       }
 
       Elf_Addr method_address = text_address + mi->low_pc;
 
-      PositionInfos position_infos;
+      PositionInfos dex2line_map;
       const DexFile* dex = mi->dex_file;
-      if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &position_infos)) {
+      if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &dex2line_map)) {
         continue;
       }
 
-      if (position_infos.empty()) {
+      if (dex2line_map.empty()) {
         continue;
       }
 
@@ -184,21 +185,25 @@
       // Generate mapping opcodes from PC to Java lines.
       if (file_index != 0) {
         bool first = true;
-        for (SrcMapElem pc2dex : src_mapping_table) {
+        for (SrcMapElem pc2dex : pc2dex_map) {
           uint32_t pc = pc2dex.from_;
           int dex_pc = pc2dex.to_;
           // Find mapping with address with is greater than our dex pc; then go back one step.
-          auto ub = std::upper_bound(position_infos.begin(), position_infos.end(), dex_pc,
+          auto dex2line = std::upper_bound(
+              dex2line_map.begin(),
+              dex2line_map.end(),
+              dex_pc,
               [](uint32_t address, const DexFile::PositionInfo& entry) {
                   return address < entry.address_;
               });
-          if (ub != position_infos.begin()) {
-            int line = (--ub)->line_;
+          // Look for first valid mapping after the prologue.
+          if (dex2line != dex2line_map.begin() && pc >= prologue_end) {
+            int line = (--dex2line)->line_;
             if (first) {
               first = false;
               if (pc > 0) {
                 // Assume that any preceding code is prologue.
-                int first_line = position_infos.front().line_;
+                int first_line = dex2line_map.front().line_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
                 opcodes.AddRow(method_address, first_line);
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index fd7f949..a19b36f 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -82,6 +82,8 @@
 // Get the location of given dex register (e.g. stack or machine register).
 // Note that the location might be different based on the current pc.
 // The result will cover all ranges where the variable is in scope.
+// PCs corresponding to stackmap with dex register map are accurate,
+// all other PCs are best-effort only.
 std::vector<VariableLocation> GetVariableLocations(const MethodDebugInfo* method_info,
                                                    uint16_t vreg,
                                                    bool is64bitValue,
@@ -141,6 +143,9 @@
         variable_locations.back().high_pc == low_pc) {
       // Merge with the previous entry (extend its range).
       variable_locations.back().high_pc = high_pc;
+    } else if (!variable_locations.empty() && reg_lo == DexRegisterLocation::None()) {
+      // Unknown location - use the last known location as best-effort guess.
+      variable_locations.back().high_pc = high_pc;
     } else {
       variable_locations.push_back({low_pc, high_pc, reg_lo, reg_hi});
     }
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index bc5c6ca..510613e 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -190,7 +190,8 @@
 
 void CompiledMethodStorage::DumpMemoryUsage(std::ostream& os, bool extended) const {
   if (swap_space_.get() != nullptr) {
-    os << " swap=" << PrettySize(swap_space_->GetSize());
+    const size_t swap_size = swap_space_->GetSize();
+    os << " swap=" << PrettySize(swap_size) << " (" << swap_size << "B)";
   }
   if (extended) {
     Thread* self = Thread::Current();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f078bf6..670fe94 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2732,16 +2732,18 @@
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
-  const ArenaPool* arena_pool = runtime->GetArenaPool();
-  gc::Heap* const heap = runtime->GetHeap();
-  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
-  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+  const ArenaPool* const arena_pool = runtime->GetArenaPool();
+  const gc::Heap* const heap = runtime->GetHeap();
+  const size_t arena_alloc = arena_pool->GetBytesAllocated();
+  const size_t java_alloc = heap->GetBytesAllocated();
+  oss << "arena alloc=" << PrettySize(arena_alloc) << " (" << arena_alloc << "B)";
+  oss << " java alloc=" << PrettySize(java_alloc) << " (" << java_alloc << "B)";
 #if defined(__BIONIC__) || defined(__GLIBC__)
-  struct mallinfo info = mallinfo();
+  const struct mallinfo info = mallinfo();
   const size_t allocated_space = static_cast<size_t>(info.uordblks);
   const size_t free_space = static_cast<size_t>(info.fordblks);
-  oss << " native alloc=" << PrettySize(allocated_space) << " free="
-      << PrettySize(free_space);
+  oss << " native alloc=" << PrettySize(allocated_space) << " (" << allocated_space << "B)"
+      << " free=" << PrettySize(free_space) << " (" << free_space << "B)";
 #endif
   compiled_method_storage_.DumpMemoryUsage(oss, extended);
   return oss.str();
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 4c03e5d..4785885 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -249,9 +249,9 @@
 
     ProfileCompilationInfo info;
     for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
-      std::cout << std::string(dex_file->GetLocation());
-      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
-      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+      std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation());
+      profile_info_.AddData(key, dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(key, dex_file->GetLocationChecksum(), 2);
     }
     return &profile_info_;
   }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d2bf6c0..3fe7861 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -203,6 +203,7 @@
 }
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
+  DCHECK(!method->IsProxyMethod());
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
   const uint64_t start_time = NanoTime();
   StackHandleScope<2> hs(self);
@@ -220,20 +221,17 @@
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
-    // If we get a request to compile a proxy method, we pass the actual Java method
-    // of that proxy method, as the compiler does not expect a proxy method.
-    ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method, osr);
     if (success && (perf_file_ != nullptr)) {
-      const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
+      const void* ptr = method->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
              << reinterpret_cast<uintptr_t>(ptr)
              << " "
              << code_cache->GetMemorySizeOfCodePointer(ptr)
              << " "
-             << PrettyMethod(method_to_compile)
+             << PrettyMethod(method)
              << std::endl;
       std::string str = stream.str();
       bool res = perf_file_->WriteFully(str.c_str(), str.size());
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index c307522..ba1b168 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -63,9 +63,10 @@
     return true;
   }
 
+  // Return true if instruction can be expressed as "left_instruction + right_constant".
   static bool IsAddOrSubAConstant(HInstruction* instruction,
-                                  HInstruction** left_instruction,
-                                  int* right_constant) {
+                                  /* out */ HInstruction** left_instruction,
+                                  /* out */ int32_t* right_constant) {
     if (instruction->IsAdd() || instruction->IsSub()) {
       HBinaryOperation* bin_op = instruction->AsBinaryOperation();
       HInstruction* left = bin_op->GetLeft();
@@ -82,9 +83,22 @@
     return false;
   }
 
+  // Expresses any instruction as a value bound.
+  static ValueBound AsValueBound(HInstruction* instruction) {
+    if (instruction->IsIntConstant()) {
+      return ValueBound(nullptr, instruction->AsIntConstant()->GetValue());
+    }
+    HInstruction *left;
+    int32_t right;
+    if (IsAddOrSubAConstant(instruction, &left, &right)) {
+      return ValueBound(left, right);
+    }
+    return ValueBound(instruction, 0);
+  }
+
   // Try to detect useful value bound format from an instruction, e.g.
   // a constant or array length related value.
-  static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) {
+  static ValueBound DetectValueBoundFromValue(HInstruction* instruction, /* out */ bool* found) {
     DCHECK(instruction != nullptr);
     if (instruction->IsIntConstant()) {
       *found = true;
@@ -227,7 +241,7 @@
   // Add a constant to a ValueBound.
   // `overflow` or `underflow` will return whether the resulting bound may
   // overflow or underflow an int.
-  ValueBound Add(int32_t c, bool* overflow, bool* underflow) const {
+  ValueBound Add(int32_t c, /* out */ bool* overflow, /* out */ bool* underflow) const {
     *overflow = *underflow = false;
     if (c == 0) {
       return *this;
@@ -488,10 +502,10 @@
   // the deoptimization technique.
   static constexpr size_t kThresholdForAddingDeoptimize = 2;
 
-  // Very large constant index is considered as an anomaly. This is a threshold
-  // beyond which we don't bother to apply the deoptimization technique since
-  // it's likely some AIOOBE will be thrown.
-  static constexpr int32_t kMaxConstantForAddingDeoptimize =
+  // Very large lengths are considered an anomaly. This is a threshold beyond which we don't
+  // bother to apply the deoptimization technique since it's likely, or sometimes certain,
+  // an AIOOBE will be thrown.
+  static constexpr uint32_t kMaxLengthForAddingDeoptimize =
       std::numeric_limits<int32_t>::max() - 1024 * 1024;
 
   // Added blocks for loop body entry test.
@@ -508,7 +522,7 @@
                   std::less<int>(),
                   graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
               graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        first_constant_index_bounds_check_map_(
+        first_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         early_exit_loop_(
@@ -518,23 +532,16 @@
             std::less<uint32_t>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
-        need_to_revisit_block_(false),
-        has_deoptimization_on_constant_subscripts_(false),
+        has_dom_based_dynamic_bce_(false),
         initial_block_size_(graph->GetBlocks().size()),
         side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
-    first_constant_index_bounds_check_map_.clear();
+    first_index_bounds_check_map_.clear();
     HGraphVisitor::VisitBasicBlock(block);
-    if (need_to_revisit_block_) {
-      AddComparesWithDeoptimization(block);
-      need_to_revisit_block_ = false;
-      first_constant_index_bounds_check_map_.clear();
-      GetValueRangeMap(block)->clear();
-      HGraphVisitor::VisitBasicBlock(block);
-    }
+    AddComparesWithDeoptimization(block);
   }
 
   void Finish() {
@@ -555,8 +562,7 @@
       // Added blocks don't keep value ranges.
       return nullptr;
     }
-    uint32_t block_id = basic_block->GetBlockId();
-    return &maps_[block_id];
+    return &maps_[basic_block->GetBlockId()];
   }
 
   // Traverse up the dominator tree to look for value range info.
@@ -576,6 +582,11 @@
     return nullptr;
   }
 
+  // Helper method to assign a new range to an instruction in given basic block.
+  void AssignRange(HBasicBlock* basic_block, HInstruction* instruction, ValueRange* range) {
+    GetValueRangeMap(basic_block)->Overwrite(instruction->GetId(), range);
+  }
+
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -583,7 +594,7 @@
     ValueRange* existing_range = LookupValueRange(instruction, basic_block);
     if (existing_range == nullptr) {
       if (range != nullptr) {
-        GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range);
+        AssignRange(successor, instruction, range);
       }
       return;
     }
@@ -595,8 +606,7 @@
         return;
       }
     }
-    ValueRange* narrowed_range = existing_range->Narrow(range);
-    GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range);
+    AssignRange(successor, instruction, existing_range->Narrow(range));
   }
 
   // Special case that we may simultaneously narrow two MonotonicValueRange's to
@@ -778,37 +788,37 @@
            array_length->IsPhi());
     bool try_dynamic_bce = true;
 
+    // Analyze index range.
     if (!index->IsIntConstant()) {
-      // Non-constant subscript.
+      // Non-constant index.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by dominator-based analysis.
+      // Try index range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
         ReplaceInstruction(bounds_check, index);
         return;
       }
-      // Try range obtained by induction variable analysis.
+      // Try index range obtained by induction variable analysis.
       // Disables dynamic bce if OOB is certain.
       if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
         ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
-      // Constant subscript.
+      // Constant index.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
         return;
-      }
-      if (array_length->IsIntConstant()) {
+      } else if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
           ReplaceInstruction(bounds_check, index);
         }
         return;
       }
-
+      // Analyze array length range.
       DCHECK(array_length->IsArrayLength());
       ValueRange* existing_range = LookupValueRange(array_length, block);
       if (existing_range != nullptr) {
@@ -823,37 +833,35 @@
           // bounds check.
         }
       }
-
-      if (first_constant_index_bounds_check_map_.find(array_length->GetId()) ==
-          first_constant_index_bounds_check_map_.end()) {
-        // Remember the first bounds check against array_length of a constant index.
-        // That bounds check instruction has an associated HEnvironment where we
-        // may add an HDeoptimize to eliminate bounds checks of constant indices
-        // against array_length.
-        first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
-      } else {
-        // We've seen it at least twice. It's beneficial to introduce a compare with
-        // deoptimization fallback to eliminate the bounds checks.
-        need_to_revisit_block_ = true;
-      }
-
       // Once we have an array access like 'array[5] = 1', we record array.length >= 6.
       // We currently don't do it for non-constant index since a valid array[i] can't prove
       // a valid array[i-1] yet due to the lower bound side.
       if (constant == std::numeric_limits<int32_t>::max()) {
         // Max() as an index will definitely throw AIOOBE.
         return;
+      } else {
+        ValueBound lower = ValueBound(nullptr, constant + 1);
+        ValueBound upper = ValueBound::Max();
+        ValueRange* range = new (GetGraph()->GetArena())
+            ValueRange(GetGraph()->GetArena(), lower, upper);
+        AssignRange(block, array_length, range);
       }
-      ValueBound lower = ValueBound(nullptr, constant + 1);
-      ValueBound upper = ValueBound::Max();
-      ValueRange* range = new (GetGraph()->GetArena())
-          ValueRange(GetGraph()->GetArena(), lower, upper);
-      GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
 
     // If static analysis fails, and OOB is not certain, try dynamic elimination.
     if (try_dynamic_bce) {
-      TryDynamicBCE(bounds_check);
+      // Try loop-based dynamic elimination.
+      if (TryDynamicBCE(bounds_check)) {
+        return;
+      }
+      // Prepare dominator-based dynamic elimination.
+      if (first_index_bounds_check_map_.find(array_length->GetId()) ==
+          first_index_bounds_check_map_.end()) {
+        // Remember the first bounds check against each array_length. That bounds check
+        // instruction has an associated HEnvironment where we may add an HDeoptimize
+        // to eliminate subsequent bounds checks against the same array_length.
+        first_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
+      }
     }
   }
 
@@ -914,7 +922,7 @@
                 increment,
                 bound);
           }
-          GetValueRangeMap(phi->GetBlock())->Overwrite(phi->GetId(), range);
+          AssignRange(phi->GetBlock(), phi, range);
         }
       }
     }
@@ -942,7 +950,7 @@
       }
       ValueRange* range = left_range->Add(right->AsIntConstant()->GetValue());
       if (range != nullptr) {
-        GetValueRangeMap(add->GetBlock())->Overwrite(add->GetId(), range);
+        AssignRange(add->GetBlock(), add, range);
       }
     }
   }
@@ -957,7 +965,7 @@
       }
       ValueRange* range = left_range->Add(-right->AsIntConstant()->GetValue());
       if (range != nullptr) {
-        GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+        AssignRange(sub->GetBlock(), sub, range);
         return;
       }
     }
@@ -997,7 +1005,7 @@
                     GetGraph()->GetArena(),
                     ValueBound(nullptr, right_const - upper.GetConstant()),
                     ValueBound(array_length, right_const - lower.GetConstant()));
-                GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+                AssignRange(sub->GetBlock(), sub, range);
               }
             }
           }
@@ -1045,7 +1053,7 @@
           GetGraph()->GetArena(),
           ValueBound(nullptr, std::numeric_limits<int32_t>::min()),
           ValueBound(left, 0));
-      GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+      AssignRange(instruction->GetBlock(), instruction, range);
     }
   }
 
@@ -1071,7 +1079,7 @@
             GetGraph()->GetArena(),
             ValueBound(nullptr, 0),
             ValueBound(nullptr, constant));
-        GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+        AssignRange(instruction->GetBlock(), instruction, range);
       }
     }
   }
@@ -1095,30 +1103,11 @@
         if (existing_range != nullptr) {
           range = existing_range->Narrow(range);
         }
-        GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range);
+        AssignRange(new_array->GetBlock(), left, range);
       }
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
-    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
-      return;
-    }
-    // If this instruction was added by AddCompareWithDeoptimization(), narrow
-    // the range accordingly in subsequent basic blocks.
-    HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
-    HInstruction* instruction = less_than_or_equal->InputAt(0);
-    if (instruction->IsArrayLength()) {
-      HInstruction* constant = less_than_or_equal->InputAt(1);
-      DCHECK(constant->IsIntConstant());
-      DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize);
-      ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1);
-      ValueRange* range = new (GetGraph()->GetArena())
-          ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max());
-      GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range);
-    }
-  }
-
   /**
     * After null/bounds checks are eliminated, some invariant array references
     * may be exposed underneath which can be hoisted out of the loop to the
@@ -1130,13 +1119,12 @@
     *     a[i][j] = 0;               --a[i]--+
     * }
     *
-    * Note: this optimization is no longer applied after deoptimization on array references
-    * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
-    * those cases it would be unsafe to hoist array references across their deoptimization
-    * instruction inside a loop.
+    * Note: this optimization is no longer applied after dominator-based dynamic deoptimization
+    * has occurred (see AddCompareWithDeoptimization()), since in those cases it would be
+    * unsafe to hoist array references across their deoptimization instruction inside a loop.
     */
   void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
-    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+    if (!has_dom_based_dynamic_bce_ && array_get->IsInLoop()) {
       HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
       if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) &&
           loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
@@ -1148,69 +1136,105 @@
     }
   }
 
-  void AddCompareWithDeoptimization(HInstruction* array_length,
-                                    HIntConstant* const_instr,
-                                    HBasicBlock* block) {
-    DCHECK(array_length->IsArrayLength());
-    ValueRange* range = LookupValueRange(array_length, block);
-    ValueBound lower_bound = range->GetLower();
-    DCHECK(lower_bound.IsConstant());
-    DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize);
-    // Note that the lower bound of the array length may have been refined
-    // through other instructions (such as `HNewArray(length - 4)`).
-    DCHECK_LE(const_instr->GetValue() + 1, lower_bound.GetConstant());
-
-    // If array_length is less than lower_const, deoptimize.
-    HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get(
-        array_length->GetId())->AsBoundsCheck();
-    HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr);
-    HDeoptimize* deoptimize = new (GetGraph()->GetArena())
-        HDeoptimize(cond, bounds_check->GetDexPc());
-    block->InsertInstructionBefore(cond, bounds_check);
-    block->InsertInstructionBefore(deoptimize, bounds_check);
-    deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
-    // Flag that this kind of deoptimization on array references with constant
-    // subscripts has occurred to prevent further hoisting of these references.
-    has_deoptimization_on_constant_subscripts_ = true;
+  // Perform dominator-based dynamic elimination on suitable set of bounds checks.
+  void AddCompareWithDeoptimization(HBasicBlock* block,
+                                    HInstruction* array_length,
+                                    HInstruction* base,
+                                    int32_t min_c, int32_t max_c) {
+    HBoundsCheck* bounds_check =
+        first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck();
+    // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
+    // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
+    // and base+3, since we made the assumption any in between value may occur too.
+    static_assert(kMaxLengthForAddingDeoptimize < std::numeric_limits<int32_t>::max(),
+                  "Incorrect max length may be subject to arithmetic wrap-around");
+    HInstruction* upper = GetGraph()->GetIntConstant(max_c);
+    if (base == nullptr) {
+      DCHECK_GE(min_c, 0);
+    } else {
+      HInstruction* lower = new (GetGraph()->GetArena())
+          HAdd(Primitive::kPrimInt, base, GetGraph()->GetIntConstant(min_c));
+      upper = new (GetGraph()->GetArena()) HAdd(Primitive::kPrimInt, base, upper);
+      block->InsertInstructionBefore(lower, bounds_check);
+      block->InsertInstructionBefore(upper, bounds_check);
+      InsertDeoptInBlock(bounds_check, new (GetGraph()->GetArena()) HAbove(lower, upper));
+    }
+    InsertDeoptInBlock(bounds_check, new (GetGraph()->GetArena()) HAboveOrEqual(upper, array_length));
+    // Flag that this kind of deoptimization has occurred.
+    has_dom_based_dynamic_bce_ = true;
   }
 
+  // Attempt dominator-based dynamic elimination on remaining candidates.
   void AddComparesWithDeoptimization(HBasicBlock* block) {
-    for (ArenaSafeMap<int, HBoundsCheck*>::iterator it =
-             first_constant_index_bounds_check_map_.begin();
-         it != first_constant_index_bounds_check_map_.end();
-         ++it) {
-      HBoundsCheck* bounds_check = it->second;
+    for (const auto& entry : first_index_bounds_check_map_) {
+      HBoundsCheck* bounds_check = entry.second;
+      HInstruction* index = bounds_check->InputAt(0);
       HInstruction* array_length = bounds_check->InputAt(1);
       if (!array_length->IsArrayLength()) {
-        // Prior deoptimizations may have changed the array length to a phi.
-        // TODO(mingyao): propagate the range to the phi?
-        DCHECK(array_length->IsPhi()) << array_length->DebugName();
-        continue;
+        continue;  // disregard phis and constants
       }
-      HIntConstant* lower_bound_const_instr = nullptr;
-      int32_t lower_bound_const = std::numeric_limits<int32_t>::min();
-      size_t counter = 0;
-      // Count the constant indexing for which bounds checks haven't
-      // been removed yet.
-      for (HUseIterator<HInstruction*> it2(array_length->GetUses());
-           !it2.Done();
-           it2.Advance()) {
+      // Collect all bounds checks are still there and that are related as "a[base + constant]"
+      // for a base instruction (possibly absent) and various constants. Note that no attempt
+      // is made to partition the set into matching subsets (viz. a[0], a[1] and a[base+1] and
+      // a[base+2] are considered as one set).
+      // TODO: would such a partitioning be worthwhile?
+      ValueBound value = ValueBound::AsValueBound(index);
+      HInstruction* base = value.GetInstruction();
+      int32_t min_c = base == nullptr ? 0 : value.GetConstant();
+      int32_t max_c = value.GetConstant();
+      ArenaVector<HBoundsCheck*> candidates(
+          GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+      ArenaVector<HBoundsCheck*> standby(
+          GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination));
+      for (HUseIterator<HInstruction*> it2(array_length->GetUses()); !it2.Done(); it2.Advance()) {
+        // Another bounds check in same or dominated block?
         HInstruction* user = it2.Current()->GetUser();
-        if (user->GetBlock() == block &&
-            user->IsBoundsCheck() &&
-            user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) {
-          DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1));
-          HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant();
-          if (const_instr->GetValue() > lower_bound_const) {
-            lower_bound_const = const_instr->GetValue();
-            lower_bound_const_instr = const_instr;
+        HBasicBlock* other_block = user->GetBlock();
+        if (user->IsBoundsCheck() && block->Dominates(other_block)) {
+          HBoundsCheck* other_bounds_check = user->AsBoundsCheck();
+          HInstruction* other_index = other_bounds_check->InputAt(0);
+          HInstruction* other_array_length = other_bounds_check->InputAt(1);
+          ValueBound other_value = ValueBound::AsValueBound(other_index);
+          if (array_length == other_array_length && base == other_value.GetInstruction()) {
+            int32_t other_c = other_value.GetConstant();
+            // Since a subsequent dominated block could be under a conditional, only accept
+            // the other bounds check if it is in same block or both blocks dominate the exit.
+            // TODO: we could improve this by testing proper post-dominance, or even if this
+            //       constant is seen along *all* conditional paths that follow.
+            HBasicBlock* exit = GetGraph()->GetExitBlock();
+            if (block == user->GetBlock() ||
+                (block->Dominates(exit) && other_block->Dominates(exit))) {
+              min_c = std::min(min_c, other_c);
+              max_c = std::max(max_c, other_c);
+              candidates.push_back(other_bounds_check);
+            } else {
+              // Add this candidate later only if it falls into the range.
+              standby.push_back(other_bounds_check);
+            }
           }
-          counter++;
         }
       }
-      if (counter >= kThresholdForAddingDeoptimize &&
-          lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) {
-        AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block);
+      // Add standby candidates that fall in selected range.
+      for (HBoundsCheck* other_bounds_check : standby) {
+        HInstruction* other_index = other_bounds_check->InputAt(0);
+        int32_t other_c = ValueBound::AsValueBound(other_index).GetConstant();
+        if (min_c <= other_c && other_c <= max_c) {
+          candidates.push_back(other_bounds_check);
+        }
+      }
+      // Perform dominator-based deoptimization if it seems profitable. Note that we reject cases
+      // where the distance min_c:max_c range gets close to the maximum possible array length,
+      // since those cases are likely to always deopt (such situations do not necessarily go
+      // OOB, though, since the programmer could rely on wrap-around from max to min).
+      size_t threshold = kThresholdForAddingDeoptimize + (base == nullptr ? 0 : 1);  // extra test?
+      uint32_t distance = static_cast<uint32_t>(max_c) - static_cast<uint32_t>(min_c);
+      if (candidates.size() >= threshold &&
+          (base != nullptr || min_c >= 0) &&  // reject certain OOB
+           distance <= kMaxLengthForAddingDeoptimize) {  // reject likely/certain deopt
+        AddCompareWithDeoptimization(block, array_length, base, min_c, max_c);
+        for (HInstruction* other_bounds_check : candidates) {
+          ReplaceInstruction(other_bounds_check, other_bounds_check->InputAt(0));
+        }
       }
     }
   }
@@ -1259,7 +1283,7 @@
    * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
    * bounds checks and related null checks removed.
    */
-  void TryDynamicBCE(HBoundsCheck* instruction) {
+  bool TryDynamicBCE(HBoundsCheck* instruction) {
     HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
     HInstruction* index = instruction->InputAt(0);
     HInstruction* length = instruction->InputAt(1);
@@ -1285,11 +1309,13 @@
       HBasicBlock* block = GetPreHeader(loop, instruction);
       induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
       if (lower != nullptr) {
-        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+        InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
       }
-      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
       ReplaceInstruction(instruction, index);
+      return true;
     }
+    return false;
   }
 
   /**
@@ -1382,7 +1408,7 @@
         HBasicBlock* block = GetPreHeader(loop, check);
         HInstruction* cond =
             new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
-        InsertDeopt(loop, block, cond);
+        InsertDeoptInLoop(loop, block, cond);
         ReplaceInstruction(check, array);
         return true;
       }
@@ -1448,8 +1474,8 @@
     return loop->GetPreHeader();
   }
 
-  /** Inserts a deoptimization test. */
-  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+  /** Inserts a deoptimization test in a loop preheader. */
+  void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
     HInstruction* suspend = loop->GetSuspendCheck();
     block->InsertInstructionBefore(condition, block->GetLastInstruction());
     HDeoptimize* deoptimize =
@@ -1461,6 +1487,16 @@
     }
   }
 
+  /** Inserts a deoptimization test right before a bounds check. */
+  void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
+    HBasicBlock* block = bounds_check->GetBlock();
+    block->InsertInstructionBefore(condition, bounds_check);
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, bounds_check);
+    deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+  }
+
   /** Hoists instruction out of the loop to preheader or deoptimization block. */
   void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
     HBasicBlock* block = GetPreHeader(loop, instruction);
@@ -1628,9 +1664,9 @@
   // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
-  // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
-  // a block that checks a constant index against that HArrayLength.
-  ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+  // Map an HArrayLength instruction's id to the first HBoundsCheck instruction
+  // in a block that checks an index against that HArrayLength.
+  ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_;
 
   // Early-exit loop bookkeeping.
   ArenaSafeMap<uint32_t, bool> early_exit_loop_;
@@ -1641,15 +1677,8 @@
   // Finite loop bookkeeping.
   ArenaSet<uint32_t> finite_loop_;
 
-  // For the block, there is at least one HArrayLength instruction for which there
-  // is more than one bounds check instruction with constant indexing. And it's
-  // beneficial to add a compare instruction that has deoptimization fallback and
-  // eliminate those bounds checks.
-  bool need_to_revisit_block_;
-
-  // Flag that denotes whether deoptimization has occurred on array references
-  // with constant subscripts (see AddCompareWithDeoptimization()).
-  bool has_deoptimization_on_constant_subscripts_;
+  // Flag that denotes whether dominator-based dynamic elimination has occurred.
+  bool has_dom_based_dynamic_bce_;
 
   // Initial number of blocks.
   uint32_t initial_block_size_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 8d77daf..05e1356 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -32,46 +32,12 @@
 #include "nodes.h"
 #include "primitive.h"
 #include "scoped_thread_state_change.h"
+#include "ssa_builder.h"
 #include "thread.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
-/**
- * Helper class to add HTemporary instructions. This class is used when
- * converting a DEX instruction to multiple HInstruction, and where those
- * instructions do not die at the following instruction, but instead spans
- * multiple instructions.
- */
-class Temporaries : public ValueObject {
- public:
-  explicit Temporaries(HGraph* graph) : graph_(graph), index_(0) {}
-
-  void Add(HInstruction* instruction) {
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_, instruction->GetDexPc());
-    instruction->GetBlock()->AddInstruction(temp);
-
-    DCHECK(temp->GetPrevious() == instruction);
-
-    size_t offset;
-    if (instruction->GetType() == Primitive::kPrimLong
-        || instruction->GetType() == Primitive::kPrimDouble) {
-      offset = 2;
-    } else {
-      offset = 1;
-    }
-    index_ += offset;
-
-    graph_->UpdateTemporariesVRegSlots(index_);
-  }
-
- private:
-  HGraph* const graph_;
-
-  // Current index in the temporary stack, updated by `Add`.
-  size_t index_;
-};
-
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.resize(count);
@@ -283,7 +249,7 @@
     // loop for synchronized blocks.
     if (block->HasThrowingInstructions()) {
       // Try to find a TryItem covering the block.
-      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dec_pc to find its TryItem.";
+      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dex_pc to find its TryItem.";
       const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
       if (try_item_idx != -1) {
         // Block throwing and in a TryItem. Store the try block information.
@@ -357,7 +323,8 @@
   }
 }
 
-bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
+GraphAnalysisResult HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item,
+                                              StackHandleScopeCollection* handles) {
   DCHECK(graph_->GetBlocks().empty());
 
   const uint16_t* code_ptr = code_item.insns_;
@@ -384,12 +351,12 @@
   // start a new block, and create these blocks.
   if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
-    return false;
+    return kAnalysisInvalidBytecode;
   }
 
   // Note that the compiler driver is null when unit testing.
   if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
-    return false;
+    return kAnalysisInvalidBytecode;
   }
 
   // Find locations where we want to generate extra stackmaps for native debugging.
@@ -420,7 +387,7 @@
       }
     }
     if (!AnalyzeDexInstruction(instruction, dex_pc)) {
-      return false;
+      return kAnalysisInvalidBytecode;
     }
     dex_pc += instruction.SizeInCodeUnits();
     code_ptr += instruction.SizeInCodeUnits();
@@ -439,7 +406,13 @@
   // non-exceptional edges to have been created.
   InsertTryBoundaryBlocks(code_item);
 
-  return true;
+  GraphAnalysisResult result = graph_->BuildDominatorTree();
+  if (result != kAnalysisSuccess) {
+    return result;
+  }
+
+  graph_->InitializeInexactObjectRTI(handles);
+  return SsaBuilder(graph_, handles).BuildSsa();
 }
 
 void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) {
@@ -1166,12 +1139,10 @@
   size_t start_index = 0;
   size_t argument_index = 0;
   if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    Temporaries temps(graph_);
     HInstruction* arg = LoadLocal(
         is_range ? register_index : args[0], Primitive::kPrimNot, invoke->GetDexPc());
     HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
     current_block_->AddInstruction(null_check);
-    temps.Add(null_check);
     invoke->SetArgumentAt(0, null_check);
     start_index = 1;
     argument_index = 1;
@@ -1269,9 +1240,6 @@
       ? GetFieldAccessType(*dex_file_, field_index)
       : resolved_field->GetTypeAsPrimitiveType();
   if (is_put) {
-    Temporaries temps(graph_);
-    // We need one temporary for the null check.
-    temps.Add(null_check);
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
     HInstruction* field_set = nullptr;
     if (resolved_field == nullptr) {
@@ -1456,8 +1424,6 @@
   uint16_t class_def_index = klass->GetDexClassDefIndex();
   if (is_put) {
     // We need to keep the class alive before loading the value.
-    Temporaries temps(graph_);
-    temps.Add(cls);
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
     DCHECK_EQ(value->GetType(), field_type);
     current_block_->AddInstruction(new (arena_) HStaticFieldSet(cls,
@@ -1510,9 +1476,7 @@
       || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
       || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
     second = new (arena_) HDivZeroCheck(second, dex_pc);
-    Temporaries temps(graph_);
     current_block_->AddInstruction(second);
-    temps.Add(current_block_->GetLastInstruction());
   }
 
   if (isDiv) {
@@ -1531,21 +1495,15 @@
   uint8_t array_reg = instruction.VRegB_23x();
   uint8_t index_reg = instruction.VRegC_23x();
 
-  // We need one temporary for the null check, one for the index, and one for the length.
-  Temporaries temps(graph_);
-
   HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot, dex_pc);
   object = new (arena_) HNullCheck(object, dex_pc);
   current_block_->AddInstruction(object);
-  temps.Add(object);
 
   HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
   current_block_->AddInstruction(length);
-  temps.Add(length);
   HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt, dex_pc);
   index = new (arena_) HBoundsCheck(index, length, dex_pc);
   current_block_->AddInstruction(index);
-  temps.Add(index);
   if (is_put) {
     HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type, dex_pc);
     // TODO: Insert a type check node if the type is Object.
@@ -1586,8 +1544,6 @@
   bool is_reference_array = (primitive == 'L') || (primitive == '[');
   Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
 
-  Temporaries temps(graph_);
-  temps.Add(object);
   for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
     HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
     HInstruction* index = graph_->GetIntConstant(i, dex_pc);
@@ -1612,11 +1568,9 @@
 }
 
 void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  Temporaries temps(graph_);
   HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot, dex_pc);
   HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
   current_block_->AddInstruction(null_check);
-  temps.Add(null_check);
 
   HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
   current_block_->AddInstruction(length);
@@ -1733,10 +1687,6 @@
       compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
   current_block_->AddInstruction(cls);
 
-  // The class needs a temporary before being used by the type check.
-  Temporaries temps(graph_);
-  temps.Add(cls);
-
   TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
   if (instruction.Opcode() == Instruction::INSTANCE_OF) {
     current_block_->AddInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
@@ -2815,8 +2765,6 @@
 
     case Instruction::ARRAY_LENGTH: {
       HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot, dex_pc);
-      // No need for a temporary for the null check, it is the only input of the following
-      // instruction.
       object = new (arena_) HNullCheck(object, dex_pc);
       current_block_->AddInstruction(object);
       current_block_->AddInstruction(new (arena_) HArrayLength(object, dex_pc));
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 93e17d6..e3dd0e8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -80,7 +80,8 @@
         null_dex_cache_(),
         dex_cache_(null_dex_cache_) {}
 
-  bool BuildGraph(const DexFile::CodeItem& code);
+  GraphAnalysisResult BuildGraph(const DexFile::CodeItem& code,
+                                 StackHandleScopeCollection* handles);
 
   static constexpr const char* kBuilderPassName = "builder";
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e1b83f0..c2c8ccf 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -287,19 +287,6 @@
   }
 }
 
-Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  // The type of the previous instruction tells us if we need a single or double stack slot.
-  Primitive::Type type = temp->GetType();
-  int32_t temp_size = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble) ? 2 : 1;
-  // Use the temporary region (right below the dex registers).
-  int32_t slot = GetFrameSize() - FrameEntrySpillSize()
-                                - kVRegSize  // filler
-                                - (number_of_locals * kVRegSize)
-                                - ((temp_size + temp->GetIndex()) * kVRegSize);
-  return temp_size == 2 ? Location::DoubleStackSlot(slot) : Location::StackSlot(slot);
-}
-
 int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
   uint16_t reg_number = local->GetRegNumber();
   uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0a688cf..49c193e 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -187,7 +187,6 @@
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
-  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
   virtual void MoveConstant(Location destination, int32_t value) = 0;
   virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
@@ -203,7 +202,6 @@
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
   int32_t GetStackSlot(HLocal* local) const;
-  Location GetTemporaryLocation(HTemporary* temp) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e434932..87f52c6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1195,90 +1195,6 @@
   }
 }
 
-void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      int32_t value = GetInt32ValueOf(const_to_move);
-      if (location.IsRegister()) {
-        __ LoadImmediate(location.AsRegister<Register>(), value);
-      } else {
-        DCHECK(location.IsStackSlot());
-        __ LoadImmediate(IP, value);
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-      }
-    } else {
-      DCHECK(const_to_move->IsLongConstant()) << const_to_move->DebugName();
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegisterPair()) {
-        __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
-        __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
-      } else {
-        DCHECK(location.IsDoubleStackSlot());
-        __ LoadImmediate(IP, Low32Bits(value));
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-        __ LoadImmediate(IP, High32Bits(value));
-        __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
-      }
-    }
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, Location::StackSlot(stack_slot));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, Location::DoubleStackSlot(stack_slot));
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimFloat:
-        Move32(location, locations->Out());
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorARM::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ LoadImmediate(location.AsRegister<Register>(), value);
@@ -2163,6 +2079,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2181,6 +2099,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2265,6 +2185,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2364,6 +2286,10 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 8);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2381,6 +2307,10 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2482,6 +2412,10 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
+          __ ubfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -4933,14 +4867,6 @@
   }
 }
 
-void LocationsBuilderARM::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 558c9cf..cfd7a3b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -307,7 +307,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cfdf6b1..435ae5e 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1066,54 +1066,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-void CodeGeneratorARM64::Move(HInstruction* instruction,
-                              Location location,
-                              HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsCurrentMethod()) {
-    MoveLocation(location,
-                 Location::DoubleStackSlot(kCurrentMethodStackOffset),
-                 Primitive::kPrimVoid);
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    int64_t value = GetInt64ValueOf(instruction->AsConstant());
-    if (location.IsRegister()) {
-      Register dst = RegisterFrom(location, type);
-      DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
-             (instruction->IsLongConstant() && dst.Is64Bits()));
-      __ Mov(dst, value);
-    } else {
-      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
-          ? temps.AcquireW()
-          : temps.AcquireX();
-      __ Mov(temp, value);
-      __ Str(temp, StackOperandFrom(location));
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    MoveLocation(location, temp_location, type);
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
-    } else {
-      MoveLocation(location, Location::StackSlot(stack_slot), type);
-    }
-
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    MoveLocation(location, locations->Out(), type);
-  }
-}
-
 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
@@ -2976,30 +2928,128 @@
                         /* false_target */ nullptr);
 }
 
+enum SelectVariant {
+  kCsel,
+  kCselFalseConst,
+  kCselTrueConst,
+  kFcsel,
+};
+
+static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
+  return condition->IsCondition() &&
+         Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
+}
+
+static inline bool IsRecognizedCselConstant(HInstruction* constant) {
+  if (constant->IsConstant()) {
+    int64_t value = Int64FromConstant(constant->AsConstant());
+    if ((value == -1) || (value == 0) || (value == 1)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static inline SelectVariant GetSelectVariant(HSelect* select) {
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return kFcsel;
+  } else if (IsRecognizedCselConstant(select->GetFalseValue())) {
+    return kCselFalseConst;
+  } else if (IsRecognizedCselConstant(select->GetTrueValue())) {
+    return kCselTrueConst;
+  } else {
+    return kCsel;
+  }
+}
+
+static inline bool HasSwappedInputs(SelectVariant variant) {
+  return variant == kCselTrueConst;
+}
+
+static inline Condition GetConditionForSelect(HCondition* condition, SelectVariant variant) {
+  IfCondition cond = HasSwappedInputs(variant) ? condition->GetOppositeCondition()
+                                               : condition->GetCondition();
+  return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
+                                                     : ARM64Condition(cond);
+}
+
 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
-    locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+  switch (GetSelectVariant(select)) {
+    case kCsel:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    case kCselFalseConst:
+      locations->SetInAt(0, Location::ConstantLocation(select->InputAt(0)->AsConstant()));
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    case kCselTrueConst:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::ConstantLocation(select->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    case kFcsel:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
   }
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
     locations->SetInAt(2, Location::RequiresRegister());
   }
-  locations->SetOut(Location::SameAsFirstInput());
 }
 
 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  vixl::Label false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  HInstruction* cond = select->GetCondition();
+  SelectVariant variant = GetSelectVariant(select);
+  Condition csel_cond;
+
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    if (cond->IsCondition() && cond->GetNext() == select) {
+      // Condition codes set from previous instruction.
+      csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+    } else {
+      __ Cmp(InputRegisterAt(select, 2), 0);
+      csel_cond = HasSwappedInputs(variant) ? eq : ne;
+    }
+  } else if (IsConditionOnFloatingPointValues(cond)) {
+    Location rhs = cond->GetLocations()->InAt(1);
+    if (rhs.IsConstant()) {
+      DCHECK(IsFloatingPointZeroConstant(rhs.GetConstant()));
+      __ Fcmp(InputFPRegisterAt(cond, 0), 0.0);
+    } else {
+      __ Fcmp(InputFPRegisterAt(cond, 0), InputFPRegisterAt(cond, 1));
+    }
+    csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+  } else {
+    __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+    csel_cond = GetConditionForSelect(cond->AsCondition(), variant);
+  }
+
+  switch (variant) {
+    case kCsel:
+    case kCselFalseConst:
+      __ Csel(OutputRegister(select),
+              InputRegisterAt(select, 1),
+              InputOperandAt(select, 0),
+              csel_cond);
+      break;
+    case kCselTrueConst:
+      __ Csel(OutputRegister(select),
+              InputRegisterAt(select, 0),
+              InputOperandAt(select, 1),
+              csel_cond);
+      break;
+    case kFcsel:
+      __ Fcsel(OutputFPRegister(select),
+               InputFPRegisterAt(select, 1),
+               InputFPRegisterAt(select, 0),
+               csel_cond);
+      break;
+  }
 }
 
 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -4445,14 +4495,6 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a9d1bbd..360488e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -350,8 +350,6 @@
     return CommonGetLabelOf<vixl::Label>(block_labels_, block);
   }
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-
   size_t GetWordSize() const OVERRIDE {
     return kArm64WordSize;
   }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c450866..ad3e988 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -976,46 +976,6 @@
   __ LoadConst32(dst, value);
 }
 
-void CodeGeneratorMIPS::Move(HInstruction* instruction,
-                             Location location,
-                             HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    MoveConstant(location, instruction->AsConstant());
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      Move64(location, Location::DoubleStackSlot(stack_slot));
-    } else {
-      Move32(location, Location::StackSlot(stack_slot));
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    if (Primitive::Is64BitType(type)) {
-      Move64(location, locations->Out());
-    } else {
-      Move32(location, locations->Out());
-    }
-  }
-}
-
 void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* locations) {
   if (location.IsRegister()) {
     locations->AddTemp(location);
@@ -4794,14 +4754,6 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderMIPS::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 42b21ca..dd0641c 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -268,7 +268,6 @@
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void Move32(Location destination, Location source);
   void Move64(Location destination, Location source);
   void MoveConstant(Location location, HConstant* c);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index da98a89..119084e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -869,65 +869,6 @@
   }
 }
 
-void CodeGeneratorMIPS64::Move(HInstruction* instruction,
-                               Location location,
-                               HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  Primitive::Type type = instruction->GetType();
-  DCHECK_NE(type, Primitive::kPrimVoid);
-
-  if (instruction->IsCurrentMethod()) {
-    MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset), type);
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (instruction->IsIntConstant()
-             || instruction->IsLongConstant()
-             || instruction->IsNullConstant()) {
-    if (location.IsRegister()) {
-      // Move to GPR from constant
-      GpuRegister dst = location.AsRegister<GpuRegister>();
-      if (instruction->IsNullConstant() || instruction->IsIntConstant()) {
-        __ LoadConst32(dst, GetInt32ValueOf(instruction->AsConstant()));
-      } else {
-        __ LoadConst64(dst, instruction->AsLongConstant()->GetValue());
-      }
-    } else {
-      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
-      // Move to stack from constant
-      GpuRegister gpr = ZERO;
-      if (location.IsStackSlot()) {
-        int32_t value = GetInt32ValueOf(instruction->AsConstant());
-        if (value != 0) {
-          gpr = TMP;
-          __ LoadConst32(gpr, value);
-        }
-        __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
-      } else {
-        DCHECK(location.IsDoubleStackSlot());
-        int64_t value = instruction->AsLongConstant()->GetValue();
-        if (value != 0) {
-          gpr = TMP;
-          __ LoadConst64(gpr, value);
-        }
-        __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
-      }
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    MoveLocation(location, temp_location, type);
-  } else if (instruction->IsLoadLocal()) {
-    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    if (Primitive::Is64BitType(type)) {
-      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
-    } else {
-      MoveLocation(location, Location::StackSlot(stack_slot), type);
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    MoveLocation(location, locations->Out(), type);
-  }
-}
-
 void CodeGeneratorMIPS64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ LoadConst32(location.AsRegister<GpuRegister>(), value);
@@ -3946,14 +3887,6 @@
   GenerateSuspendCheck(instruction, nullptr);
 }
 
-void LocationsBuilderMIPS64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
@@ -4010,18 +3943,26 @@
         __ Andi(dst, src, 0xFFFF);
         break;
       case Primitive::kPrimByte:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seb(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
+          __ Seb(dst, dst);
+        } else {
+          __ Seb(dst, src);
+        }
         break;
       case Primitive::kPrimShort:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seh(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
+          __ Seh(dst, dst);
+        } else {
+          __ Seh(dst, src);
+        }
         break;
       case Primitive::kPrimInt:
       case Primitive::kPrimLong:
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 12a5fc6..eb7315a 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -268,8 +268,6 @@
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-
   size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index de62010..f032f51 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,7 +26,6 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
-#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -1127,91 +1126,6 @@
   }
 }
 
-void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move32(location, Location::StackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      Immediate imm(GetInt32ValueOf(const_to_move));
-      if (location.IsRegister()) {
-        __ movl(location.AsRegister<Register>(), imm);
-      } else if (location.IsStackSlot()) {
-        __ movl(Address(ESP, location.GetStackIndex()), imm);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    } else if (const_to_move->IsLongConstant()) {
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegisterPair()) {
-        __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
-        __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
-      } else if (location.IsDoubleStackSlot()) {
-        __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
-        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)),
-                Immediate(High32Bits(value)));
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), instruction);
-      }
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    if (temp_location.IsStackSlot()) {
-      Move32(location, temp_location);
-    } else {
-      DCHECK(temp_location.IsDoubleStackSlot());
-      Move64(location, temp_location);
-    }
-  } else if (instruction->IsLoadLocal()) {
-    int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, Location::StackSlot(slot));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, Location::DoubleStackSlot(slot));
-        break;
-
-      default:
-        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
-    }
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move32(location, locations->Out());
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move64(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   __ movl(location.AsRegister<Register>(), Immediate(value));
@@ -1361,7 +1275,7 @@
     }
     // Must be equal high, so compare the lows.
     codegen_->Compare32BitValue(left_low, val_low);
-  } else {
+  } else if (right.IsRegisterPair()) {
     Register right_high = right.AsRegisterPairHigh<Register>();
     Register right_low = right.AsRegisterPairLow<Register>();
 
@@ -1376,6 +1290,19 @@
     }
     // Must be equal high, so compare the lows.
     __ cmpl(left_low, right_low);
+  } else {
+    DCHECK(right.IsDoubleStackSlot());
+    __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+    if (if_cond == kCondNE) {
+      __ j(X86Condition(true_high_cond), true_label);
+    } else if (if_cond == kCondEQ) {
+      __ j(X86Condition(false_high_cond), false_label);
+    } else {
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
+    }
+    // Must be equal high, so compare the lows.
+    __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
   }
   // The last comparison might be unsigned.
   __ j(final_condition, true_label);
@@ -1590,30 +1517,131 @@
                                /* false_target */ nullptr);
 }
 
+static bool SelectCanUseCMOV(HSelect* select) {
+  // There are no conditional move instructions for XMMs.
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    return false;
+  }
+
+  // A FP condition doesn't generate the single CC that we need.
+  // In 32 bit mode, a long condition doesn't generate a single CC either.
+  HInstruction* condition = select->GetCondition();
+  if (condition->IsCondition()) {
+    Primitive::Type compare_type = condition->InputAt(0)->GetType();
+    if (compare_type == Primitive::kPrimLong ||
+        Primitive::IsFloatingPointType(compare_type)) {
+      return false;
+    }
+  }
+
+  // We can generate a CMOV for this Select.
+  return true;
+}
+
 void LocationsBuilderX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  Primitive::Type select_type = select->GetType();
-  HInstruction* cond = select->GetCondition();
-
-  if (Primitive::IsFloatingPointType(select_type)) {
+  if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
+    if (SelectCanUseCMOV(select)) {
+      if (select->InputAt(1)->IsConstant()) {
+        // Cmov can't handle a constant value.
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
+    } else {
+      locations->SetInAt(1, Location::Any());
+    }
   }
-  locations->SetInAt(1, Location::Any());
-  if (IsBooleanValueOrMaterializedCondition(cond)) {
-    locations->SetInAt(2, Location::Any());
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
   }
   locations->SetOut(Location::SameAsFirstInput());
 }
 
+void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+  Register lhs_reg = lhs.AsRegister<Register>();
+  if (rhs.IsConstant()) {
+    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+    codegen_->Compare32BitValue(lhs_reg, value);
+  } else if (rhs.IsStackSlot()) {
+    __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+  } else {
+    __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+  }
+}
+
 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
   LocationSummary* locations = select->GetLocations();
-  NearLabel false_target;
-  GenerateTestAndBranch<NearLabel>(
-      select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  if (SelectCanUseCMOV(select)) {
+    // If both the condition and the source types are integer, we can generate
+    // a CMOV to implement Select.
+
+    HInstruction* select_condition = select->GetCondition();
+    Condition cond = kNotEqual;
+
+    // Figure out how to test the 'condition'.
+    if (select_condition->IsCondition()) {
+      HCondition* condition = select_condition->AsCondition();
+      if (!condition->IsEmittedAtUseSite()) {
+        // This was a previously materialized condition.
+        // Can we use the existing condition code?
+        if (AreEflagsSetFrom(condition, select)) {
+          // Materialization was the previous instruction. Condition codes are right.
+          cond = X86Condition(condition->GetCondition());
+        } else {
+          // No, we have to recreate the condition code.
+          Register cond_reg = locations->InAt(2).AsRegister<Register>();
+          __ testl(cond_reg, cond_reg);
+        }
+      } else {
+        // We can't handle FP or long here.
+        DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+        DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+        LocationSummary* cond_locations = condition->GetLocations();
+        GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+        cond = X86Condition(condition->GetCondition());
+      }
+    } else {
+      // Must be a boolean condition, which needs to be compared to 0.
+      Register cond_reg = locations->InAt(2).AsRegister<Register>();
+      __ testl(cond_reg, cond_reg);
+    }
+
+    // If the condition is true, overwrite the output, which already contains false.
+    Location false_loc = locations->InAt(0);
+    Location true_loc = locations->InAt(1);
+    if (select->GetType() == Primitive::kPrimLong) {
+      // 64 bit conditional move.
+      Register false_high = false_loc.AsRegisterPairHigh<Register>();
+      Register false_low = false_loc.AsRegisterPairLow<Register>();
+      if (true_loc.IsRegisterPair()) {
+        __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
+        __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
+      } else {
+        __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
+        __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
+      }
+    } else {
+      // 32 bit conditional move.
+      Register false_reg = false_loc.AsRegister<Register>();
+      if (true_loc.IsRegister()) {
+        __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
+      } else {
+        __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
+      }
+    }
+  } else {
+    NearLabel false_target;
+    GenerateTestAndBranch<NearLabel>(
+        select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -1678,7 +1706,7 @@
   switch (cond->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+      locations->SetInAt(1, Location::Any());
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister());
       }
@@ -1727,15 +1755,7 @@
 
       // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
-
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        codegen_->Compare32BitValue(lhs.AsRegister<Register>(), constant);
-      } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-      }
+      GenerateIntCompare(lhs, rhs);
       __ setb(X86Condition(cond->GetCondition()), reg);
       return;
     }
@@ -2230,6 +2250,18 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong: {
+          // Type conversion from long to byte is a result of code transformations.
+          HInstruction* input = conversion->InputAt(0);
+          Location input_location = input->IsConstant()
+              ? Location::ConstantLocation(input->AsConstant())
+              : Location::RegisterPairLocation(EAX, EDX);
+          locations->SetInAt(0, input_location);
+          // Make the output overlap to please the register allocator. This greatly simplifies
+          // the validation of the linear scan implementation
+          locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+          break;
+        }
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2250,6 +2282,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2327,6 +2361,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2421,6 +2457,16 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2444,6 +2490,18 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2580,6 +2638,18 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -4163,15 +4233,7 @@
 
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimInt: {
-      Register left_reg = left.AsRegister<Register>();
-      if (right.IsConstant()) {
-        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
-        codegen_->Compare32BitValue(left_reg, value);
-      } else if (right.IsStackSlot()) {
-        __ cmpl(left_reg, Address(ESP, right.GetStackIndex()));
-      } else {
-        __ cmpl(left_reg, right.AsRegister<Register>());
-      }
+      GenerateIntCompare(left, right);
       break;
     }
     case Primitive::kPrimLong: {
@@ -5513,14 +5575,6 @@
   }
 }
 
-void LocationsBuilderX86::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 45e8ffa..63e9b2f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -297,6 +297,7 @@
                                    HBasicBlock* default_block);
 
   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+  void GenerateIntCompare(Location lhs, Location rhs);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -317,7 +318,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 99396cd..a53a6be 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1206,82 +1206,6 @@
   }
 }
 
-void CodeGeneratorX86_64::Move(HInstruction* instruction,
-                               Location location,
-                               HInstruction* move_for) {
-  LocationSummary* locations = instruction->GetLocations();
-  if (instruction->IsCurrentMethod()) {
-    Move(location, Location::DoubleStackSlot(kCurrentMethodStackOffset));
-  } else if (locations != nullptr && locations->Out().Equals(location)) {
-    return;
-  } else if (locations != nullptr && locations->Out().IsConstant()) {
-    HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
-      Immediate imm(GetInt32ValueOf(const_to_move));
-      if (location.IsRegister()) {
-        __ movl(location.AsRegister<CpuRegister>(), imm);
-      } else if (location.IsStackSlot()) {
-        __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    } else if (const_to_move->IsLongConstant()) {
-      int64_t value = const_to_move->AsLongConstant()->GetValue();
-      if (location.IsRegister()) {
-        Load64BitValue(location.AsRegister<CpuRegister>(), value);
-      } else if (location.IsDoubleStackSlot()) {
-        Store64BitValueToStack(location, value);
-      } else {
-        DCHECK(location.IsConstant());
-        DCHECK_EQ(location.GetConstant(), const_to_move);
-      }
-    }
-  } else if (instruction->IsLoadLocal()) {
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimFloat:
-        Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
-        break;
-
-      case Primitive::kPrimLong:
-      case Primitive::kPrimDouble:
-        Move(location,
-             Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected local type " << instruction->GetType();
-    }
-  } else if (instruction->IsTemporary()) {
-    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move(location, temp_location);
-  } else {
-    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
-    switch (instruction->GetType()) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimChar:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimInt:
-      case Primitive::kPrimNot:
-      case Primitive::kPrimLong:
-      case Primitive::kPrimFloat:
-      case Primitive::kPrimDouble:
-        Move(location, locations->Out());
-        break;
-
-      default:
-        LOG(FATAL) << "Unexpected type " << instruction->GetType();
-    }
-  }
-}
-
 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
   DCHECK(location.IsRegister());
   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
@@ -1627,14 +1551,16 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
   if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    // Since we can't use CMOV, there is no need to force 'true' into a register.
     locations->SetInAt(1, Location::Any());
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
     if (SelectCanUseCMOV(select)) {
-      locations->SetInAt(1, Location::RequiresRegister());
+      if (select->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::RequiresRegister());
+      } else {
+        locations->SetInAt(1, Location::Any());
+      }
     } else {
-      // Since we can't use CMOV, there is no need to force 'true' into a register.
       locations->SetInAt(1, Location::Any());
     }
   }
@@ -1650,7 +1576,7 @@
     // If both the condition and the source types are integer, we can generate
     // a CMOV to implement Select.
     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
-    CpuRegister value_true = locations->InAt(1).AsRegister<CpuRegister>();
+    Location value_true_loc = locations->InAt(1);
     DCHECK(locations->InAt(0).Equals(locations->Out()));
 
     HInstruction* select_condition = select->GetCondition();
@@ -1682,7 +1608,14 @@
 
     // If the condition is true, overwrite the output, which already contains false.
     // Generate the correct sized CMOV.
-    __ cmov(cond, value_false, value_true, select->GetType() == Primitive::kPrimLong);
+    bool is_64_bit = Primitive::Is64BitType(select->GetType());
+    if (value_true_loc.IsRegister()) {
+      __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
+    } else {
+      __ cmov(cond,
+              value_false,
+              Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
+    }
   } else {
     NearLabel false_target;
     GenerateTestAndBranch<NearLabel>(select,
@@ -2439,6 +2372,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2457,6 +2392,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2534,6 +2471,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2628,6 +2567,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2636,13 +2577,12 @@
           // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxb(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2654,6 +2594,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2662,13 +2604,12 @@
           // Processing a Dex `int-to-short' instruction.
           if (in.IsRegister()) {
             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2811,6 +2752,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2819,14 +2762,12 @@
           // Processing a Dex `int-to-char' instruction.
           if (in.IsRegister()) {
             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movzxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(
-                        in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -5142,14 +5083,6 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitTemporary(HTemporary* temp) {
-  temp->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unimplemented";
 }
@@ -5840,19 +5773,20 @@
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done;
-  // Avoid null check if we know obj is not null.
-  if (instruction->MustDoNullCheck()) {
-    __ testl(obj, obj);
-    __ j(kEqual, &done);
-  }
-
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5862,10 +5796,22 @@
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
@@ -5896,10 +5842,22 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kNotEqual, &loop);
+      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -5927,10 +5885,26 @@
       GenerateReferenceLoadTwoRegisters(
           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // We cannot use a NearLabel here, as its range might be too
+      // short in some cases when read barriers are enabled.  This has
+      // been observed for instance when the code emitted for this
+      // case uses high x86-64 registers (R8-R15).
+      Label done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
@@ -5969,11 +5943,23 @@
       GenerateReferenceLoadTwoRegisters(
           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
+      NearLabel done;
+      // Avoid null check if we know obj is not null.
+      if (instruction->MustDoNullCheck()) {
+        __ testl(obj, obj);
+        __ j(kEqual, &done);
+      }
+
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
       // We always go into the type check slow path for the unresolved
       // and interface check cases.
       //
@@ -5992,9 +5978,9 @@
       // call to the runtime not using a type checking slow path).
       // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
+      __ Bind(&done);
       break;
   }
-  __ Bind(&done);
 
   __ Bind(type_check_slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 72dddfd..97f6f84 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -299,7 +299,6 @@
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
   void Bind(HBasicBlock* block) OVERRIDE;
-  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 322a577..6be79fa 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -206,10 +206,13 @@
                     std::function<void(HGraph*)> hook_before_codegen,
                     bool has_result,
                     Expected expected) {
-  ASSERT_TRUE(graph->IsInSsaForm());
-
-  SSAChecker graph_checker(graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
+  if (!graph_checker.IsValid()) {
+    for (auto error : graph_checker.GetErrors()) {
+      std::cout << error << std::endl;
+    }
+  }
   ASSERT_TRUE(graph_checker.IsValid());
 
   SsaLivenessAnalysis liveness(graph, codegen);
@@ -292,14 +295,9 @@
   for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
-    HGraph* graph = CreateGraph(&arena);
-    HGraphBuilder builder(graph);
-    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-    bool graph_built = builder.BuildGraph(*item);
-    ASSERT_TRUE(graph_built);
+    HGraph* graph = CreateCFG(&arena, data);
     // Remove suspend checks, they cannot be executed in this context.
     RemoveSuspendChecks(graph);
-    TransformToSsa(graph);
     RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
   }
 }
@@ -310,14 +308,9 @@
   for (InstructionSet target_isa : GetTargetISAs()) {
     ArenaPool pool;
     ArenaAllocator arena(&pool);
-    HGraph* graph = CreateGraph(&arena);
-    HGraphBuilder builder(graph, Primitive::kPrimLong);
-    const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-    bool graph_built = builder.BuildGraph(*item);
-    ASSERT_TRUE(graph_built);
+    HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
     // Remove suspend checks, they cannot be executed in this context.
     RemoveSuspendChecks(graph);
-    TransformToSsa(graph);
     RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
   }
 }
@@ -640,6 +633,7 @@
     ArenaAllocator allocator(&pool);
 
     HGraph* graph = CreateGraph(&allocator);
+
     HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
     graph->AddBlock(entry);
     graph->SetEntryBlock(entry);
@@ -672,7 +666,7 @@
     else_block->AddInstruction(new (&allocator) HReturn(constant1));
 
     ASSERT_FALSE(equal->IsEmittedAtUseSite());
-    TransformToSsa(graph);
+    graph->BuildDominatorTree();
     PrepareForRegisterAllocation(graph).Run();
     ASSERT_TRUE(equal->IsEmittedAtUseSite());
 
@@ -723,7 +717,7 @@
       HReturn ret(&cmp_lt);
       code_block->AddInstruction(&ret);
 
-      TransformToSsa(graph);
+      graph->BuildDominatorTree();
       auto hook_before_codegen = [](HGraph* graph_in) {
         HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
         HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
@@ -777,9 +771,9 @@
       HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
       HLessThan cmp_lt(cst_lhs, cst_rhs);
       if_block->AddInstruction(&cmp_lt);
-      // We insert a temporary to separate the HIf from the HLessThan and force
-      // the materialization of the condition.
-      HTemporary force_materialization(0);
+      // We insert a dummy instruction to separate the HIf from the HLessThan
+      // and force the materialization of the condition.
+      HMemoryBarrier force_materialization(MemBarrierKind::kAnyAny, 0);
       if_block->AddInstruction(&force_materialization);
       HIf if_lt(&cmp_lt);
       if_block->AddInstruction(&if_lt);
@@ -791,7 +785,7 @@
       HReturn ret_ge(cst_ge);
       if_false_block->AddInstruction(&ret_ge);
 
-      TransformToSsa(graph);
+      graph->BuildDominatorTree();
       auto hook_before_codegen = [](HGraph* graph_in) {
         HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
         HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
@@ -907,7 +901,7 @@
   block->AddInstruction(comparison);
   block->AddInstruction(new (&allocator) HReturn(comparison));
 
-  TransformToSsa(graph);
+  graph->BuildDominatorTree();
   RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
 }
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index a8f65bf..9c69f8c 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -56,7 +56,6 @@
                             const std::string& expected_after_dce,
                             std::function<void(HGraph*)> check_after_cf) {
     ASSERT_NE(graph_, nullptr);
-    TransformToSsa(graph_);
 
     StringPrettyPrinter printer_before(graph_);
     printer_before.VisitInsertionOrder();
@@ -67,9 +66,9 @@
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
     HConstantFolding(graph_).Run();
-    SSAChecker ssa_checker_cf(graph_);
-    ssa_checker_cf.Run();
-    ASSERT_TRUE(ssa_checker_cf.IsValid());
+    GraphChecker graph_checker_cf(graph_);
+    graph_checker_cf.Run();
+    ASSERT_TRUE(graph_checker_cf.IsValid());
 
     StringPrettyPrinter printer_after_cf(graph_);
     printer_after_cf.VisitInsertionOrder();
@@ -79,9 +78,9 @@
     check_after_cf(graph_);
 
     HDeadCodeElimination(graph_).Run();
-    SSAChecker ssa_checker_dce(graph_);
-    ssa_checker_dce.Run();
-    ASSERT_TRUE(ssa_checker_dce.IsValid());
+    GraphChecker graph_checker_dce(graph_);
+    graph_checker_dce.Run();
+    ASSERT_TRUE(graph_checker_dce.IsValid());
 
     StringPrettyPrinter printer_after_dce(graph_);
     printer_after_dce.VisitInsertionOrder();
@@ -775,76 +774,87 @@
   HInstruction* zero = graph_->GetIntConstant(0);
   HInstruction* last;
   block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HAbove(parameter, zero));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HAboveOrEqual(zero, parameter));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HAboveOrEqual(parameter, zero));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelow(zero, parameter));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelow(parameter, zero));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelowOrEqual(zero, parameter));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero));
-  block->AddInstruction(new (&allocator_) HDeoptimize(last, 0));
+  block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
 
   entry_block->AddInstruction(new (&allocator_) HGoto());
   block->AddInstruction(new (&allocator_) HReturn(zero));
   exit_block->AddInstruction(new (&allocator_) HExit());
 
+  graph_->BuildDominatorTree();
+
   const std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [16, 14, 12, 10, 8, 6, 4, 2]\n"
+      "  0: ParameterValue [17, 17, 16, 15, 15, 14, 13, 13, 12, 11, 11, 10, 9, 9, "
+                           "8, 7, 7, 6, 5, 5, 4, 3, 3, 2]\n"
       "  1: IntConstant [19, 16, 14, 12, 10, 8, 6, 4, 2]\n"
       "  18: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
       "  2: Above(1, 0) [3]\n"
-      "  3: Deoptimize(2)\n"
+      "  3: Select(0, 0, 2)\n"
       "  4: Above(0, 1) [5]\n"
-      "  5: Deoptimize(4)\n"
+      "  5: Select(0, 0, 4)\n"
       "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Deoptimize(6)\n"
+      "  7: Select(0, 0, 6)\n"
       "  8: AboveOrEqual(0, 1) [9]\n"
-      "  9: Deoptimize(8)\n"
+      "  9: Select(0, 0, 8)\n"
       "  10: Below(1, 0) [11]\n"
-      "  11: Deoptimize(10)\n"
+      "  11: Select(0, 0, 10)\n"
       "  12: Below(0, 1) [13]\n"
-      "  13: Deoptimize(12)\n"
+      "  13: Select(0, 0, 12)\n"
       "  14: BelowOrEqual(1, 0) [15]\n"
-      "  15: Deoptimize(14)\n"
+      "  15: Select(0, 0, 14)\n"
       "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Deoptimize(16)\n"
+      "  17: Select(0, 0, 16)\n"
       "  19: Return(1)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
   const std::string expected_after_cf =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [16, 10, 6, 4]\n"
+      "  0: ParameterValue [17, 17, 16, 15, 15, 13, 13, 11, 11, 10, 9, 9, 7, 7, 6, 5, 5, 4, 3, 3]\n"
       "  1: IntConstant [13, 3, 19, 16, 10, 6, 4]\n"
       "  21: IntConstant [15, 9]\n"
       "  18: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  3: Deoptimize(1)\n"
+      "  3: Select(0, 0, 1)\n"
       "  4: Above(0, 1) [5]\n"
-      "  5: Deoptimize(4)\n"
+      "  5: Select(0, 0, 4)\n"
       "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Deoptimize(6)\n"
-      "  9: Deoptimize(21)\n"
+      "  7: Select(0, 0, 6)\n"
+      "  9: Select(0, 0, 21)\n"
       "  10: Below(1, 0) [11]\n"
-      "  11: Deoptimize(10)\n"
-      "  13: Deoptimize(1)\n"
-      "  15: Deoptimize(21)\n"
+      "  11: Select(0, 0, 10)\n"
+      "  13: Select(0, 0, 1)\n"
+      "  15: Select(0, 0, 21)\n"
       "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Deoptimize(16)\n"
+      "  17: Select(0, 0, 16)\n"
       "  19: Return(1)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
-  const std::string expected_after_dce = expected_after_cf;
+  const std::string expected_after_dce =
+      "BasicBlock 0, succ: 1\n"
+      "  0: ParameterValue\n"
+      "  1: IntConstant [19]\n"
+      "  18: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  19: Return(1)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  20: Exit\n";
 
   auto check_after_cf = [](HGraph* graph) {
     CHECK(graph != nullptr);
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index f0f98ef..930795b 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -36,8 +36,6 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  TransformToSsa(graph);
-
   StringPrettyPrinter printer_before(graph);
   printer_before.VisitInsertionOrder();
   std::string actual_before = printer_before.str();
@@ -47,9 +45,9 @@
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
   HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker(graph);
-  ssa_checker.Run();
-  ASSERT_TRUE(ssa_checker.IsValid());
+  GraphChecker graph_checker(graph);
+  graph_checker.Run();
+  ASSERT_TRUE(graph_checker.IsValid());
 
   StringPrettyPrinter printer_after(graph);
   printer_after.VisitInsertionOrder();
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index feb8b20..50c677a 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -24,15 +24,12 @@
 
 namespace art {
 
+class OptimizerTest : public CommonCompilerTest {};
+
 static void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  graph->BuildDominatorTree();
+  HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_EQ(graph->GetBlocks().size(), blocks_length);
   for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == kInvalidBlockId) {
@@ -50,7 +47,7 @@
   }
 }
 
-TEST(OptimizerTest, ReturnVoid) {
+TEST_F(OptimizerTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);  // Block number 1
 
@@ -63,7 +60,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG1) {
+TEST_F(OptimizerTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,  // Block number 1
     Instruction::RETURN_VOID);  // Block number 2
@@ -78,7 +75,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG2) {
+TEST_F(OptimizerTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,  // Block number 1
     Instruction::GOTO | 0x100,  // Block number 2
@@ -95,7 +92,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG3) {
+TEST_F(OptimizerTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,    // Block number 1
     Instruction::RETURN_VOID,     // Block number 2
@@ -126,7 +123,7 @@
   TestCode(data3, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG4) {
+TEST_F(OptimizerTest, CFG4) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
     Instruction::GOTO | 0xFF00);
@@ -146,7 +143,7 @@
   TestCode(data2, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG5) {
+TEST_F(OptimizerTest, CFG5) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,     // Block number 1
     Instruction::GOTO | 0x100,    // Dead block
@@ -163,7 +160,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG6) {
+TEST_F(OptimizerTest, CFG6) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -182,7 +179,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG7) {
+TEST_F(OptimizerTest, CFG7) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -202,7 +199,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG8) {
+TEST_F(OptimizerTest, CFG8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -223,7 +220,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG9) {
+TEST_F(OptimizerTest, CFG9) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,        // Block number 1
@@ -244,7 +241,7 @@
   TestCode(data, dominators, sizeof(dominators) / sizeof(int));
 }
 
-TEST(OptimizerTest, CFG10) {
+TEST_F(OptimizerTest, CFG10) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 6,  // Block number 1
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 4770fa2..04789d9 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -27,16 +27,9 @@
 
 namespace art {
 
-static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  graph->BuildDominatorTree();
-  return graph;
-}
+class FindLoopsTest : public CommonCompilerTest {};
 
-TEST(FindLoopsTest, CFG1) {
+TEST_F(FindLoopsTest, CFG1) {
   // Constant is not used.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -44,26 +37,26 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG2) {
+TEST_F(FindLoopsTest, CFG2) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG3) {
+TEST_F(FindLoopsTest, CFG3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
@@ -73,13 +66,13 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG4) {
+TEST_F(FindLoopsTest, CFG4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 4,
@@ -90,13 +83,13 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
 }
 
-TEST(FindLoopsTest, CFG5) {
+TEST_F(FindLoopsTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -105,7 +98,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   for (HBasicBlock* block : graph->GetBlocks()) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
   }
@@ -137,7 +130,7 @@
   }
 }
 
-TEST(FindLoopsTest, Loop1) {
+TEST_F(FindLoopsTest, Loop1) {
   // Simple loop with one preheader and one back edge.
   // var a = 0;
   // while (a == a) {
@@ -151,7 +144,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -162,7 +155,7 @@
   TestBlock(graph, 5, false, kInvalidBlockId);  // exit block
 }
 
-TEST(FindLoopsTest, Loop2) {
+TEST_F(FindLoopsTest, Loop2) {
   // Make sure we support a preheader of a loop not being the first predecessor
   // in the predecessor list of the header.
   // var a = 0;
@@ -179,7 +172,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // goto block
@@ -191,7 +184,7 @@
   TestBlock(graph, 6, false, kInvalidBlockId);  // exit block
 }
 
-TEST(FindLoopsTest, Loop3) {
+TEST_F(FindLoopsTest, Loop3) {
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -204,7 +197,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // goto block
@@ -218,7 +211,7 @@
   TestBlock(graph, 8, false, kInvalidBlockId);  // synthesized pre header
 }
 
-TEST(FindLoopsTest, Loop4) {
+TEST_F(FindLoopsTest, Loop4) {
   // Test loop with originally two back edges.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -230,7 +223,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -244,7 +237,7 @@
 }
 
 
-TEST(FindLoopsTest, Loop5) {
+TEST_F(FindLoopsTest, Loop5) {
   // Test loop with two exit edges.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -256,7 +249,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header
@@ -270,7 +263,7 @@
   TestBlock(graph, 8, false, kInvalidBlockId);  // synthesized block at the loop exit
 }
 
-TEST(FindLoopsTest, InnerLoop) {
+TEST_F(FindLoopsTest, InnerLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 6,
@@ -281,7 +274,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of outer loop
@@ -301,7 +294,7 @@
                     *graph->GetBlocks()[3]->GetLoopInformation()));
 }
 
-TEST(FindLoopsTest, TwoLoops) {
+TEST_F(FindLoopsTest, TwoLoops) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -312,7 +305,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of first loop
@@ -331,7 +324,7 @@
                     *graph->GetBlocks()[4]->GetLoopInformation()));
 }
 
-TEST(FindLoopsTest, NonNaturalLoop) {
+TEST_F(FindLoopsTest, NonNaturalLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -342,14 +335,14 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_TRUE(graph->GetBlocks()[3]->IsLoopHeader());
   HLoopInformation* info = graph->GetBlocks()[3]->GetLoopInformation();
   ASSERT_EQ(1u, info->NumberOfBackEdges());
   ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges()[0]));
 }
 
-TEST(FindLoopsTest, DoWhileLoop) {
+TEST_F(FindLoopsTest, DoWhileLoop) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::GOTO | 0x0100,
@@ -358,7 +351,7 @@
 
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
-  HGraph* graph = TestCode(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
 
   TestBlock(graph, 0, false, kInvalidBlockId);  // entry block
   TestBlock(graph, 1, false, kInvalidBlockId);  // pre header of first loop
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 962e77d..e6e9177 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -149,6 +149,103 @@
     }
     current->Accept(this);
   }
+
+  // Ensure that catch blocks are not normal successors, and normal blocks are
+  // never exceptional successors.
+  for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+    if (successor->IsCatchBlock()) {
+      AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
+                            successor->GetBlockId(),
+                            block->GetBlockId()));
+    }
+  }
+  for (HBasicBlock* successor : block->GetExceptionalSuccessors()) {
+    if (!successor->IsCatchBlock()) {
+      AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
+                            successor->GetBlockId(),
+                            block->GetBlockId()));
+    }
+  }
+
+  // Ensure dominated blocks have `block` as the dominator.
+  for (HBasicBlock* dominated : block->GetDominatedBlocks()) {
+    if (dominated->GetDominator() != block) {
+      AddError(StringPrintf("Block %d should be the dominator of %d.",
+                            block->GetBlockId(),
+                            dominated->GetBlockId()));
+    }
+  }
+
+  // Ensure there is no critical edge (i.e., an edge connecting a
+  // block with multiple successors to a block with multiple
+  // predecessors). Exceptional edges are synthesized and hence
+  // not accounted for.
+  if (block->GetSuccessors().size() > 1) {
+    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+      if (successor->IsExitBlock() &&
+          block->IsSingleTryBoundary() &&
+          block->GetPredecessors().size() == 1u &&
+          block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
+        // Allowed critical edge Throw->TryBoundary->Exit.
+      } else if (successor->GetPredecessors().size() > 1) {
+        AddError(StringPrintf("Critical edge between blocks %d and %d.",
+                              block->GetBlockId(),
+                              successor->GetBlockId()));
+      }
+    }
+  }
+
+  // Ensure try membership information is consistent.
+  if (block->IsCatchBlock()) {
+    if (block->IsTryBlock()) {
+      const HTryBoundary& try_entry = block->GetTryCatchInformation()->GetTryEntry();
+      AddError(StringPrintf("Catch blocks should not be try blocks but catch block %d "
+                            "has try entry %s:%d.",
+                            block->GetBlockId(),
+                            try_entry.DebugName(),
+                            try_entry.GetId()));
+    }
+
+    if (block->IsLoopHeader()) {
+      AddError(StringPrintf("Catch blocks should not be loop headers but catch block %d is.",
+                            block->GetBlockId()));
+    }
+  } else {
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
+      const HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors();
+      if (block->IsTryBlock()) {
+        const HTryBoundary& stored_try_entry = block->GetTryCatchInformation()->GetTryEntry();
+        if (incoming_try_entry == nullptr) {
+          AddError(StringPrintf("Block %d has try entry %s:%d but no try entry follows "
+                                "from predecessor %d.",
+                                block->GetBlockId(),
+                                stored_try_entry.DebugName(),
+                                stored_try_entry.GetId(),
+                                predecessor->GetBlockId()));
+        } else if (!incoming_try_entry->HasSameExceptionHandlersAs(stored_try_entry)) {
+          AddError(StringPrintf("Block %d has try entry %s:%d which is not consistent "
+                                "with %s:%d that follows from predecessor %d.",
+                                block->GetBlockId(),
+                                stored_try_entry.DebugName(),
+                                stored_try_entry.GetId(),
+                                incoming_try_entry->DebugName(),
+                                incoming_try_entry->GetId(),
+                                predecessor->GetBlockId()));
+        }
+      } else if (incoming_try_entry != nullptr) {
+        AddError(StringPrintf("Block %d is not a try block but try entry %s:%d follows "
+                              "from predecessor %d.",
+                              block->GetBlockId(),
+                              incoming_try_entry->DebugName(),
+                              incoming_try_entry->GetId(),
+                              predecessor->GetBlockId()));
+      }
+    }
+  }
+
+  if (block->IsLoopHeader()) {
+    HandleLoop(block);
+  }
 }
 
 void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
@@ -168,7 +265,7 @@
 
   // Ensure that all exception handlers are catch blocks.
   // Note that a normal-flow successor may be a catch block before CFG
-  // simplification. We only test normal-flow successors in SsaChecker.
+  // simplification. We only test normal-flow successors in GraphChecker.
   for (HBasicBlock* handler : handlers) {
     if (!handler->IsCatchBlock()) {
       AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
@@ -303,6 +400,88 @@
                             input->GetId()));
     }
   }
+
+  // Ensure an instruction dominates all its uses.
+  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
+       !use_it.Done(); use_it.Advance()) {
+    HInstruction* use = use_it.Current()->GetUser();
+    if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
+      AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
+                            "use %s:%d in block %d.",
+                            instruction->DebugName(),
+                            instruction->GetId(),
+                            current_block_->GetBlockId(),
+                            use->DebugName(),
+                            use->GetId(),
+                            use->GetBlock()->GetBlockId()));
+    }
+  }
+
+  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+                          "but does not have one.",
+                          instruction->DebugName(),
+                          instruction->GetId(),
+                          current_block_->GetBlockId()));
+  }
+
+  // Ensure an instruction having an environment is dominated by the
+  // instructions contained in the environment.
+  for (HEnvironment* environment = instruction->GetEnvironment();
+       environment != nullptr;
+       environment = environment->GetParent()) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* env_instruction = environment->GetInstructionAt(i);
+      if (env_instruction != nullptr
+          && !env_instruction->StrictlyDominates(instruction)) {
+        AddError(StringPrintf("Instruction %d in environment of instruction %d "
+                              "from block %d does not dominate instruction %d.",
+                              env_instruction->GetId(),
+                              instruction->GetId(),
+                              current_block_->GetBlockId(),
+                              instruction->GetId()));
+      }
+    }
+  }
+
+  // Ensure that reference type instructions have reference type info.
+  if (instruction->GetType() == Primitive::kPrimNot) {
+    ScopedObjectAccess soa(Thread::Current());
+    if (!instruction->GetReferenceTypeInfo().IsValid()) {
+      AddError(StringPrintf("Reference type instruction %s:%d does not have "
+                            "valid reference type information.",
+                            instruction->DebugName(),
+                            instruction->GetId()));
+    }
+  }
+
+  if (instruction->CanThrowIntoCatchBlock()) {
+    // Find the top-level environment. This corresponds to the environment of
+    // the catch block since we do not inline methods with try/catch.
+    HEnvironment* environment = instruction->GetEnvironment();
+    while (environment->GetParent() != nullptr) {
+      environment = environment->GetParent();
+    }
+
+    // Find all catch blocks and test that `instruction` has an environment
+    // value for each one.
+    const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+    for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+      for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        HPhi* catch_phi = phi_it.Current()->AsPhi();
+        if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
+          AddError(StringPrintf("Instruction %s:%d throws into catch block %d "
+                                "with catch phi %d for vreg %d but its "
+                                "corresponding environment slot is empty.",
+                                instruction->DebugName(),
+                                instruction->GetId(),
+                                catch_block->GetBlockId(),
+                                catch_phi->GetId(),
+                                catch_phi->GetRegNumber()));
+        }
+      }
+    }
+  }
 }
 
 void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -371,108 +550,7 @@
   }
 }
 
-void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
-  super_type::VisitBasicBlock(block);
-
-  // Ensure that catch blocks are not normal successors, and normal blocks are
-  // never exceptional successors.
-  for (HBasicBlock* successor : block->GetNormalSuccessors()) {
-    if (successor->IsCatchBlock()) {
-      AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
-                            successor->GetBlockId(),
-                            block->GetBlockId()));
-    }
-  }
-  for (HBasicBlock* successor : block->GetExceptionalSuccessors()) {
-    if (!successor->IsCatchBlock()) {
-      AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
-                            successor->GetBlockId(),
-                            block->GetBlockId()));
-    }
-  }
-
-  // Ensure dominated blocks have `block` as the dominator.
-  for (HBasicBlock* dominated : block->GetDominatedBlocks()) {
-    if (dominated->GetDominator() != block) {
-      AddError(StringPrintf("Block %d should be the dominator of %d.",
-                            block->GetBlockId(),
-                            dominated->GetBlockId()));
-    }
-  }
-
-  // Ensure there is no critical edge (i.e., an edge connecting a
-  // block with multiple successors to a block with multiple
-  // predecessors). Exceptional edges are synthesized and hence
-  // not accounted for.
-  if (block->GetSuccessors().size() > 1) {
-    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
-      if (successor->IsExitBlock() &&
-          block->IsSingleTryBoundary() &&
-          block->GetPredecessors().size() == 1u &&
-          block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
-        // Allowed critical edge Throw->TryBoundary->Exit.
-      } else if (successor->GetPredecessors().size() > 1) {
-        AddError(StringPrintf("Critical edge between blocks %d and %d.",
-                              block->GetBlockId(),
-                              successor->GetBlockId()));
-      }
-    }
-  }
-
-  // Ensure try membership information is consistent.
-  if (block->IsCatchBlock()) {
-    if (block->IsTryBlock()) {
-      const HTryBoundary& try_entry = block->GetTryCatchInformation()->GetTryEntry();
-      AddError(StringPrintf("Catch blocks should not be try blocks but catch block %d "
-                            "has try entry %s:%d.",
-                            block->GetBlockId(),
-                            try_entry.DebugName(),
-                            try_entry.GetId()));
-    }
-
-    if (block->IsLoopHeader()) {
-      AddError(StringPrintf("Catch blocks should not be loop headers but catch block %d is.",
-                            block->GetBlockId()));
-    }
-  } else {
-    for (HBasicBlock* predecessor : block->GetPredecessors()) {
-      const HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors();
-      if (block->IsTryBlock()) {
-        const HTryBoundary& stored_try_entry = block->GetTryCatchInformation()->GetTryEntry();
-        if (incoming_try_entry == nullptr) {
-          AddError(StringPrintf("Block %d has try entry %s:%d but no try entry follows "
-                                "from predecessor %d.",
-                                block->GetBlockId(),
-                                stored_try_entry.DebugName(),
-                                stored_try_entry.GetId(),
-                                predecessor->GetBlockId()));
-        } else if (!incoming_try_entry->HasSameExceptionHandlersAs(stored_try_entry)) {
-          AddError(StringPrintf("Block %d has try entry %s:%d which is not consistent "
-                                "with %s:%d that follows from predecessor %d.",
-                                block->GetBlockId(),
-                                stored_try_entry.DebugName(),
-                                stored_try_entry.GetId(),
-                                incoming_try_entry->DebugName(),
-                                incoming_try_entry->GetId(),
-                                predecessor->GetBlockId()));
-        }
-      } else if (incoming_try_entry != nullptr) {
-        AddError(StringPrintf("Block %d is not a try block but try entry %s:%d follows "
-                              "from predecessor %d.",
-                              block->GetBlockId(),
-                              incoming_try_entry->DebugName(),
-                              incoming_try_entry->GetId(),
-                              predecessor->GetBlockId()));
-      }
-    }
-  }
-
-  if (block->IsLoopHeader()) {
-    CheckLoop(block);
-  }
-}
-
-void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
+void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
@@ -582,92 +660,6 @@
   }
 }
 
-void SSAChecker::VisitInstruction(HInstruction* instruction) {
-  super_type::VisitInstruction(instruction);
-
-  // Ensure an instruction dominates all its uses.
-  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
-       !use_it.Done(); use_it.Advance()) {
-    HInstruction* use = use_it.Current()->GetUser();
-    if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
-      AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
-                            "use %s:%d in block %d.",
-                            instruction->DebugName(),
-                            instruction->GetId(),
-                            current_block_->GetBlockId(),
-                            use->DebugName(),
-                            use->GetId(),
-                            use->GetBlock()->GetBlockId()));
-    }
-  }
-
-  if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
-    AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
-                          "but does not have one.",
-                          instruction->DebugName(),
-                          instruction->GetId(),
-                          current_block_->GetBlockId()));
-  }
-
-  // Ensure an instruction having an environment is dominated by the
-  // instructions contained in the environment.
-  for (HEnvironment* environment = instruction->GetEnvironment();
-       environment != nullptr;
-       environment = environment->GetParent()) {
-    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-      HInstruction* env_instruction = environment->GetInstructionAt(i);
-      if (env_instruction != nullptr
-          && !env_instruction->StrictlyDominates(instruction)) {
-        AddError(StringPrintf("Instruction %d in environment of instruction %d "
-                              "from block %d does not dominate instruction %d.",
-                              env_instruction->GetId(),
-                              instruction->GetId(),
-                              current_block_->GetBlockId(),
-                              instruction->GetId()));
-      }
-    }
-  }
-
-  // Ensure that reference type instructions have reference type info.
-  if (instruction->GetType() == Primitive::kPrimNot) {
-    ScopedObjectAccess soa(Thread::Current());
-    if (!instruction->GetReferenceTypeInfo().IsValid()) {
-      AddError(StringPrintf("Reference type instruction %s:%d does not have "
-                            "valid reference type information.",
-                            instruction->DebugName(),
-                            instruction->GetId()));
-    }
-  }
-
-  if (instruction->CanThrowIntoCatchBlock()) {
-    // Find the top-level environment. This corresponds to the environment of
-    // the catch block since we do not inline methods with try/catch.
-    HEnvironment* environment = instruction->GetEnvironment();
-    while (environment->GetParent() != nullptr) {
-      environment = environment->GetParent();
-    }
-
-    // Find all catch blocks and test that `instruction` has an environment
-    // value for each one.
-    const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
-      for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        HPhi* catch_phi = phi_it.Current()->AsPhi();
-        if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
-          AddError(StringPrintf("Instruction %s:%d throws into catch block %d "
-                                "with catch phi %d for vreg %d but its "
-                                "corresponding environment slot is empty.",
-                                instruction->DebugName(),
-                                instruction->GetId(),
-                                catch_block->GetBlockId(),
-                                catch_phi->GetId(),
-                                catch_phi->GetRegNumber()));
-        }
-      }
-    }
-  }
-}
-
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -710,7 +702,7 @@
   }
 }
 
-void SSAChecker::VisitPhi(HPhi* phi) {
+void GraphChecker::VisitPhi(HPhi* phi) {
   VisitInstruction(phi);
 
   // Ensure the first input of a phi is not itself.
@@ -846,7 +838,7 @@
   }
 }
 
-void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
+void GraphChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
   HInstruction* input = instruction->InputAt(input_index);
   if (input->IsIntConstant()) {
     int32_t value = input->AsIntConstant()->GetValue();
@@ -876,7 +868,7 @@
   }
 }
 
-void SSAChecker::VisitPackedSwitch(HPackedSwitch* instruction) {
+void GraphChecker::VisitPackedSwitch(HPackedSwitch* instruction) {
   VisitInstruction(instruction);
   // Check that the number of block successors matches the switch count plus
   // one for the default block.
@@ -892,22 +884,22 @@
   }
 }
 
-void SSAChecker::VisitIf(HIf* instruction) {
+void GraphChecker::VisitIf(HIf* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
 }
 
-void SSAChecker::VisitSelect(HSelect* instruction) {
+void GraphChecker::VisitSelect(HSelect* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 2);
 }
 
-void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) {
+void GraphChecker::VisitBooleanNot(HBooleanNot* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
 }
 
-void SSAChecker::VisitCondition(HCondition* op) {
+void GraphChecker::VisitCondition(HCondition* op) {
   VisitInstruction(op);
   if (op->GetType() != Primitive::kPrimBoolean) {
     AddError(StringPrintf(
@@ -937,7 +929,7 @@
   }
 }
 
-void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
+void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
   VisitInstruction(op);
   if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
     if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
@@ -979,7 +971,7 @@
   }
 }
 
-void SSAChecker::VisitConstant(HConstant* instruction) {
+void GraphChecker::VisitConstant(HConstant* instruction) {
   HBasicBlock* block = instruction->GetBlock();
   if (!block->IsEntryBlock()) {
     AddError(StringPrintf(
@@ -990,7 +982,7 @@
   }
 }
 
-void SSAChecker::VisitBoundType(HBoundType* instruction) {
+void GraphChecker::VisitBoundType(HBoundType* instruction) {
   VisitInstruction(instruction);
 
   ScopedObjectAccess soa(Thread::Current());
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 8724cde..52252cd 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -32,34 +32,38 @@
       dump_prefix_(dump_prefix),
       seen_ids_(graph->GetArena(), graph->GetCurrentInstructionId(), false) {}
 
-  // Check the whole graph (in insertion order).
-  virtual void Run() { VisitInsertionOrder(); }
+  // Check the whole graph (in reverse post-order).
+  void Run() {
+    // VisitReversePostOrder is used instead of VisitInsertionOrder,
+    // as the latter might visit dead blocks removed by the dominator
+    // computation.
+    VisitReversePostOrder();
+  }
 
-  // Check `block`.
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
 
-  // Check `instruction`.
   void VisitInstruction(HInstruction* instruction) OVERRIDE;
+  void VisitPhi(HPhi* phi) OVERRIDE;
 
-  // Perform control-flow graph checks on instruction.
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
-
-  // Check that the HasBoundsChecks() flag is set for bounds checks.
+  void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
+  void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
+  void VisitBoundType(HBoundType* instruction) OVERRIDE;
   void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
-
-  // Check successors of blocks ending in TryBoundary.
-  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
-
-  // Check that LoadException is the first instruction in a catch block.
-  void VisitLoadException(HLoadException* load) OVERRIDE;
-
-  // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
+  void VisitCondition(HCondition* op) OVERRIDE;
+  void VisitConstant(HConstant* instruction) OVERRIDE;
+  void VisitIf(HIf* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
-
-  // Check that the Return and ReturnVoid jump to the exit block.
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitLoadException(HLoadException* load) OVERRIDE;
+  void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
   void VisitReturn(HReturn* ret) OVERRIDE;
   void VisitReturnVoid(HReturnVoid* ret) OVERRIDE;
+  void VisitSelect(HSelect* instruction) OVERRIDE;
+  void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
+
+  void HandleLoop(HBasicBlock* loop_header);
+  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
 
   // Was the last visit of the graph valid?
   bool IsValid() const {
@@ -97,46 +101,6 @@
   DISALLOW_COPY_AND_ASSIGN(GraphChecker);
 };
 
-
-// An SSA graph visitor performing various checks.
-class SSAChecker : public GraphChecker {
- public:
-  typedef GraphChecker super_type;
-
-  explicit SSAChecker(HGraph* graph)
-    : GraphChecker(graph, "art::SSAChecker: ") {}
-
-  // Check the whole graph (in reverse post-order).
-  void Run() OVERRIDE {
-    // VisitReversePostOrder is used instead of VisitInsertionOrder,
-    // as the latter might visit dead blocks removed by the dominator
-    // computation.
-    VisitReversePostOrder();
-  }
-
-  // Perform SSA form checks on `block`.
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-  // Loop-related checks from block `loop_header`.
-  void CheckLoop(HBasicBlock* loop_header);
-
-  // Perform SSA form checks on instructions.
-  void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitPhi(HPhi* phi) OVERRIDE;
-  void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
-  void VisitCondition(HCondition* op) OVERRIDE;
-  void VisitIf(HIf* instruction) OVERRIDE;
-  void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
-  void VisitSelect(HSelect* instruction) OVERRIDE;
-  void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
-  void VisitConstant(HConstant* instruction) OVERRIDE;
-  void VisitBoundType(HBoundType* instruction) OVERRIDE;
-
-  void HandleBooleanInput(HInstruction* instruction, size_t input_index);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(SSAChecker);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index d10df4c..2b82319 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -38,6 +38,7 @@
   graph->AddBlock(exit_block);
   graph->SetExitBlock(exit_block);
   entry_block->AddSuccessor(exit_block);
+  graph->BuildDominatorTree();
   return graph;
 }
 
@@ -52,28 +53,16 @@
   ASSERT_TRUE(graph_checker.IsValid());
 }
 
-static void TestCodeSSA(const uint16_t* data) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data);
-  ASSERT_NE(graph, nullptr);
+class GraphCheckerTest : public CommonCompilerTest {};
 
-  TransformToSsa(graph);
-
-  SSAChecker ssa_checker(graph);
-  ssa_checker.Run();
-  ASSERT_TRUE(ssa_checker.IsValid());
-}
-
-
-TEST(GraphChecker, ReturnVoid) {
+TEST_F(GraphCheckerTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);
 
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG1) {
+TEST_F(GraphCheckerTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::GOTO | 0x100,
       Instruction::RETURN_VOID);
@@ -81,7 +70,7 @@
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG2) {
+TEST_F(GraphCheckerTest, CFG2) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -91,7 +80,7 @@
   TestCode(data);
 }
 
-TEST(GraphChecker, CFG3) {
+TEST_F(GraphCheckerTest, CFG3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
@@ -103,7 +92,7 @@
 
 // Test case with an invalid graph containing inconsistent
 // predecessor/successor arcs in CFG.
-TEST(GraphChecker, InconsistentPredecessorsAndSuccessors) {
+TEST_F(GraphCheckerTest, InconsistentPredecessorsAndSuccessors) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -121,7 +110,7 @@
 
 // Test case with an invalid graph containing a non-branch last
 // instruction in a block.
-TEST(GraphChecker, BlockEndingWithNonBranchInstruction) {
+TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -141,9 +130,7 @@
   ASSERT_FALSE(graph_checker.IsValid());
 }
 
-class SSACheckerTest : public CommonCompilerTest {};
-
-TEST_F(SSACheckerTest, SSAPhi) {
+TEST_F(GraphCheckerTest, SSAPhi) {
   // This code creates one Phi function during the conversion to SSA form.
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -151,7 +138,7 @@
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::RETURN | 0 << 8);
 
-  TestCodeSSA(data);
+  TestCode(data);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9d796c1..4cf0eb1 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -22,6 +22,7 @@
 #include <sstream>
 
 #include "bounds_check_elimination.h"
+#include "builder.h"
 #include "code_generator.h"
 #include "dead_code_elimination.h"
 #include "disassembler.h"
@@ -31,7 +32,6 @@
 #include "optimization.h"
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
-#include "ssa_builder.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
 
@@ -368,11 +368,13 @@
   }
 
   void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
+    StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
         << check_cast->MustDoNullCheck() << std::noboolalpha;
   }
 
   void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
+    StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
     StartAttributeStream("must_do_null_check") << std::boolalpha
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
@@ -508,7 +510,7 @@
         || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
         || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
         || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
-        || IsPass(SsaBuilder::kSsaBuilderPassName)) {
+        || IsPass(HGraphBuilder::kBuilderPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
         StartAttributeStream("loop") << "none";
@@ -525,7 +527,7 @@
       }
     }
 
-    if ((IsPass(SsaBuilder::kSsaBuilderPassName)
+    if ((IsPass(HGraphBuilder::kBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
@@ -545,7 +547,7 @@
         // doesn't run or doesn't inline anything, the NullConstant remains untyped.
         // So we should check NullConstants for validity only after reference type propagation.
         DCHECK(graph_in_bad_state_ ||
-               (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName)))
+               (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName)))
             << instruction->DebugName() << instruction->GetId() << " has invalid rti "
             << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
       }
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 1f4eaf3..56dc088 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -100,7 +100,7 @@
   ASSERT_EQ(different_offset->GetBlock(), block);
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 
-  TransformToSsa(graph);
+  graph->BuildDominatorTree();
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -182,7 +182,7 @@
                                                           0));
   join->AddInstruction(new (&allocator) HExit());
 
-  TransformToSsa(graph);
+  graph->BuildDominatorTree();
   SideEffectsAnalysis side_effects(graph);
   side_effects.Run();
   GVNOptimization(graph, side_effects).Run();
@@ -288,7 +288,7 @@
   ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body);
   ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
 
-  TransformToSsa(graph);
+  graph->BuildDominatorTree();
   {
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
@@ -364,7 +364,7 @@
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
   outer_loop_exit->AddInstruction(new (&allocator) HExit());
 
-  TransformToSsa(graph);
+  graph->BuildDominatorTree();
 
   ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
       *outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 29a1845..89e4690 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -86,39 +86,28 @@
     constant1_ = graph_->GetIntConstant(1);
     constant100_ = graph_->GetIntConstant(100);
     float_constant0_ = graph_->GetFloatConstant(0.0f);
-    induc_ = new (&allocator_) HLocal(n);
-    entry_->AddInstruction(induc_);
-    entry_->AddInstruction(new (&allocator_) HStoreLocal(induc_, constant0_));
-    tmp_ = new (&allocator_) HLocal(n + 1);
-    entry_->AddInstruction(tmp_);
-    entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_));
-    dum_ = new (&allocator_) HLocal(n + 2);
-    entry_->AddInstruction(dum_);
     return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
 
     // Provide loop instructions.
     for (int d = 0; d < n; d++) {
-      basic_[d] = new (&allocator_) HLocal(d);
-      entry_->AddInstruction(basic_[d]);
-      loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_));
+      basic_[d] = new (&allocator_) HPhi(&allocator_, d, 0, Primitive::kPrimInt);
       loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto());
-      HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
-      loop_header_[d]->AddInstruction(load);
-      HInstruction* compare = new (&allocator_) HLessThan(load, constant100_);
+      loop_header_[d]->AddPhi(basic_[d]);
+      HInstruction* compare = new (&allocator_) HLessThan(basic_[d], constant100_);
       loop_header_[d]->AddInstruction(compare);
       loop_header_[d]->AddInstruction(new (&allocator_) HIf(compare));
-      load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt);
-      loop_body_[d]->AddInstruction(load);
-      increment_[d] = new (&allocator_) HAdd(Primitive::kPrimInt, load, constant1_);
+      increment_[d] = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[d], constant1_);
       loop_body_[d]->AddInstruction(increment_[d]);
-      loop_body_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], increment_[d]));
       loop_body_[d]->AddInstruction(new (&allocator_) HGoto());
+
+      basic_[d]->AddInput(constant0_);
+      basic_[d]->AddInput(increment_[d]);
     }
   }
 
   // Builds if-statement at depth d.
-  void BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
+  HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
     HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_);
@@ -134,6 +123,10 @@
     cond->AddInstruction(new (&allocator_) HIf(parameter_));
     *ifT = ifTrue;
     *ifF = ifFalse;
+
+    HPhi* select_phi = new (&allocator_) HPhi(&allocator_, -1, 0, Primitive::kPrimInt);
+    loop_body_[d]->AddPhi(select_phi);
+    return select_phi;
   }
 
   // Inserts instruction right before increment at depth d.
@@ -142,25 +135,20 @@
     return instruction;
   }
 
-  // Inserts local load at depth d.
-  HInstruction* InsertLocalLoad(HLocal* local, int d) {
-    return InsertInstruction(new (&allocator_) HLoadLocal(local, Primitive::kPrimInt), d);
+  // Inserts a phi to loop header at depth d and returns it.
+  HPhi* InsertLoopPhi(int vreg, int d) {
+    HPhi* phi = new (&allocator_) HPhi(&allocator_, vreg, 0, Primitive::kPrimInt);
+    loop_header_[d]->AddPhi(phi);
+    return phi;
   }
 
-  // Inserts local store at depth d.
-  HInstruction* InsertLocalStore(HLocal* local, HInstruction* rhs, int d) {
-    return InsertInstruction(new (&allocator_) HStoreLocal(local, rhs), d);
-  }
-
-  // Inserts an array store with given local as subscript at depth d to
+  // Inserts an array store with given `subscript` at depth d to
   // enable tests to inspect the computed induction at that point easily.
-  HInstruction* InsertArrayStore(HLocal* subscript, int d) {
-    HInstruction* load = InsertInstruction(
-        new (&allocator_) HLoadLocal(subscript, Primitive::kPrimInt), d);
+  HInstruction* InsertArrayStore(HInstruction* subscript, int d) {
     // ArraySet is given a float value in order to avoid SsaBuilder typing
     // it from the array's non-existent reference type info.
     return InsertInstruction(new (&allocator_) HArraySet(
-        parameter_, load, float_constant0_, Primitive::kPrimFloat, 0), d);
+        parameter_, subscript, float_constant0_, Primitive::kPrimFloat, 0), d);
   }
 
   // Returns induction information of instruction in loop at depth d.
@@ -171,7 +159,7 @@
 
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
-    TransformToSsa(graph_);
+    graph_->BuildDominatorTree();
     iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
     iva_->Run();
   }
@@ -191,16 +179,13 @@
   HInstruction* constant1_;
   HInstruction* constant100_;
   HInstruction* float_constant0_;
-  HLocal* induc_;  // "vreg_n", the "k"
-  HLocal* tmp_;    // "vreg_n+1"
-  HLocal* dum_;    // "vreg_n+2"
 
   // Loop specifics.
   HBasicBlock* loop_preheader_[10];
   HBasicBlock* loop_header_[10];
   HBasicBlock* loop_body_[10];
   HInstruction* increment_[10];
-  HLocal* basic_[10];  // "vreg_d", the "i_d"
+  HPhi* basic_[10];  // "vreg_d", the "i_d"
 };
 
 //
@@ -216,7 +201,7 @@
   //   ..
   // }
   BuildLoopNest(10);
-  TransformToSsa(graph_);
+  graph_->BuildDominatorTree();
   ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
   for (int d = 0; d < 1; d++) {
     ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
@@ -258,20 +243,15 @@
   // }
   BuildLoopNest(1);
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0);
-  InsertLocalStore(induc_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, neg, 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("((1) * i + (100))", GetInductionInfo(add, 0).c_str());
@@ -291,14 +271,16 @@
   //   a[k] = 0;
   // }
   BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(induc_, add, 0);
-  HInstruction* store1 = InsertArrayStore(induc_, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
+  HInstruction* store1 = InsertArrayStore(add, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(induc_, sub, 0);
-  HInstruction* store2 = InsertArrayStore(induc_, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0);
+  HInstruction* store2 = InsertArrayStore(sub, 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("(((100) - (1)) * i + (100))",
@@ -316,23 +298,24 @@
   //   a[k] = 0;
   // }
   BuildLoopNest(1);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(constant0_);
+
   HBasicBlock* ifTrue;
   HBasicBlock* ifFalse;
-  BuildIf(0, &ifTrue, &ifFalse);
+  HPhi* k_body = BuildIf(0, &ifTrue, &ifFalse);
+
   // True-branch.
-  HInstruction* load1 = new (&allocator_) HLoadLocal(induc_, Primitive::kPrimInt);
-  ifTrue->AddInstruction(load1);
-  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, load1, constant1_);
+  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_);
   ifTrue->AddInstruction(inc1);
-  ifTrue->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc1));
+  k_body->AddInput(inc1);
   // False-branch.
-  HInstruction* load2 = new (&allocator_) HLoadLocal(induc_, Primitive::kPrimInt);
-  ifFalse->AddInstruction(load2);
-  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, load2, constant1_);
+  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_);
   ifFalse->AddInstruction(inc2);
-  ifFalse->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc2));
+  k_body->AddInput(inc2);
   // Merge over a phi.
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HInstruction* store = InsertArrayStore(k_body, 0);
+  k_header->AddInput(k_body);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
@@ -348,21 +331,18 @@
   BuildLoopNest(1);
   HBasicBlock* ifTrue;
   HBasicBlock* ifFalse;
-  BuildIf(0, &ifTrue, &ifFalse);
+  HPhi* k = BuildIf(0, &ifTrue, &ifFalse);
+
   // True-branch.
-  HInstruction* load1 = new (&allocator_) HLoadLocal(basic_[0], Primitive::kPrimInt);
-  ifTrue->AddInstruction(load1);
-  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, load1, constant1_);
+  HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_);
   ifTrue->AddInstruction(inc1);
-  ifTrue->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc1));
+  k->AddInput(inc1);
   // False-branch.
-  HInstruction* load2 = new (&allocator_) HLoadLocal(basic_[0], Primitive::kPrimInt);
-  ifFalse->AddInstruction(load2);
-  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, load2, constant1_);
+  HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_);
   ifFalse->AddInstruction(inc2);
-  ifFalse->AddInstruction(new (&allocator_) HStoreLocal(induc_, inc2));
+  k->AddInput(inc2);
   // Merge over a phi.
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HInstruction* store = InsertArrayStore(k, 0);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
@@ -376,10 +356,13 @@
   //   k = 100 - i;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)))",
@@ -396,11 +379,16 @@
   //   t = 100 - i;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
-  InsertLocalStore(induc_, InsertLocalLoad(tmp_, 0), 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+  HPhi* t = InsertLoopPhi(1, 0);
+  t->AddInput(constant100_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
+  k->AddInput(t);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0], 0), 0);
+  t->AddInput(sub);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("wrap((0), wrap((100), (( - (1)) * i + (100))))",
@@ -419,26 +407,21 @@
   //   k = i << 1;
   // }
   BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(tmp_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, k, constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(tmp_, neg, 0);
-  InsertLocalStore(
-      induc_,
-      InsertInstruction(
-          new (&allocator_)
-          HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0), 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, k), 0);
+  k->AddInput(
+      InsertInstruction(new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0));
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("wrap((100), ((2) * i + (100)))", GetInductionInfo(add, 0).c_str());
@@ -461,11 +444,15 @@
   //   k = d;
   // }
   BuildLoopNest(1);
-  HInstruction* store1 = InsertArrayStore(induc_, 0);
-  HInstruction* store2 = InsertArrayStore(tmp_, 0);
-  InsertLocalStore(dum_, InsertLocalLoad(tmp_, 0), 0);
-  InsertLocalStore(tmp_, InsertLocalLoad(induc_, 0), 0);
-  InsertLocalStore(induc_, InsertLocalLoad(dum_, 0), 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+  HPhi* t = InsertLoopPhi(1, 0);
+  t->AddInput(constant100_);
+
+  HInstruction* store1 = InsertArrayStore(k, 0);
+  HInstruction* store2 = InsertArrayStore(t, 0);
+  k->AddInput(t);
+  t->AddInput(k);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("periodic((0), (100))", GetInductionInfo(store1->InputAt(1), 0).c_str());
@@ -480,10 +467,13 @@
   //   k = 1 - k;
   // }
   BuildLoopNest(1);
-  HInstruction* store = InsertArrayStore(induc_, 0);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(induc_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k), 0);
+  k->AddInput(sub);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("periodic((0), (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
@@ -502,26 +492,24 @@
   //   t = - k;
   // }
   BuildLoopNest(1);
-  InsertLocalStore(
-      induc_,
-      InsertInstruction(new (&allocator_)
-                        HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0), 0);
+  HPhi* k_header = InsertLoopPhi(0, 0);
+  k_header->AddInput(constant0_);
+
+  HInstruction* k_body = InsertInstruction(
+      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k_header), 0);
+  k_header->AddInput(k_body);
+
   // Derived expressions.
   HInstruction *add = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, add, 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *sub = InsertInstruction(
-      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, sub, 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *mul = InsertInstruction(
-      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
-  InsertLocalStore(tmp_, mul, 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, k_body, constant100_), 0);
   HInstruction *shl = InsertInstruction(
-      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
-  InsertLocalStore(tmp_, shl, 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, k_body, constant1_), 0);
   HInstruction *neg = InsertInstruction(
-      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
-  InsertLocalStore(tmp_, neg, 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, k_body), 0);
   PerformInductionVarAnalysis();
 
   EXPECT_STREQ("periodic(((1) + (100)), (100))", GetInductionInfo(add, 0).c_str());
@@ -543,10 +531,20 @@
   //   ..
   // }
   BuildLoopNest(10);
+
+  HPhi* k[10];
+  for (int d = 0; d < 10; d++) {
+    k[d] = InsertLoopPhi(0, d);
+  }
+
   HInstruction *inc = InsertInstruction(
-      new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 9)), 9);
-  InsertLocalStore(induc_, inc, 9);
-  HInstruction* store = InsertArrayStore(induc_, 9);
+      new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k[9]), 9);
+  HInstruction* store = InsertArrayStore(inc, 9);
+
+  for (int d = 0; d < 10; d++) {
+    k[d]->AddInput((d != 0) ? k[d - 1] : constant0_);
+    k[d]->AddInput((d != 9) ? k[d + 1] : inc);
+  }
   PerformInductionVarAnalysis();
 
   // Avoid exact phi number, since that depends on the SSA building phase.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index eda9c01..55a654e 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -86,25 +86,20 @@
     loop_body->AddSuccessor(loop_header);
     return_block->AddSuccessor(exit_block_);
     // Instructions.
-    HLocal* induc = new (&allocator_) HLocal(0);
-    entry_block_->AddInstruction(induc);
-    loop_preheader_->AddInstruction(
-        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower)));  // i = l
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
-    HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
-          loop_header->AddInstruction(load);
+    HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+    loop_header->AddPhi(phi);
+    phi->AddInput(graph_->GetIntConstant(lower));  // i = l
     if (stride > 0) {
-      condition_ = new (&allocator_) HLessThan(load, upper);  // i < u
+      condition_ = new (&allocator_) HLessThan(phi, upper);  // i < u
     } else {
-      condition_ = new (&allocator_) HGreaterThan(load, upper);  // i > u
+      condition_ = new (&allocator_) HGreaterThan(phi, upper);  // i > u
     }
     loop_header->AddInstruction(condition_);
     loop_header->AddInstruction(new (&allocator_) HIf(condition_));
-    load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
-    loop_body->AddInstruction(load);
-    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride));
-    loop_body->AddInstruction(increment_);
-    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i += s
+    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, phi, graph_->GetIntConstant(stride));
+    loop_body->AddInstruction(increment_);  // i += s
+    phi->AddInput(increment_);
     loop_body->AddInstruction(new (&allocator_) HGoto());
     return_block->AddInstruction(new (&allocator_) HReturnVoid());
     exit_block_->AddInstruction(new (&allocator_) HExit());
@@ -112,7 +107,7 @@
 
   /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
-    TransformToSsa(graph_);
+    graph_->BuildDominatorTree();
     iva_->Run();
   }
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fa6aae8..a5acab8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -258,8 +258,9 @@
   }
 
   if (actual_method != nullptr) {
-    return TryInline(invoke_instruction, actual_method);
+    return TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
   }
+
   DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
   // Check if we can use an inline cache.
@@ -344,7 +345,7 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
     return false;
   }
 
@@ -378,7 +379,7 @@
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedMonomorphicCall);
@@ -420,6 +421,9 @@
       actual_method = new_method;
     } else if (actual_method != new_method) {
       // Different methods, bailout.
+      VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                     << " from inline cache is not inlined because it resolves"
+                     << " to different methods";
       return false;
     }
   }
@@ -428,7 +432,7 @@
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-  if (!TryInline(invoke_instruction, actual_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ false)) {
     return false;
   }
 
@@ -474,7 +478,7 @@
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
 
   // Run type propagation to get the guard typed.
-  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
   rtp_fixup.Run();
 
   MaybeRecordStat(kInlinedPolymorphicCall);
@@ -482,14 +486,29 @@
   return true;
 }
 
-bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  HInstruction* return_replacement = nullptr;
+  if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+    return false;
+  }
+  if (return_replacement != nullptr) {
+    invoke_instruction->ReplaceWith(return_replacement);
+  }
+  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  return true;
+}
+
+bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
+                                 ArtMethod* method,
+                                 HInstruction** return_replacement) {
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
 
   // Check whether we're allowed to inline. The outermost compilation unit is the relevant
   // dex file here (though the transitivity of an inline chain would allow checking the calller).
   if (!compiler_driver_->MayInline(method->GetDexFile(),
                                    outer_compilation_unit_.GetDexFile())) {
-    if (TryPatternSubstitution(invoke_instruction, method, do_rtp)) {
+    if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method);
       MaybeRecordStat(kReplacedInvokeWithSimplePattern);
       return true;
@@ -556,7 +575,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
+  if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) {
     return false;
   }
 
@@ -583,27 +602,27 @@
 // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
 bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
                                       ArtMethod* resolved_method,
-                                      bool do_rtp) {
+                                      HInstruction** return_replacement) {
   InlineMethod inline_method;
   if (!InlineMethodAnalyser::AnalyseMethodCode(resolved_method, &inline_method)) {
     return false;
   }
 
-  HInstruction* return_replacement = nullptr;
   switch (inline_method.opcode) {
     case kInlineOpNop:
       DCHECK_EQ(invoke_instruction->GetType(), Primitive::kPrimVoid);
+      *return_replacement = nullptr;
       break;
     case kInlineOpReturnArg:
-      return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
-                                                         inline_method.d.return_data.arg);
+      *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
+                                                          inline_method.d.return_data.arg);
       break;
     case kInlineOpNonWideConst:
       if (resolved_method->GetShorty()[0] == 'L') {
         DCHECK_EQ(inline_method.d.data, 0u);
-        return_replacement = graph_->GetNullConstant();
+        *return_replacement = graph_->GetNullConstant();
       } else {
-        return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
+        *return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
       }
       break;
     case kInlineOpIGet: {
@@ -618,7 +637,7 @@
       DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
-      return_replacement = iget;
+      *return_replacement = iget;
       break;
     }
     case kInlineOpIPut: {
@@ -636,7 +655,7 @@
       invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
       if (data.return_arg_plus1 != 0u) {
         size_t return_arg = data.return_arg_plus1 - 1u;
-        return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
+        *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
       }
       break;
     }
@@ -691,19 +710,13 @@
         HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
         invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
       }
+      *return_replacement = nullptr;
       break;
     }
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
   }
-
-  if (return_replacement != nullptr) {
-    invoke_instruction->ReplaceWith(return_replacement);
-  }
-  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-
-  FixUpReturnReferenceType(resolved_method, invoke_instruction, return_replacement, do_rtp);
   return true;
 }
 
@@ -727,7 +740,7 @@
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
   if (iget->GetType() == Primitive::kPrimNot) {
-    ReferenceTypePropagation rtp(graph_, handles_);
+    ReferenceTypePropagation rtp(graph_, handles_, /* is_first_run */ false);
     rtp.Visit(iget);
   }
   return iget;
@@ -756,10 +769,11 @@
       /* dex_pc */ 0);
   return iput;
 }
-bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
-                                 HInvoke* invoke_instruction,
-                                 bool same_dex_file,
-                                 bool do_rtp) {
+
+bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                                       ArtMethod* resolved_method,
+                                       bool same_dex_file,
+                                       HInstruction** return_replacement) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -825,7 +839,7 @@
                         resolved_method->GetQuickenedInfo(),
                         dex_cache);
 
-  if (!builder.BuildGraph(*code_item)) {
+  if (builder.BuildGraph(*code_item, handles_) != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     return false;
@@ -838,12 +852,6 @@
     return false;
   }
 
-  if (callee_graph->TryBuildingSsa(handles_) != kAnalysisSuccess) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                   << " could not be transformed to SSA";
-    return false;
-  }
-
   size_t parameter_index = 0;
   for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions());
        !instructions.Done();
@@ -988,12 +996,18 @@
 
       if (current->IsNewInstance() &&
           (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
         return false;
       }
 
       if (current->IsNewArray() &&
           (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an entrypoint"
+                       << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
         return false;
       }
@@ -1003,22 +1017,21 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+                       << " could not be inlined because it is using an unresolved"
+                       << " entrypoint";
         return false;
       }
     }
   }
   number_of_inlined_instructions_ += number_of_instructions;
 
-  HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
-  if (return_replacement != nullptr) {
-    DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph());
-  }
-  FixUpReturnReferenceType(resolved_method, invoke_instruction, return_replacement, do_rtp);
+  *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
   return true;
 }
 
-void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
-                                        HInvoke* invoke_instruction,
+void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
                                         HInstruction* return_replacement,
                                         bool do_rtp) {
   // Check the integrity of reference types and run another type propagation if needed.
@@ -1044,13 +1057,13 @@
         if (invoke_rti.IsStrictSupertypeOf(return_rti)
             || (return_rti.IsExact() && !invoke_rti.IsExact())
             || !return_replacement->CanBeNull()) {
-          ReferenceTypePropagation(graph_, handles_).Run();
+          ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
         }
       }
     } else if (return_replacement->IsInstanceOf()) {
       if (do_rtp) {
         // Inlining InstanceOf into an If may put a tighter bound on reference types.
-        ReferenceTypePropagation(graph_, handles_).Run();
+        ReferenceTypePropagation(graph_, handles_, /* is_first_run */ false).Run();
       }
     }
   }
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7d343c6..9dd9bf5 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -61,12 +61,25 @@
   bool TryInline(HInvoke* invoke_instruction);
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
-  // reference type propagation can run after the inlining.
-  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+  // reference type propagation can run after the inlining. If the inlining is successful, this
+  // method will replace and remove the `invoke_instruction`.
+  bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool TryBuildAndInline(HInvoke* invoke_instruction,
+                         ArtMethod* resolved_method,
+                         HInstruction** return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
+                               ArtMethod* resolved_method,
+                               bool same_dex_file,
+                               HInstruction** return_replacement);
+
   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
-  bool TryPatternSubstitution(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+  bool TryPatternSubstitution(HInvoke* invoke_instruction,
+                              ArtMethod* resolved_method,
+                              HInstruction** return_replacement)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Create a new HInstanceFieldGet.
@@ -94,18 +107,13 @@
                                 const InlineCache& ic)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool TryBuildAndInline(ArtMethod* resolved_method,
-                         HInvoke* invoke_instruction,
-                         bool same_dex_file,
-                         bool do_rtp = true);
-
   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                            HInstruction* receiver,
                                            uint32_t dex_pc) const
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void FixUpReturnReferenceType(ArtMethod* resolved_method,
-                                HInvoke* invoke_instruction,
+  void FixUpReturnReferenceType(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
                                 HInstruction* return_replacement,
                                 bool do_rtp)
     SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 0029cc3..a48d06f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -757,11 +757,97 @@
   }
 }
 
+static bool IsTypeConversionImplicit(Primitive::Type input_type, Primitive::Type result_type) {
+  // Besides conversion to the same type, widening integral conversions are implicit,
+  // excluding conversions to long and the byte->char conversion where we need to
+  // clear the high 16 bits of the 32-bit sign-extended representation of byte.
+  return result_type == input_type ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimByte) ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimShort) ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimChar) ||
+      (result_type == Primitive::kPrimShort && input_type == Primitive::kPrimByte);
+}
+
+static bool IsTypeConversionLossless(Primitive::Type input_type, Primitive::Type result_type) {
+  // The conversion to a larger type is loss-less with the exception of two cases,
+  //   - conversion to char, the only unsigned type, where we may lose some bits, and
+  //   - conversion from float to long, the only FP to integral conversion with smaller FP type.
+  // For integral to FP conversions this holds because the FP mantissa is large enough.
+  DCHECK_NE(input_type, result_type);
+  return Primitive::ComponentSize(result_type) > Primitive::ComponentSize(input_type) &&
+      result_type != Primitive::kPrimChar &&
+      !(result_type == Primitive::kPrimLong && input_type == Primitive::kPrimFloat);
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
-  if (instruction->GetResultType() == instruction->GetInputType()) {
-    // Remove the instruction if it's converting to the same type.
-    instruction->ReplaceWith(instruction->GetInput());
+  HInstruction* input = instruction->GetInput();
+  Primitive::Type input_type = input->GetType();
+  Primitive::Type result_type = instruction->GetResultType();
+  if (IsTypeConversionImplicit(input_type, result_type)) {
+    // Remove the implicit conversion; this includes conversion to the same type.
+    instruction->ReplaceWith(input);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
+  }
+
+  if (input->IsTypeConversion()) {
+    HTypeConversion* input_conversion = input->AsTypeConversion();
+    HInstruction* original_input = input_conversion->GetInput();
+    Primitive::Type original_type = original_input->GetType();
+
+    // When the first conversion is lossless, a direct conversion from the original type
+    // to the final type yields the same result, even for a lossy second conversion, for
+    // example float->double->int or int->double->float.
+    bool is_first_conversion_lossless = IsTypeConversionLossless(original_type, input_type);
+
+    // For integral conversions, see if the first conversion loses only bits that the second
+    // doesn't need, i.e. the final type is no wider than the intermediate. If so, direct
+    // conversion yields the same result, for example long->int->short or int->char->short.
+    bool integral_conversions_with_non_widening_second =
+        Primitive::IsIntegralType(input_type) &&
+        Primitive::IsIntegralType(original_type) &&
+        Primitive::IsIntegralType(result_type) &&
+        Primitive::ComponentSize(result_type) <= Primitive::ComponentSize(input_type);
+
+    if (is_first_conversion_lossless || integral_conversions_with_non_widening_second) {
+      // If the merged conversion is implicit, do the simplification unconditionally.
+      if (IsTypeConversionImplicit(original_type, result_type)) {
+        instruction->ReplaceWith(original_input);
+        instruction->GetBlock()->RemoveInstruction(instruction);
+        if (!input_conversion->HasUses()) {
+          // Don't wait for DCE.
+          input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        }
+        RecordSimplification();
+        return;
+      }
+      // Otherwise simplify only if the first conversion has no other use.
+      if (input_conversion->HasOnlyOneNonEnvironmentUse()) {
+        input_conversion->ReplaceWith(original_input);
+        input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        RecordSimplification();
+        return;
+      }
+    }
+  } else if (input->IsAnd() &&
+      Primitive::IsIntegralType(result_type) &&
+      input->HasOnlyOneNonEnvironmentUse()) {
+    DCHECK(Primitive::IsIntegralType(input_type));
+    HAnd* input_and = input->AsAnd();
+    HConstant* constant = input_and->GetConstantRight();
+    if (constant != nullptr) {
+      int64_t value = Int64FromConstant(constant);
+      DCHECK_NE(value, -1);  // "& -1" would have been optimized away in VisitAnd().
+      size_t trailing_ones = CTZ(~static_cast<uint64_t>(value));
+      if (trailing_ones >= kBitsPerByte * Primitive::ComponentSize(result_type)) {
+        // The `HAnd` is useless, for example in `(byte) (x & 0xff)`, get rid of it.
+        input_and->ReplaceWith(input_and->GetLeastConstantLeft());
+        input_and->GetBlock()->RemoveInstruction(input_and);
+        RecordSimplification();
+        return;
+      }
+    }
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index ab4f6f9..22cefe8 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -261,7 +261,8 @@
   locations->SetOut(Location::SameAsFirstInput());
   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
   DCHECK(static_or_direct != nullptr);
-  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     // We need addressibility for the constant area.
     locations->SetInAt(1, Location::RequiresRegister());
     // We need a temporary to hold the constant.
@@ -276,7 +277,7 @@
   Location output = locations->Out();
 
   DCHECK(output.IsFpuRegister());
-  if (locations->InAt(1).IsValid()) {
+  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
     DCHECK(locations->InAt(1).IsRegister());
     // We also have a constant area pointer.
     Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -465,7 +466,7 @@
   // NaN handling.
   __ Bind(&nan);
   // Do we have a constant area pointer?
-  if (locations->InAt(2).IsValid()) {
+  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
     DCHECK(locations->InAt(2).IsRegister());
     Register constant_area = locations->InAt(2).AsRegister<Register>();
     if (is_double) {
@@ -510,7 +511,8 @@
   locations->SetOut(Location::SameAsFirstInput());
   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
   DCHECK(static_or_direct != nullptr);
-  if (invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
+  if (static_or_direct->HasSpecialInput() &&
+      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
     locations->SetInAt(2, Location::RequiresRegister());
   }
 }
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2b63ec8..9fb32f4 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -76,7 +76,7 @@
 
   // Performs LICM optimizations (after proper set up).
   void PerformLICM() {
-    TransformToSsa(graph_);
+    graph_->BuildDominatorTree();
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
     LICM(graph_, side_effects).Run();
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index ed275b1..13e14c5 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -39,14 +39,7 @@
 static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  TransformToSsa(graph);
-
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 991f8f7..3202493 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -32,14 +32,10 @@
 class LiveRangesTest : public CommonCompilerTest {};
 
 static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
+  HGraph* graph = CreateCFG(allocator, data);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
-  TransformToSsa(graph);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   return graph;
@@ -303,13 +299,12 @@
    *       12: equal
    *       14: if +++++
    *        |       \ +
-   *        |     18: suspend
-   *        |     20: add
-   *        |     22: goto
+   *        |     18: add
+   *        |     20: goto
    *        |
-   *       26: return
+   *       24: return
    *         |
-   *       30: exit
+   *       28: exit
    *
    * We want to make sure the phi at 10 has a lifetime hole after the add at 20.
    */
@@ -345,18 +340,18 @@
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(10u, range->GetStart());
-  ASSERT_EQ(21u, range->GetEnd());
+  ASSERT_EQ(19u, range->GetEnd());
   range = range->GetNext();
   ASSERT_TRUE(range != nullptr);
-  ASSERT_EQ(24u, range->GetStart());
-  ASSERT_EQ(26u, range->GetEnd());
+  ASSERT_EQ(22u, range->GetStart());
+  ASSERT_EQ(24u, range->GetEnd());
 
   // Test for the add instruction.
   HAdd* add = liveness.GetInstructionFromSsaIndex(2)->AsAdd();
   interval = add->GetLiveInterval();
   range = interval->GetFirstRange();
-  ASSERT_EQ(20u, range->GetStart());
-  ASSERT_EQ(24u, range->GetEnd());
+  ASSERT_EQ(18u, range->GetStart());
+  ASSERT_EQ(22u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 7736eed..92a987c 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -46,12 +46,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-  TransformToSsa(graph);
+  HGraph* graph = CreateCFG(&allocator, data);
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9a97f54..8eaac0b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -61,7 +61,7 @@
           (use->IsStaticFieldSet() && (reference_ == use->InputAt(1))) ||
           (use->IsUnresolvedStaticFieldSet() && (reference_ == use->InputAt(0))) ||
           (use->IsArraySet() && (reference_ == use->InputAt(2)))) {
-        // reference_ is merged to a phi/HSelect, passed to a callee, or stored to heap.
+        // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
         is_singleton_and_not_returned_ = false;
@@ -458,6 +458,10 @@
     CreateReferenceInfoForReferenceType(instruction);
   }
 
+  void VisitSelect(HSelect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);
+  }
+
   void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
     may_deoptimize_ = true;
   }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f269885..27015c0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -27,6 +27,15 @@
 
 namespace art {
 
+void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
+  ScopedObjectAccess soa(Thread::Current());
+  // Create the inexact Object reference type and store it in the HGraph.
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  inexact_object_rti_ = ReferenceTypeInfo::Create(
+      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+      /* is_exact */ false);
+}
+
 void HGraph::AddBlock(HBasicBlock* block) {
   block->SetBlockId(blocks_.size());
   blocks_.push_back(block);
@@ -236,29 +245,6 @@
   }
 }
 
-GraphAnalysisResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
-  GraphAnalysisResult result = BuildDominatorTree();
-  if (result != kAnalysisSuccess) {
-    return result;
-  }
-
-  // Create the inexact Object reference type and store it in the HGraph.
-  ScopedObjectAccess soa(Thread::Current());
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  inexact_object_rti_ = ReferenceTypeInfo::Create(
-      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
-      /* is_exact */ false);
-
-  // Tranforms graph to SSA form.
-  result = SsaBuilder(this, handles).BuildSsa();
-  if (result != kAnalysisSuccess) {
-    return result;
-  }
-
-  in_ssa_form_ = true;
-  return kAnalysisSuccess;
-}
-
 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
   HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc());
   AddBlock(new_block);
@@ -1592,7 +1578,7 @@
     loop_info->Remove(this);
     if (loop_info->IsBackEdge(*this)) {
       // If this was the last back edge of the loop, we deliberately leave the
-      // loop in an inconsistent state and will fail SSAChecker unless the
+      // loop in an inconsistent state and will fail GraphChecker unless the
       // entire loop is removed during the pass.
       loop_info->RemoveBackEdge(this);
     }
@@ -1631,7 +1617,7 @@
     } else if (num_pred_successors == 0u) {
       // The predecessor has no remaining successors and therefore must be dead.
       // We deliberately leave it without a control-flow instruction so that the
-      // SSAChecker fails unless it is not removed during the pass too.
+      // GraphChecker fails unless it is not removed during the pass too.
       predecessor->RemoveInstruction(last_instruction);
     } else {
       // There are multiple successors left. The removed block might be a successor
@@ -2044,13 +2030,6 @@
     }
   }
 
-  if (return_value != nullptr) {
-    invoke->ReplaceWith(return_value);
-  }
-
-  // Finally remove the invoke from the caller.
-  invoke->GetBlock()->RemoveInstruction(invoke);
-
   return return_value;
 }
 
@@ -2387,4 +2366,26 @@
   return os;
 }
 
+std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
+  switch (rhs) {
+    case TypeCheckKind::kUnresolvedCheck:
+      return os << "unresolved_check";
+    case TypeCheckKind::kExactCheck:
+      return os << "exact_check";
+    case TypeCheckKind::kClassHierarchyCheck:
+      return os << "class_hierarchy_check";
+    case TypeCheckKind::kAbstractClassCheck:
+      return os << "abstract_class_check";
+    case TypeCheckKind::kInterfaceCheck:
+      return os << "interface_check";
+    case TypeCheckKind::kArrayObjectCheck:
+      return os << "array_object_check";
+    case TypeCheckKind::kArrayCheck:
+      return os << "array_check";
+    default:
+      LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index daec096..854854f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -98,6 +98,7 @@
 };
 
 enum GraphAnalysisResult {
+  kAnalysisInvalidBytecode,
   kAnalysisFailThrowCatchLoop,
   kAnalysisFailAmbiguousArrayOp,
   kAnalysisSuccess,
@@ -308,10 +309,14 @@
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
+  // Acquires and stores RTI of inexact Object to be used when creating HNullConstant.
+  void InitializeInexactObjectRTI(StackHandleScopeCollection* handles);
+
   ArenaAllocator* GetArena() const { return arena_; }
   const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
   bool IsInSsaForm() const { return in_ssa_form_; }
+  void SetInSsaForm() { in_ssa_form_ = true; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
@@ -322,11 +327,6 @@
 
   void AddBlock(HBasicBlock* block);
 
-  // Try building the SSA form of this graph, with dominance computation and
-  // loop recognition. Returns a code specifying that it was successful or the
-  // reason for failure.
-  GraphAnalysisResult TryBuildingSsa(StackHandleScopeCollection* handles);
-
   void ComputeDominanceInformation();
   void ClearDominanceInformation();
   void ClearLoopInformation();
@@ -345,8 +345,9 @@
   void ComputeTryBlockInformation();
 
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
-  // Returns the instruction used to replace the invoke expression or null if the
-  // invoke is for a void method.
+  // Returns the instruction to replace the invoke expression or null if the
+  // invoke is for a void method. Note that the caller is responsible for replacing
+  // and removing the invoke instruction.
   HInstruction* InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
   // Need to add a couple of blocks to test if the loop body is entered and
@@ -1235,7 +1236,6 @@
   M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
-  M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
   M(TryBoundary, Instruction)                                           \
   M(TypeConversion, Instruction)                                        \
@@ -3672,6 +3672,7 @@
   // method pointer; otherwise there may be one platform-specific special input,
   // such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+  bool HasSpecialInput() const { return GetNumberOfArguments() != InputCount(); }
 
   InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
   void SetOptimizedInvokeType(InvokeType invoke_type) {
@@ -4941,33 +4942,6 @@
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
 
-/**
- * Some DEX instructions are folded into multiple HInstructions that need
- * to stay live until the last HInstruction. This class
- * is used as a marker for the baseline compiler to ensure its preceding
- * HInstruction stays live. `index` represents the stack location index of the
- * instruction (the actual offset is computed as index * vreg_size).
- */
-class HTemporary : public HTemplateInstruction<0> {
- public:
-  explicit HTemporary(size_t index, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc), index_(index) {}
-
-  size_t GetIndex() const { return index_; }
-
-  Primitive::Type GetType() const OVERRIDE {
-    // The previous instruction is the one that will be stored in the temporary location.
-    DCHECK(GetPrevious() != nullptr);
-    return GetPrevious()->GetType();
-  }
-
-  DECLARE_INSTRUCTION(Temporary);
-
- private:
-  const size_t index_;
-  DISALLOW_COPY_AND_ASSIGN(HTemporary);
-};
-
 class HSuspendCheck : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc)
@@ -5451,6 +5425,8 @@
   kArrayCheck             // No optimization yet when checking against a generic array.
 };
 
+std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
+
 class HInstanceOf : public HExpression<2> {
  public:
   HInstanceOf(HInstruction* object,
@@ -6023,9 +5999,14 @@
 };
 
 inline int64_t Int64FromConstant(HConstant* constant) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant());
-  return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
-                                   : constant->AsLongConstant()->GetValue();
+  if (constant->IsIntConstant()) {
+    return constant->AsIntConstant()->GetValue();
+  } else if (constant->IsLongConstant()) {
+    return constant->AsLongConstant()->GetValue();
+  } else {
+    DCHECK(constant->IsNullConstant());
+    return 0;
+  }
 }
 
 inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index dcd8e7d..12b748b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -186,18 +186,10 @@
     // Validate the HGraph if running in debug mode.
     if (kIsDebugBuild) {
       if (!graph_in_bad_state_) {
-        if (graph_->IsInSsaForm()) {
-          SSAChecker checker(graph_);
-          checker.Run();
-          if (!checker.IsValid()) {
-            LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<SSAChecker>(checker);
-          }
-        } else {
-          GraphChecker checker(graph_);
-          checker.Run();
-          if (!checker.IsValid()) {
-            LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
-          }
+        GraphChecker checker(graph_);
+        checker.Run();
+        if (!checker.IsValid()) {
+          LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
         }
       }
     }
@@ -665,6 +657,7 @@
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
+
   HGraph* graph = new (arena) HGraph(
       arena,
       dex_file,
@@ -675,6 +668,21 @@
       compiler_driver->GetCompilerOptions().GetDebuggable(),
       osr);
 
+  const uint8_t* interpreter_metadata = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::ClassLoader> loader(hs.NewHandle(
+        soa.Decode<mirror::ClassLoader*>(class_loader)));
+    ArtMethod* art_method = compiler_driver->ResolveMethod(
+        soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
+    // We may not get a method, for example if its class is erroneous.
+    if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
+      interpreter_metadata = art_method->GetQuickenedInfo();
+    }
+  }
+
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
                             instruction_set,
@@ -692,73 +700,54 @@
                              visualizer_output_.get(),
                              compiler_driver);
 
-  const uint8_t* interpreter_metadata = nullptr;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::ClassLoader> loader(hs.NewHandle(
-        soa.Decode<mirror::ClassLoader*>(class_loader)));
-    ArtMethod* art_method = compiler_driver->ResolveMethod(
-        soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
-    // We may not get a method, for example if its class is erroneous.
-    if (art_method != nullptr) {
-      graph->SetArtMethod(art_method);
-      interpreter_metadata = art_method->GetQuickenedInfo();
-    }
-  }
-  HGraphBuilder builder(graph,
-                        &dex_compilation_unit,
-                        &dex_compilation_unit,
-                        &dex_file,
-                        compiler_driver,
-                        compilation_stats_.get(),
-                        interpreter_metadata,
-                        dex_cache);
-
   VLOG(compiler) << "Building " << pass_observer.GetMethodName();
 
   {
-    PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
-    if (!builder.BuildGraph(*code_item)) {
-      pass_observer.SetGraphInBadState();
-      return nullptr;
-    }
-  }
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScopeCollection handles(soa.Self());
+    // Do not hold `mutator_lock_` between optimizations.
+    ScopedThreadSuspension sts(soa.Self(), kNative);
 
-  VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  ScopedThreadSuspension sts(soa.Self(), kNative);
-
-  {
-    PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-    GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
-    if (result != kAnalysisSuccess) {
-      switch (result) {
-        case kAnalysisFailThrowCatchLoop:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
-          break;
-        case kAnalysisFailAmbiguousArrayOp:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
-          break;
-        case kAnalysisSuccess:
-          UNREACHABLE();
+    {
+      PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
+      HGraphBuilder builder(graph,
+                            &dex_compilation_unit,
+                            &dex_compilation_unit,
+                            &dex_file,
+                            compiler_driver,
+                            compilation_stats_.get(),
+                            interpreter_metadata,
+                            dex_cache);
+      GraphAnalysisResult result = builder.BuildGraph(*code_item, &handles);
+      if (result != kAnalysisSuccess) {
+        switch (result) {
+          case kAnalysisInvalidBytecode:
+            break;
+          case kAnalysisFailThrowCatchLoop:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+            break;
+          case kAnalysisFailAmbiguousArrayOp:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+            break;
+          case kAnalysisSuccess:
+            UNREACHABLE();
+        }
+        pass_observer.SetGraphInBadState();
+        return nullptr;
       }
-      pass_observer.SetGraphInBadState();
-      return nullptr;
     }
-  }
 
-  RunOptimizations(graph,
-                   codegen.get(),
-                   compiler_driver,
-                   compilation_stats_.get(),
-                   dex_compilation_unit,
-                   &pass_observer,
-                   &handles);
-  codegen->Compile(code_allocator);
-  pass_observer.DumpDisassembly();
+    RunOptimizations(graph,
+                     codegen.get(),
+                     compiler_driver,
+                     compilation_stats_.get(),
+                     dex_compilation_unit,
+                     &pass_observer,
+                     &handles);
+
+    codegen->Compile(code_allocator);
+    pass_observer.DumpDisassembly();
+  }
 
   if (kArenaAllocatorCountAllocations) {
     if (arena->BytesAllocated() > 4 * MB) {
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 5a91043..0c7648e 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -64,10 +64,12 @@
 
 void RemoveSuspendChecks(HGraph* graph) {
   for (HBasicBlock* block : graph->GetBlocks()) {
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* current = it.Current();
-      if (current->IsSuspendCheck()) {
-        current->GetBlock()->RemoveInstruction(current);
+    if (block != nullptr) {
+      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* current = it.Current();
+        if (current->IsSuspendCheck()) {
+          current->GetBlock()->RemoveInstruction(current);
+        }
       }
     }
   }
@@ -83,12 +85,17 @@
 inline HGraph* CreateCFG(ArenaAllocator* allocator,
                          const uint16_t* data,
                          Primitive::Type return_type = Primitive::kPrimInt) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph, return_type);
   const DexFile::CodeItem* item =
     reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  return graph_built ? graph : nullptr;
+  HGraph* graph = CreateGraph(allocator);
+
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScopeCollection handles(soa.Self());
+    HGraphBuilder builder(graph, return_type);
+    bool graph_built = (builder.BuildGraph(*item, &handles) == kAnalysisSuccess);
+    return graph_built ? graph : nullptr;
+  }
 }
 
 // Naive string diff data type.
@@ -114,12 +121,6 @@
   return instruction->GetBlock() == nullptr;
 }
 
-inline void TransformToSsa(HGraph* graph) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  EXPECT_EQ(graph->TryBuildingSsa(&handles), kAnalysisSuccess);
-}
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 324d84f..0ad104e 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -138,15 +138,7 @@
   }
 
   if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) {
-    if (GetGraph()->GetInstructionSet() == kX86) {
-      // Long values and long condition inputs result in 8 required core registers.
-      // We don't have that many on x86. Materialize the condition in such case.
-      return user->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->GetType() != Primitive::kPrimLong ||
-             condition->InputAt(1)->IsConstant();
-    } else {
-      return true;
-    }
+    return true;
   }
 
   return false;
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index c56100d..2de0c1b 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -30,17 +30,15 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
+  HGraph* graph = CreateCFG(&allocator, data);
   StringPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
   ASSERT_STREQ(expected, printer.str().c_str());
 }
 
-TEST(PrettyPrinterTest, ReturnVoid) {
+class PrettyPrinterTest : public CommonCompilerTest {};
+
+TEST_F(PrettyPrinterTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
       Instruction::RETURN_VOID);
 
@@ -56,7 +54,7 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG1) {
+TEST_F(PrettyPrinterTest, CFG1) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  3: SuspendCheck\n"
@@ -76,7 +74,7 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG2) {
+TEST_F(PrettyPrinterTest, CFG2) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  4: SuspendCheck\n"
@@ -98,7 +96,7 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG3) {
+TEST_F(PrettyPrinterTest, CFG3) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  4: SuspendCheck\n"
@@ -134,16 +132,16 @@
   TestCode(data3, expected);
 }
 
-TEST(PrettyPrinterTest, CFG4) {
+TEST_F(PrettyPrinterTest, CFG4) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
+    "BasicBlock 0, succ: 3\n"
     "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, 1, succ: 1\n"
+    "  4: Goto 3\n"
+    "BasicBlock 1, pred: 3, 1, succ: 1\n"
     "  0: SuspendCheck\n"
     "  1: Goto 1\n"
-    "BasicBlock 2\n"
-    "  2: Exit\n";
+    "BasicBlock 3, pred: 0, succ: 1\n"
+    "  5: Goto 1\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
@@ -157,15 +155,13 @@
   TestCode(data2, expected);
 }
 
-TEST(PrettyPrinterTest, CFG5) {
+TEST_F(PrettyPrinterTest, CFG5) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  3: SuspendCheck\n"
     "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, 2, succ: 3\n"
+    "BasicBlock 1, pred: 0, succ: 3\n"
     "  0: ReturnVoid\n"
-    "BasicBlock 2, succ: 1\n"
-    "  1: Goto 1\n"
     "BasicBlock 3, pred: 1\n"
     "  2: Exit\n";
 
@@ -177,25 +173,23 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG6) {
+TEST_F(PrettyPrinterTest, CFG6) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: Local [4, 3, 2]\n"
-    "  1: IntConstant [2]\n"
+    "  1: IntConstant [5, 5]\n"
     "  10: SuspendCheck\n"
     "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
-    "  2: StoreLocal(0, 1)\n"
-    "  3: LoadLocal(0) [5]\n"
-    "  4: LoadLocal(0) [5]\n"
-    "  5: Equal(3, 4) [6]\n"
+    "BasicBlock 1, pred: 0, succ: 5, 2\n"
+    "  5: Equal(1, 1) [6]\n"
     "  6: If(5)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  7: Goto 3\n"
-    "BasicBlock 3, pred: 1, 2, succ: 4\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
     "  8: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
-    "  9: Exit\n";
+    "  9: Exit\n"
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  12: Goto 3\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -206,26 +200,24 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, CFG7) {
+TEST_F(PrettyPrinterTest, CFG7) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: Local [4, 3, 2]\n"
-    "  1: IntConstant [2]\n"
+    "  1: IntConstant [5, 5]\n"
     "  11: SuspendCheck\n"
     "  12: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3, 2\n"
-    "  2: StoreLocal(0, 1)\n"
-    "  3: LoadLocal(0) [5]\n"
-    "  4: LoadLocal(0) [5]\n"
-    "  5: Equal(3, 4) [6]\n"
+    "BasicBlock 1, pred: 0, succ: 5, 6\n"
+    "  5: Equal(1, 1) [6]\n"
     "  6: If(5)\n"
-    "BasicBlock 2, pred: 1, 3, succ: 3\n"
+    "BasicBlock 2, pred: 6, 3, succ: 3\n"
     "  7: Goto 3\n"
-    "BasicBlock 3, pred: 1, 2, succ: 2\n"
+    "BasicBlock 3, pred: 5, 2, succ: 2\n"
     "  8: SuspendCheck\n"
     "  9: Goto 2\n"
-    "BasicBlock 4\n"
-    "  10: Exit\n";
+    "BasicBlock 5, pred: 1, succ: 3\n"
+    "  13: Goto 3\n"
+    "BasicBlock 6, pred: 1, succ: 2\n"
+    "  14: Goto 2\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -236,15 +228,13 @@
   TestCode(data, expected);
 }
 
-TEST(PrettyPrinterTest, IntConstant) {
+TEST_F(PrettyPrinterTest, IntConstant) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: Local [2]\n"
-    "  1: IntConstant [2]\n"
+    "  1: IntConstant\n"
     "  5: SuspendCheck\n"
     "  6: Goto 1\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
-    "  2: StoreLocal(0, 1)\n"
     "  3: ReturnVoid\n"
     "BasicBlock 2, pred: 1\n"
     "  4: Exit\n";
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 1224a48..deaa415 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -55,10 +55,12 @@
  public:
   RTPVisitor(HGraph* graph,
              HandleCache* handle_cache,
-             ArenaVector<HInstruction*>* worklist)
+             ArenaVector<HInstruction*>* worklist,
+             bool is_first_run)
     : HGraphDelegateVisitor(graph),
       handle_cache_(handle_cache),
-      worklist_(worklist) {}
+      worklist_(worklist),
+      is_first_run_(is_first_run) {}
 
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
@@ -86,14 +88,17 @@
  private:
   HandleCache* handle_cache_;
   ArenaVector<HInstruction*>* worklist_;
+  const bool is_first_run_;
 };
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
                                                    StackHandleScopeCollection* handles,
+                                                   bool is_first_run,
                                                    const char* name)
     : HOptimization(graph, name),
       handle_cache_(handles),
-      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) {
+      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)),
+      is_first_run_(is_first_run) {
 }
 
 void ReferenceTypePropagation::ValidateTypes() {
@@ -125,7 +130,7 @@
 }
 
 void ReferenceTypePropagation::Visit(HInstruction* instruction) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_);
+  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
   instruction->Accept(&visitor);
 }
 
@@ -144,7 +149,7 @@
 }
 
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  RTPVisitor visitor(graph_, &handle_cache_, &worklist_);
+  RTPVisitor visitor(graph_, &handle_cache_, &worklist_, is_first_run_);
   // Handle Phis first as there might be instructions in the same block who depend on them.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     VisitPhi(it.Current()->AsPhi());
@@ -620,6 +625,7 @@
   DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
 
   if (class_rti.IsValid()) {
+    DCHECK(is_first_run_);
     // This is the first run of RTP and class is resolved.
     bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true);
   } else {
@@ -636,6 +642,12 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    if (!is_first_run_ && graph_->IsCompilingOsr()) {
+      // Don't update the type of a loop phi when compiling OSR: we may have done
+      // speculative optimizations dominating that phi, that do not hold at the
+      // point the interpreter jumps to that loop header.
+      return;
+    }
     ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index a7f10a6..028a6fc 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -33,6 +33,7 @@
  public:
   ReferenceTypePropagation(HGraph* graph,
                            StackHandleScopeCollection* handles,
+                           bool is_first_run,
                            const char* name = kReferenceTypePropagationPassName);
 
   // Visit a single instruction.
@@ -93,6 +94,8 @@
 
   ArenaVector<HInstruction*> worklist_;
 
+  // Whether this reference type propagation is the first run we are doing.
+  const bool is_first_run_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 5cd30ad..b8d76b9 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -994,10 +994,6 @@
     return false;
   }
 
-  // We use the first use to compare with other intervals. If this interval
-  // is used after any active intervals, we will spill this interval.
-  size_t first_use = current->FirstUseAfter(current->GetStart());
-
   // First set all registers as not being used.
   size_t* next_use = registers_array_;
   for (size_t i = 0; i < number_of_registers_; ++i) {
@@ -1011,7 +1007,7 @@
     if (active->IsFixed()) {
       next_use[active->GetRegister()] = current->GetStart();
     } else {
-      size_t use = active->FirstUseAfter(current->GetStart());
+      size_t use = active->FirstRegisterUseAfter(current->GetStart());
       if (use != kNoLifetime) {
         next_use[active->GetRegister()] = use;
       }
@@ -1052,16 +1048,16 @@
     DCHECK(current->IsHighInterval());
     reg = current->GetRegister();
     // When allocating the low part, we made sure the high register was available.
-    DCHECK_LT(first_use, next_use[reg]);
+    DCHECK_LT(first_register_use, next_use[reg]);
   } else if (current->IsLowInterval()) {
-    reg = FindAvailableRegisterPair(next_use, first_use);
+    reg = FindAvailableRegisterPair(next_use, first_register_use);
     // We should spill if both registers are not available.
-    should_spill = (first_use >= next_use[reg])
-      || (first_use >= next_use[GetHighForLowRegister(reg)]);
+    should_spill = (first_register_use >= next_use[reg])
+      || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
   } else {
     DCHECK(!current->IsHighInterval());
     reg = FindAvailableRegister(next_use, current);
-    should_spill = (first_use >= next_use[reg]);
+    should_spill = (first_register_use >= next_use[reg]);
   }
 
   DCHECK_NE(reg, kNoRegister);
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 572faa8..a9de7c3 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -38,11 +38,7 @@
 static bool Check(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  TransformToSsa(graph);
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -254,15 +250,6 @@
   ASSERT_TRUE(Check(data));
 }
 
-static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
-  HGraph* graph = CreateGraph(allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  builder.BuildGraph(*item);
-  TransformToSsa(graph);
-  return graph;
-}
-
 TEST_F(RegisterAllocatorTest, Loop3) {
   /*
    * Test the following snippet:
@@ -302,7 +289,7 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -336,7 +323,7 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -390,7 +377,7 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   SsaDeadPhiElimination(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
@@ -414,7 +401,7 @@
 
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = BuildSSAGraph(data, &allocator);
+  HGraph* graph = CreateCFG(&allocator, data);
   SsaDeadPhiElimination(graph).Run();
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 165d09d..43f2499 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -430,8 +430,6 @@
   }
 
   for (HNewInstance* new_instance : uninitialized_strings_) {
-    DCHECK(new_instance->IsStringAlloc());
-
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
@@ -440,10 +438,26 @@
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
       // Remove LoadClass if not needed any more.
-      HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+      HInstruction* input = new_instance->InputAt(0);
+      HLoadClass* load_class = nullptr;
+
+      // If the class was not present in the dex cache at the point of building
+      // the graph, the builder inserted a HClinitCheck in between. Since the String
+      // class is always initialized at the point of running Java code, we can remove
+      // that check.
+      if (input->IsClinitCheck()) {
+        load_class = input->InputAt(0)->AsLoadClass();
+        input->ReplaceWith(load_class);
+        input->GetBlock()->RemoveInstruction(input);
+      } else {
+        load_class = input->AsLoadClass();
+        DCHECK(new_instance->IsStringAlloc());
+        DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+      }
       DCHECK(load_class != nullptr);
-      DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
       if (!load_class->HasUses()) {
+        // Even if the HLoadClass needs access check, we can remove it, as we know the
+        // String class does not need it.
         load_class->GetBlock()->RemoveInstruction(load_class);
       }
     }
@@ -451,6 +465,8 @@
 }
 
 GraphAnalysisResult SsaBuilder::BuildSsa() {
+  DCHECK(!GetGraph()->IsInSsaForm());
+
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
   // for that block. For loops, we create phis whose inputs will be set in 2).
@@ -483,7 +499,7 @@
 
   // 6) Compute type of reference type instructions. The pass assumes that
   // NullConstant has been fixed up.
-  ReferenceTypePropagation(GetGraph(), handles_).Run();
+  ReferenceTypePropagation(GetGraph(), handles_, /* is_first_run */ true).Run();
 
   // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
   // or long/double) and marked ArraySets with ambiguous input type. Now that RTP
@@ -533,6 +549,7 @@
     }
   }
 
+  GetGraph()->SetInSsaForm();
   return kAnalysisSuccess;
 }
 
@@ -899,11 +916,6 @@
   }
 }
 
-void SsaBuilder::VisitTemporary(HTemporary* temp) {
-  // Temporaries are only used by the baseline register allocator.
-  temp->GetBlock()->RemoveInstruction(temp);
-}
-
 void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
   Primitive::Type type = aget->GetType();
   DCHECK(!Primitive::IsFloatingPointType(type));
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index ccef8ea..2dae9c2 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -75,13 +75,10 @@
   void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
   void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
   void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitTemporary(HTemporary* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* aget) OVERRIDE;
   void VisitArraySet(HArraySet* aset) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  static constexpr const char* kSsaBuilderPassName = "ssa_builder";
-
  private:
   void SetLoopHeaderPhiInputs();
   void FixEnvironmentPhis();
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 1dd3508..83e9dac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -226,7 +226,7 @@
         // The only instructions which may not be recorded in the environments
         // are constants created by the SSA builder as typed equivalents of
         // untyped constants from the bytecode, or phis with only such constants
-        // as inputs (verified by SSAChecker). Their raw binary value must
+        // as inputs (verified by GraphChecker). Their raw binary value must
         // therefore be the same and we only need to keep alive one.
       } else {
         size_t phi_input_index = successor->GetPredecessorIndexOf(block);
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index d2885a8..a688092 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -79,13 +79,7 @@
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  TransformToSsa(graph);
+  HGraph* graph = CreateCFG(&allocator, data);
   // Suspend checks implementation may change in the future, and this test relies
   // on how instructions are ordered.
   RemoveSuspendChecks(graph);
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index b6c704c..15cd4e8 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -18,6 +18,7 @@
 #include "dex_instruction.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "pretty_printer.h"
 
 #include "gtest/gtest.h"
 
@@ -30,20 +31,17 @@
 static void TestCode(const uint16_t* data) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateGraph(&allocator);
-  HGraphBuilder builder(graph);
-  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
-  bool graph_built = builder.BuildGraph(*item);
-  ASSERT_TRUE(graph_built);
-
-  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors()[0];
-  HInstruction* first_instruction = first_block->GetFirstInstruction();
-  // Account for some tests having a store local as first instruction.
-  ASSERT_TRUE(first_instruction->IsSuspendCheck()
-              || first_instruction->GetNext()->IsSuspendCheck());
+  HGraph* graph = CreateCFG(&allocator, data);
+  HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor();
+  HBasicBlock* loop_header = first_block->GetSingleSuccessor();
+  ASSERT_TRUE(loop_header->IsLoopHeader());
+  ASSERT_EQ(loop_header->GetLoopInformation()->GetPreHeader(), first_block);
+  ASSERT_TRUE(loop_header->GetFirstInstruction()->IsSuspendCheck());
 }
 
-TEST(CodegenTest, CFG1) {
+class SuspendCheckTest : public CommonCompilerTest {};
+
+TEST_F(SuspendCheckTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
     Instruction::GOTO | 0xFF00);
@@ -51,14 +49,14 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG2) {
+TEST_F(SuspendCheckTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 0, 0);
 
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG3) {
+TEST_F(SuspendCheckTest, CFG3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 0xFFFF,
@@ -67,7 +65,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG4) {
+TEST_F(SuspendCheckTest, CFG4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NE, 0xFFFF,
@@ -76,7 +74,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG5) {
+TEST_F(SuspendCheckTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQZ, 0xFFFF,
@@ -85,7 +83,7 @@
   TestCode(data);
 }
 
-TEST(CodegenTest, CFG6) {
+TEST_F(SuspendCheckTest, CFG6) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_NEZ, 0xFFFF,
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 7138a46..3efef70 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -326,6 +326,14 @@
 }
 
 
+void X86Assembler::cmovl(Condition condition, Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + condition);
+  EmitOperand(dst, src);
+}
+
+
 void X86Assembler::setb(Condition condition, Register dst) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 759a41e..00ff7bd 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -363,6 +363,7 @@
   void leal(Register dst, const Address& src);
 
   void cmovl(Condition condition, Register dst, Register src);
+  void cmovl(Condition condition, Register dst, const Address& src);
 
   void setb(Condition condition, Register dst);
 
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 0fd0982..d0d5147 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -332,6 +332,21 @@
 }
 
 
+TEST_F(AssemblerX86Test, CmovlAddress) {
+  GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EAX), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->cmovl(x86::kNotEqual, x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::ESI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->cmovl(x86::kEqual, x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EAX), x86::TIMES_4, 12));
+  const char* expected =
+    "cmovzl 0xc(%EDI,%EBX,4), %eax\n"
+    "cmovnzl 0xc(%ESI,%EBX,4), %edi\n"
+    "cmovzl 0xc(%EDI,%EAX,4), %edi\n";
+
+  DriverStr(expected, "cmovl_address");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 10f5a00..d86ad1b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -223,6 +223,19 @@
 }
 
 
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (is64bit) {
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + c);
+  EmitOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalByteRegNormalizingRex32(dst, src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6f0847e..f00cb12 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -366,6 +366,7 @@
 
   void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
   void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
+  void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
 
   void movzxb(CpuRegister dst, CpuRegister src);
   void movzxb(CpuRegister dst, const Address& src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 8a87fca..4f65709 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1371,6 +1371,37 @@
   DriverStr(expected, "popcntq_address");
 }
 
+TEST_F(AssemblerX86_64Test, CmovlAddress) {
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), false);
+  GetAssembler()->cmov(x86_64::kNotEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), false);
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), false);
+  const char* expected =
+    "cmovzl 0xc(%RDI,%RBX,4), %R10d\n"
+    "cmovnzl 0xc(%R10,%RBX,4), %edi\n"
+    "cmovzl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "cmovl_address");
+}
+
+TEST_F(AssemblerX86_64Test, CmovqAddress) {
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), true);
+  GetAssembler()->cmov(x86_64::kNotEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), true);
+  GetAssembler()->cmov(x86_64::kEqual, x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), true);
+  const char* expected =
+    "cmovzq 0xc(%RDI,%RBX,4), %R10\n"
+    "cmovnzq 0xc(%R10,%RBX,4), %rdi\n"
+    "cmovzq 0xc(%RDI,%R9,4), %rdi\n";
+
+  DriverStr(expected, "cmovq_address");
+}
+
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 1668dc5..1d80bda 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -150,30 +150,17 @@
   }
 }
 
-bool PatchOat::Patch(File* input_oat, const std::string& image_location, off_t delta,
-                     File* output_oat, File* output_image, InstructionSet isa,
-                     TimingLogger* timings,
-                     bool output_oat_opened_from_fd ATTRIBUTE_UNUSED,
-                     bool new_oat_out) {
+bool PatchOat::Patch(const std::string& image_location,
+                     off_t delta,
+                     const std::string& output_directory,
+                     InstructionSet isa,
+                     TimingLogger* timings) {
   CHECK(Runtime::Current() == nullptr);
-  CHECK(output_image != nullptr);
-  CHECK_GE(output_image->Fd(), 0);
-  CHECK(input_oat != nullptr);
-  CHECK(output_oat != nullptr);
-  CHECK_GE(input_oat->Fd(), 0);
-  CHECK_GE(output_oat->Fd(), 0);
   CHECK(!image_location.empty()) << "image file must have a filename.";
 
   TimingLogger::ScopedTiming t("Runtime Setup", timings);
 
-  if (isa == kNone) {
-    Elf32_Ehdr elf_hdr;
-    if (sizeof(elf_hdr) != input_oat->Read(reinterpret_cast<char*>(&elf_hdr), sizeof(elf_hdr), 0)) {
-      LOG(ERROR) << "unable to read elf header";
-      return false;
-    }
-    isa = GetInstructionSetFromELF(elf_hdr.e_machine, elf_hdr.e_flags);
-  }
+  CHECK_NE(isa, kNone);
   const char* isa_name = GetInstructionSetString(isa);
 
   // Set up the runtime
@@ -193,8 +180,6 @@
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   ScopedObjectAccess soa(Thread::Current());
 
-  std::string output_directory =
-      output_image->GetPath().substr(0, output_image->GetPath().find_last_of("/"));
   t.NewTiming("Image and oat Patching setup");
   std::vector<gc::space::ImageSpace*> spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces();
   std::map<gc::space::ImageSpace*, std::unique_ptr<File>> space_to_file_map;
@@ -325,6 +310,7 @@
     std::string output_image_filename = output_directory +
                                         (StartsWith(converted_image_filename, "/") ? "" : "/") +
                                         converted_image_filename;
+    bool new_oat_out;
     std::unique_ptr<File>
         output_image_file(CreateOrOpen(output_image_filename.c_str(), &new_oat_out));
     if (output_image_file.get() == nullptr) {
@@ -932,21 +918,9 @@
   UsageError("  --output-image-file=<file.art>: Specifies the exact file to write the patched");
   UsageError("      image file to.");
   UsageError("");
-  UsageError("  --output-image-fd=<file-descriptor>: Specifies the file-descriptor to write the");
-  UsageError("      the patched image file to.");
-  UsageError("");
-  UsageError("  --orig-base-offset=<original-base-offset>: Specify the base offset the input file");
-  UsageError("      was compiled with. This is needed if one is specifying a --base-offset");
-  UsageError("");
-  UsageError("  --base-offset=<new-base-offset>: Specify the base offset we will repatch the");
-  UsageError("      given files to use. This requires that --orig-base-offset is also given.");
-  UsageError("");
   UsageError("  --base-offset-delta=<delta>: Specify the amount to change the old base-offset by.");
   UsageError("      This value may be negative.");
   UsageError("");
-  UsageError("  --patched-image-file=<file.art>: Relocate the oat file to be the same as the");
-  UsageError("      given image file.");
-  UsageError("");
   UsageError("  --patched-image-location=<file.art>: Relocate the oat file to be the same as the");
   UsageError("      image at the given location. If used one must also specify the");
   UsageError("      --instruction-set flag. It will search for this image in the same way that");
@@ -992,6 +966,244 @@
   return true;
 }
 
+static int patchoat_image(TimingLogger& timings,
+                          InstructionSet isa,
+                          const std::string& input_image_location,
+                          const std::string& output_image_filename,
+                          off_t base_delta,
+                          bool base_delta_set,
+                          bool debug) {
+  CHECK(!input_image_location.empty());
+  if (output_image_filename.empty()) {
+    Usage("Image patching requires --output-image-file");
+  }
+
+  if (!base_delta_set) {
+    Usage("Must supply a desired new offset or delta.");
+  }
+
+  if (!IsAligned<kPageSize>(base_delta)) {
+    Usage("Base offset/delta must be aligned to a pagesize (0x%08x) boundary.", kPageSize);
+  }
+
+  if (debug) {
+    LOG(INFO) << "moving offset by " << base_delta
+        << " (0x" << std::hex << base_delta << ") bytes or "
+        << std::dec << (base_delta/kPageSize) << " pages.";
+  }
+
+  TimingLogger::ScopedTiming pt("patch image and oat", &timings);
+
+  std::string output_directory =
+      output_image_filename.substr(0, output_image_filename.find_last_of("/"));
+  bool ret = PatchOat::Patch(input_image_location, base_delta, output_directory, isa, &timings);
+
+  if (kIsDebugBuild) {
+    LOG(INFO) << "Exiting with return ... " << ret;
+  }
+  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int patchoat_oat(TimingLogger& timings,
+                        InstructionSet isa,
+                        const std::string& patched_image_location,
+                        off_t base_delta,
+                        bool base_delta_set,
+                        int input_oat_fd,
+                        const std::string& input_oat_location,
+                        std::string input_oat_filename,
+                        bool have_input_oat,
+                        int output_oat_fd,
+                        std::string output_oat_filename,
+                        bool have_output_oat,
+                        bool lock_output,
+                        bool debug) {
+  {
+    // Only 1 of these may be set.
+    uint32_t cnt = 0;
+    cnt += (base_delta_set) ? 1 : 0;
+    cnt += (!patched_image_location.empty()) ? 1 : 0;
+    if (cnt > 1) {
+      Usage("Only one of --base-offset-delta or --patched-image-location may be used.");
+    } else if (cnt == 0) {
+      Usage("Must specify --base-offset-delta or --patched-image-location.");
+    }
+  }
+
+  if (!have_input_oat || !have_output_oat) {
+    Usage("Both input and output oat must be supplied to patch an app odex.");
+  }
+
+  if (!input_oat_location.empty()) {
+    if (!LocationToFilename(input_oat_location, isa, &input_oat_filename)) {
+      Usage("Unable to find filename for input oat location %s", input_oat_location.c_str());
+    }
+    if (debug) {
+      LOG(INFO) << "Using input-oat-file " << input_oat_filename;
+    }
+  }
+
+  bool match_delta = false;
+  if (!patched_image_location.empty()) {
+    std::string system_filename;
+    bool has_system = false;
+    std::string cache_filename;
+    bool has_cache = false;
+    bool has_android_data_unused = false;
+    bool is_global_cache = false;
+    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
+                                                  &system_filename, &has_system, &cache_filename,
+                                                  &has_android_data_unused, &has_cache,
+                                                  &is_global_cache)) {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
+    std::string patched_image_filename;
+    if (has_cache) {
+      patched_image_filename = cache_filename;
+    } else if (has_system) {
+      LOG(WARNING) << "Only image file found was in /system for image location "
+          << patched_image_location;
+      patched_image_filename = system_filename;
+    } else {
+      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
+    }
+    if (debug) {
+      LOG(INFO) << "Using patched-image-file " << patched_image_filename;
+    }
+
+    base_delta_set = true;
+    match_delta = true;
+    std::string error_msg;
+    if (!ReadBaseDelta(patched_image_filename.c_str(), &base_delta, &error_msg)) {
+      Usage(error_msg.c_str(), patched_image_filename.c_str());
+    }
+  }
+
+  if (!IsAligned<kPageSize>(base_delta)) {
+    Usage("Base offset/delta must be alligned to a pagesize (0x%08x) boundary.", kPageSize);
+  }
+
+  // Do we need to cleanup output files if we fail?
+  bool new_oat_out = false;
+
+  std::unique_ptr<File> input_oat;
+  std::unique_ptr<File> output_oat;
+
+  if (input_oat_fd != -1) {
+    if (input_oat_filename.empty()) {
+      input_oat_filename = "input-oat-file";
+    }
+    input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
+    if (input_oat_fd == output_oat_fd) {
+      input_oat.get()->DisableAutoClose();
+    }
+    if (input_oat == nullptr) {
+      // Unlikely, but ensure exhaustive logging in non-0 exit code case
+      LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
+    }
+  } else {
+    CHECK(!input_oat_filename.empty());
+    input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
+    if (input_oat == nullptr) {
+      int err = errno;
+      LOG(ERROR) << "Failed to open input oat file " << input_oat_filename
+          << ": " << strerror(err) << "(" << err << ")";
+    }
+  }
+
+  if (output_oat_fd != -1) {
+    if (output_oat_filename.empty()) {
+      output_oat_filename = "output-oat-file";
+    }
+    output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
+    if (output_oat == nullptr) {
+      // Unlikely, but ensure exhaustive logging in non-0 exit code case
+      LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
+    }
+  } else {
+    CHECK(!output_oat_filename.empty());
+    output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
+    if (output_oat == nullptr) {
+      int err = errno;
+      LOG(ERROR) << "Failed to open output oat file " << output_oat_filename
+          << ": " << strerror(err) << "(" << err << ")";
+    }
+  }
+
+  // TODO: get rid of this.
+  auto cleanup = [&output_oat_filename, &new_oat_out](bool success) {
+    if (!success) {
+      if (new_oat_out) {
+        CHECK(!output_oat_filename.empty());
+        TEMP_FAILURE_RETRY(unlink(output_oat_filename.c_str()));
+      }
+    }
+
+    if (kIsDebugBuild) {
+      LOG(INFO) << "Cleaning up.. success? " << success;
+    }
+  };
+
+  if (input_oat.get() == nullptr || output_oat.get() == nullptr) {
+    LOG(ERROR) << "Failed to open input/output oat files";
+    cleanup(false);
+    return EXIT_FAILURE;
+  }
+
+  if (match_delta) {
+    std::string error_msg;
+    // Figure out what the current delta is so we can match it to the desired delta.
+    std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat.get(), PROT_READ, MAP_PRIVATE,
+                                               &error_msg));
+    off_t current_delta = 0;
+    if (elf.get() == nullptr) {
+      LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
+      cleanup(false);
+      return EXIT_FAILURE;
+    } else if (!ReadOatPatchDelta(elf.get(), &current_delta, &error_msg)) {
+      LOG(ERROR) << "Unable to get current delta: " << error_msg;
+      cleanup(false);
+      return EXIT_FAILURE;
+    }
+    // Before this line base_delta is the desired final delta. We need it to be the actual amount to
+    // change everything by. We subtract the current delta from it to make it this.
+    base_delta -= current_delta;
+    if (!IsAligned<kPageSize>(base_delta)) {
+      LOG(ERROR) << "Given image file was relocated by an illegal delta";
+      cleanup(false);
+      return false;
+    }
+  }
+
+  if (debug) {
+    LOG(INFO) << "moving offset by " << base_delta
+        << " (0x" << std::hex << base_delta << ") bytes or "
+        << std::dec << (base_delta/kPageSize) << " pages.";
+  }
+
+  ScopedFlock output_oat_lock;
+  if (lock_output) {
+    std::string error_msg;
+    if (!output_oat_lock.Init(output_oat.get(), &error_msg)) {
+      LOG(ERROR) << "Unable to lock output oat " << output_oat->GetPath() << ": " << error_msg;
+      cleanup(false);
+      return EXIT_FAILURE;
+    }
+  }
+
+  TimingLogger::ScopedTiming pt("patch oat", &timings);
+  bool ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
+                             output_oat_fd >= 0,  // was it opened from FD?
+                             new_oat_out);
+  ret = FinishFile(output_oat.get(), ret);
+
+  if (kIsDebugBuild) {
+    LOG(INFO) << "Exiting with return ... " << ret;
+  }
+  cleanup(ret);
+  return ret ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
 static int patchoat(int argc, char **argv) {
   InitLogging(argv);
   MemMap::Init();
@@ -1024,15 +1236,8 @@
   int output_oat_fd = -1;
   bool have_output_oat = false;
   std::string output_image_filename;
-  int output_image_fd = -1;
-  bool have_output_image = false;
-  uintptr_t base_offset = 0;
-  bool base_offset_set = false;
-  uintptr_t orig_base_offset = 0;
-  bool orig_base_offset_set = false;
   off_t base_delta = 0;
   bool base_delta_set = false;
-  bool match_delta = false;
   std::string patched_image_filename;
   std::string patched_image_location;
   bool dump_timings = kIsDebugBuild;
@@ -1096,36 +1301,7 @@
         Usage("--output-oat-fd pass a negative value %d", output_oat_fd);
       }
     } else if (option.starts_with("--output-image-file=")) {
-      if (have_output_image) {
-        Usage("Only one of --output-image-file, and --output-image-fd may be used.");
-      }
-      have_output_image = true;
       output_image_filename = option.substr(strlen("--output-image-file=")).data();
-    } else if (option.starts_with("--output-image-fd=")) {
-      if (have_output_image) {
-        Usage("Only one of --output-image-file, and --output-image-fd may be used.");
-      }
-      have_output_image = true;
-      const char* image_fd_str = option.substr(strlen("--output-image-fd=")).data();
-      if (!ParseInt(image_fd_str, &output_image_fd)) {
-        Usage("Failed to parse --output-image-fd argument '%s' as an integer", image_fd_str);
-      }
-      if (output_image_fd < 0) {
-        Usage("--output-image-fd pass a negative value %d", output_image_fd);
-      }
-    } else if (option.starts_with("--orig-base-offset=")) {
-      const char* orig_base_offset_str = option.substr(strlen("--orig-base-offset=")).data();
-      orig_base_offset_set = true;
-      if (!ParseUint(orig_base_offset_str, &orig_base_offset)) {
-        Usage("Failed to parse --orig-base-offset argument '%s' as an uintptr_t",
-              orig_base_offset_str);
-      }
-    } else if (option.starts_with("--base-offset=")) {
-      const char* base_offset_str = option.substr(strlen("--base-offset=")).data();
-      base_offset_set = true;
-      if (!ParseUint(base_offset_str, &base_offset)) {
-        Usage("Failed to parse --base-offset argument '%s' as an uintptr_t", base_offset_str);
-      }
     } else if (option.starts_with("--base-offset-delta=")) {
       const char* base_delta_str = option.substr(strlen("--base-offset-delta=")).data();
       base_delta_set = true;
@@ -1134,8 +1310,6 @@
       }
     } else if (option.starts_with("--patched-image-location=")) {
       patched_image_location = option.substr(strlen("--patched-image-location=")).data();
-    } else if (option.starts_with("--patched-image-file=")) {
-      patched_image_filename = option.substr(strlen("--patched-image-file=")).data();
     } else if (option == "--lock-output") {
       lock_output = true;
     } else if (option == "--no-lock-output") {
@@ -1149,284 +1323,43 @@
     }
   }
 
-  {
-    // Only 1 of these may be set.
-    uint32_t cnt = 0;
-    cnt += (base_delta_set) ? 1 : 0;
-    cnt += (base_offset_set && orig_base_offset_set) ? 1 : 0;
-    cnt += (!patched_image_filename.empty()) ? 1 : 0;
-    cnt += (!patched_image_location.empty()) ? 1 : 0;
-    if (cnt > 1) {
-      Usage("Only one of --base-offset/--orig-base-offset, --base-offset-delta, "
-            "--patched-image-filename or --patched-image-location may be used.");
-    } else if (cnt == 0) {
-      Usage("Must specify --base-offset-delta, --base-offset and --orig-base-offset, "
-            "--patched-image-location or --patched-image-file");
-    }
+  // The instruction set is mandatory. This simplifies things...
+  if (!isa_set) {
+    Usage("Instruction set must be set.");
   }
 
-  if (have_input_oat != have_output_oat) {
-    Usage("Either both input and output oat must be supplied or niether must be.");
-  }
-
-  if ((!input_image_location.empty()) != have_output_image) {
-    Usage("Either both input and output image must be supplied or niether must be.");
-  }
-
-  // We know we have both the input and output so rename for clarity.
-  bool have_image_files = have_output_image;
-  bool have_oat_files = have_output_oat;
-
-  if (!have_oat_files) {
-    if (have_image_files) {
-      Usage("Cannot patch an image file without an oat file");
-    } else {
-      Usage("Must be patching either an oat file or an image file with an oat file.");
-    }
-  }
-
-  if (!have_oat_files && !isa_set) {
-    Usage("Must include ISA if patching an image file without an oat file.");
-  }
-
-  if (!input_oat_location.empty()) {
-    if (!isa_set) {
-      Usage("specifying a location requires specifying an instruction set");
-    }
-    if (!LocationToFilename(input_oat_location, isa, &input_oat_filename)) {
-      Usage("Unable to find filename for input oat location %s", input_oat_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using input-oat-file " << input_oat_filename;
-    }
-  }
-  if (!patched_image_location.empty()) {
-    if (!isa_set) {
-      Usage("specifying a location requires specifying an instruction set");
-    }
-    std::string system_filename;
-    bool has_system = false;
-    std::string cache_filename;
-    bool has_cache = false;
-    bool has_android_data_unused = false;
-    bool is_global_cache = false;
-    if (!gc::space::ImageSpace::FindImageFilename(patched_image_location.c_str(), isa,
-                                                  &system_filename, &has_system, &cache_filename,
-                                                  &has_android_data_unused, &has_cache,
-                                                  &is_global_cache)) {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    if (has_cache) {
-      patched_image_filename = cache_filename;
-    } else if (has_system) {
-      LOG(WARNING) << "Only image file found was in /system for image location "
-                   << patched_image_location;
-      patched_image_filename = system_filename;
-    } else {
-      Usage("Unable to determine image file for location %s", patched_image_location.c_str());
-    }
-    if (debug) {
-      LOG(INFO) << "Using patched-image-file " << patched_image_filename;
-    }
-  }
-
-  if (!base_delta_set) {
-    if (orig_base_offset_set && base_offset_set) {
-      base_delta_set = true;
-      base_delta = base_offset - orig_base_offset;
-    } else if (!patched_image_filename.empty()) {
-      if (have_image_files) {
-        Usage("--patched-image-location should not be used when patching other images");
-      }
-      base_delta_set = true;
-      match_delta = true;
-      std::string error_msg;
-      if (!ReadBaseDelta(patched_image_filename.c_str(), &base_delta, &error_msg)) {
-        Usage(error_msg.c_str(), patched_image_filename.c_str());
-      }
-    } else {
-      if (base_offset_set) {
-        Usage("Unable to determine original base offset.");
-      } else {
-        Usage("Must supply a desired new offset or delta.");
-      }
-    }
-  }
-
-  if (!IsAligned<kPageSize>(base_delta)) {
-    Usage("Base offset/delta must be alligned to a pagesize (0x%08x) boundary.", kPageSize);
-  }
-
-  // Do we need to cleanup output files if we fail?
-  bool new_image_out = false;
-  bool new_oat_out = false;
-
-  std::unique_ptr<File> input_oat;
-  std::unique_ptr<File> output_oat;
-  std::unique_ptr<File> output_image;
-
-  if (have_image_files) {
-    CHECK(!input_image_location.empty());
-
-    if (output_image_fd != -1) {
-      if (output_image_filename.empty()) {
-        output_image_filename = "output-image-file";
-      }
-      output_image.reset(new File(output_image_fd, output_image_filename, true));
-    } else {
-      CHECK(!output_image_filename.empty());
-      output_image.reset(CreateOrOpen(output_image_filename.c_str(), &new_image_out));
-    }
+  int ret;
+  if (!input_image_location.empty()) {
+    ret = patchoat_image(timings,
+                         isa,
+                         input_image_location,
+                         output_image_filename,
+                         base_delta,
+                         base_delta_set,
+                         debug);
   } else {
-    CHECK(output_image_filename.empty() && output_image_fd == -1 && input_image_location.empty());
+    ret = patchoat_oat(timings,
+                       isa,
+                       patched_image_location,
+                       base_delta,
+                       base_delta_set,
+                       input_oat_fd,
+                       input_oat_location,
+                       input_oat_filename,
+                       have_input_oat,
+                       output_oat_fd,
+                       output_oat_filename,
+                       have_output_oat,
+                       lock_output,
+                       debug);
   }
 
-  if (have_oat_files) {
-    if (input_oat_fd != -1) {
-      if (input_oat_filename.empty()) {
-        input_oat_filename = "input-oat-file";
-      }
-      input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
-      if (input_oat_fd == output_oat_fd) {
-        input_oat.get()->DisableAutoClose();
-      }
-      if (input_oat == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
-      }
-    } else {
-      CHECK(!input_oat_filename.empty());
-      input_oat.reset(OS::OpenFileForReading(input_oat_filename.c_str()));
-      if (input_oat == nullptr) {
-        int err = errno;
-        LOG(ERROR) << "Failed to open input oat file " << input_oat_filename
-                   << ": " << strerror(err) << "(" << err << ")";
-      }
-    }
-
-    if (output_oat_fd != -1) {
-      if (output_oat_filename.empty()) {
-        output_oat_filename = "output-oat-file";
-      }
-      output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
-      if (output_oat == nullptr) {
-        // Unlikely, but ensure exhaustive logging in non-0 exit code case
-        LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
-      }
-    } else {
-      CHECK(!output_oat_filename.empty());
-      output_oat.reset(CreateOrOpen(output_oat_filename.c_str(), &new_oat_out));
-      if (output_oat == nullptr) {
-        int err = errno;
-        LOG(ERROR) << "Failed to open output oat file " << output_oat_filename
-                   << ": " << strerror(err) << "(" << err << ")";
-      }
-    }
+  timings.EndTiming();
+  if (dump_timings) {
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 
-  // TODO: get rid of this.
-  auto cleanup = [&output_image_filename, &output_oat_filename,
-                  &new_oat_out, &new_image_out, &timings, &dump_timings](bool success) {
-    timings.EndTiming();
-    if (!success) {
-      if (new_oat_out) {
-        CHECK(!output_oat_filename.empty());
-        TEMP_FAILURE_RETRY(unlink(output_oat_filename.c_str()));
-      }
-      if (new_image_out) {
-        CHECK(!output_image_filename.empty());
-        TEMP_FAILURE_RETRY(unlink(output_image_filename.c_str()));
-      }
-    }
-    if (dump_timings) {
-      LOG(INFO) << Dumpable<TimingLogger>(timings);
-    }
-
-    if (kIsDebugBuild) {
-      LOG(INFO) << "Cleaning up.. success? " << success;
-    }
-  };
-
-  if (have_oat_files && (input_oat.get() == nullptr || output_oat.get() == nullptr)) {
-    LOG(ERROR) << "Failed to open input/output oat files";
-    cleanup(false);
-    return EXIT_FAILURE;
-  } else if (have_image_files && output_image.get() == nullptr) {
-    LOG(ERROR) << "Failed to open output image file";
-    cleanup(false);
-    return EXIT_FAILURE;
-  }
-
-  if (match_delta) {
-    CHECK(!have_image_files);  // We will not do this with images.
-    std::string error_msg;
-    // Figure out what the current delta is so we can match it to the desired delta.
-    std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat.get(), PROT_READ, MAP_PRIVATE,
-                                               &error_msg));
-    off_t current_delta = 0;
-    if (elf.get() == nullptr) {
-      LOG(ERROR) << "unable to open oat file " << input_oat->GetPath() << " : " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    } else if (!ReadOatPatchDelta(elf.get(), &current_delta, &error_msg)) {
-      LOG(ERROR) << "Unable to get current delta: " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-    // Before this line base_delta is the desired final delta. We need it to be the actual amount to
-    // change everything by. We subtract the current delta from it to make it this.
-    base_delta -= current_delta;
-    if (!IsAligned<kPageSize>(base_delta)) {
-      LOG(ERROR) << "Given image file was relocated by an illegal delta";
-      cleanup(false);
-      return false;
-    }
-  }
-
-  if (debug) {
-    LOG(INFO) << "moving offset by " << base_delta
-              << " (0x" << std::hex << base_delta << ") bytes or "
-              << std::dec << (base_delta/kPageSize) << " pages.";
-  }
-
-  // TODO: is it going to be promatic to unlink a file that was flock-ed?
-  ScopedFlock output_oat_lock;
-  if (lock_output) {
-    std::string error_msg;
-    if (have_oat_files && !output_oat_lock.Init(output_oat.get(), &error_msg)) {
-      LOG(ERROR) << "Unable to lock output oat " << output_image->GetPath() << ": " << error_msg;
-      cleanup(false);
-      return EXIT_FAILURE;
-    }
-  }
-
-  bool ret;
-  if (have_image_files && have_oat_files) {
-    TimingLogger::ScopedTiming pt("patch image and oat", &timings);
-    ret = PatchOat::Patch(input_oat.get(), input_image_location, base_delta,
-                          output_oat.get(), output_image.get(), isa, &timings,
-                          output_oat_fd >= 0,  // was it opened from FD?
-                          new_oat_out);
-    // The order here doesn't matter. If the first one is successfully saved and the second one
-    // erased, ImageSpace will still detect a problem and not use the files.
-    ret = FinishFile(output_image.get(), ret);
-    ret = FinishFile(output_oat.get(), ret);
-  } else if (have_oat_files) {
-    TimingLogger::ScopedTiming pt("patch oat", &timings);
-    ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
-                          output_oat_fd >= 0,  // was it opened from FD?
-                          new_oat_out);
-    ret = FinishFile(output_oat.get(), ret);
-  } else {
-    CHECK(false);
-    ret = true;
-  }
-
-  if (kIsDebugBuild) {
-    LOG(INFO) << "Exiting with return ... " << ret;
-  }
-  cleanup(ret);
-  return (ret) ? EXIT_SUCCESS : EXIT_FAILURE;
+  return ret;
 }
 
 }  // namespace art
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index ceddc34..a6a8fee 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -53,11 +53,11 @@
                     TimingLogger* timings);
 
   // Patch both the image and the oat file
-  static bool Patch(File* oat_in, const std::string& art_location,
-                    off_t delta, File* oat_out, File* art_out, InstructionSet isa,
-                    TimingLogger* timings,
-                    bool output_oat_opened_from_fd,  // Was this using --oatput-oat-fd ?
-                    bool new_oat_out);               // Output oat was a new file created by us?
+  static bool Patch(const std::string& art_location,
+                    off_t delta,
+                    const std::string& output_directory,
+                    InstructionSet isa,
+                    TimingLogger* timings);
 
   ~PatchOat() {}
   PatchOat(PatchOat&&) = default;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 949ad99..c4e314b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -444,11 +444,12 @@
     mov    r10, r1                         @ Save size of stack
     ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
     mov    r8, r2                          @ Save the pc to call
-    sub    r7, sp, #12                     @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+    sub    r7, sp, #12                     @ Reserve space for stack pointer,
+                                           @    JValue* result, and ArtMethod* slot.
     and    r7, #0xFFFFFFF0                 @ Align stack pointer
     mov    sp, r7                          @ Update stack pointer
     str    r11, [sp, #4]                   @ Save old stack pointer
-    str    r3, [sp, #8]                    @ Save JValue result
+    str    r3, [sp, #8]                    @ Save JValue* result
     mov    ip, #0
     str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
     sub    sp, sp, r1                      @ Reserve space for callee stack
@@ -457,9 +458,8 @@
     mov    r0, sp
     bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
     bl     .Losr_entry                     @ Call the method
-    ldr    r11, [sp, #4]                   @ Restore saved stack pointer
-    ldr    r10, [sp, #8]                   @ Restore JValue result
-    mov    sp, r11                         @ Restore stack pointer.
+    ldr    r10, [sp, #8]                   @ Restore JValue* result
+    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
     ldr    r4, [sp, #36]                   @ load shorty
     ldrb   r4, [r4, #0]                    @ load return type
     cmp    r4, #68                         @ Test if result type char == 'D'.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index d6e0f1c..69caec8 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1755,6 +1755,8 @@
      *   rcx = JValue* result
      *   r8 = shorty
      *   r9 = thread
+     *
+     * Note that the native C ABI already aligned the stack to 16-byte.
      */
 DEFINE_FUNCTION art_quick_osr_stub
     // Save the non-volatiles.
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index cc45c38..ebe89bb 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -449,24 +449,25 @@
 void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
   ArtMethod* interface_method = nullptr;
   mirror::Class* klass = declaring_class_.Read();
-  if (UNLIKELY(klass != nullptr && klass->IsProxyClass())) {
-    // For normal methods, dex cache shortcuts will be visited through the declaring class.
-    // However, for proxies we need to keep the interface method alive, so we visit its roots.
-    interface_method = mirror::DexCache::GetElementPtrSize(
-        GetDexCacheResolvedMethods(pointer_size),
-        GetDexMethodIndex(),
-        pointer_size);
-    DCHECK(interface_method != nullptr);
-    DCHECK_EQ(interface_method,
-              Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
-    interface_method->VisitRoots(visitor, pointer_size);
-  }
-
-  visitor.VisitRootIfNonNull(declaring_class_.AddressWithoutBarrier());
-  if (!IsNative()) {
-    ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
-    if (profiling_info != nullptr) {
-      profiling_info->VisitRoots(visitor);
+  if (LIKELY(klass != nullptr)) {
+    if (UNLIKELY(klass->IsProxyClass())) {
+      // For normal methods, dex cache shortcuts will be visited through the declaring class.
+      // However, for proxies we need to keep the interface method alive, so we visit its roots.
+      interface_method = mirror::DexCache::GetElementPtrSize(
+          GetDexCacheResolvedMethods(pointer_size),
+          GetDexMethodIndex(),
+          pointer_size);
+      DCHECK(interface_method != nullptr);
+      DCHECK_EQ(interface_method,
+                Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
+      interface_method->VisitRoots(visitor, pointer_size);
+    }
+    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+    if (!IsNative()) {
+      ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
+      if (profiling_info != nullptr) {
+        profiling_info->VisitRoots(visitor);
+      }
     }
   }
 }
diff --git a/runtime/base/scoped_arena_allocator.cc b/runtime/base/scoped_arena_allocator.cc
index 90c6ee3..7d04fa0 100644
--- a/runtime/base/scoped_arena_allocator.cc
+++ b/runtime/base/scoped_arena_allocator.cc
@@ -99,7 +99,7 @@
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
     CHECK(ptr != nullptr) << "Failed to allocate memory";
-    MEMORY_TOOL_MAKE_NOACCESS(ptr, top_end_);
+    MEMORY_TOOL_MAKE_NOACCESS(ptr, top_end_ - ptr);
   }
   CurrentStats()->RecordAlloc(bytes, kind);
   top_ptr_ = ptr + rounded_bytes;
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index 1236585..9b56856 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -201,22 +201,28 @@
 template <typename T>
 class ArenaDelete {
   static constexpr uint8_t kMagicFill = 0xCE;
- public:
-  void operator()(T* ptr) const {
-    ptr->~T();
+ protected:
+  // Used for variable sized objects such as RegisterLine.
+  ALWAYS_INLINE void ProtectMemory(T* ptr, size_t size) const {
     if (RUNNING_ON_MEMORY_TOOL > 0) {
       // Writing to the memory will fail if it we already destroyed the pointer with
       // DestroyOnlyDelete since we make it no access.
-      memset(ptr, kMagicFill, sizeof(T));
-      MEMORY_TOOL_MAKE_NOACCESS(ptr, sizeof(T));
+      memset(ptr, kMagicFill, size);
+      MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
     } else if (kIsDebugBuild) {
       CHECK(ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) == ArenaFreeTag::kUsed)
           << "Freeing invalid object " << ptr;
       ArenaStack::ArenaTagForAllocation(reinterpret_cast<void*>(ptr)) = ArenaFreeTag::kFree;
       // Write a magic value to try and catch use after free error.
-      memset(ptr, kMagicFill, sizeof(T));
+      memset(ptr, kMagicFill, size);
     }
   }
+
+ public:
+  void operator()(T* ptr) const {
+    ptr->~T();
+    ProtectMemory(ptr, sizeof(T));
+  }
 };
 
 // In general we lack support for arrays. We would need to call the destructor on each element,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0631ebe..5278d1b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -2620,19 +2620,37 @@
   return oat_class.GetOatMethod(oat_method_idx).GetQuickCode();
 }
 
-// Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(ArtMethod* method, const void* quick_code)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ClassLinker::ShouldUseInterpreterEntrypoint(ArtMethod* method, const void* quick_code) {
+  if (UNLIKELY(method->IsNative() || method->IsProxyMethod())) {
+    return false;
+  }
+
   if (quick_code == nullptr) {
-    // No code: need interpreter.
-    // May return true for native code, in the case of generic JNI
-    // DCHECK(!method->IsNative());
     return true;
   }
-  // If interpreter mode is enabled, every method (except native and proxy) must
-  // be run with interpreter.
-  return Runtime::Current()->GetInstrumentation()->InterpretOnly() &&
-         !method->IsNative() && !method->IsProxyMethod();
+
+  Runtime* runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  if (instr->InterpretOnly()) {
+    return true;
+  }
+
+  if (runtime->GetClassLinker()->IsQuickToInterpreterBridge(quick_code)) {
+    // Doing this check avoids doing compiled/interpreter transitions.
+    return true;
+  }
+
+  if (Dbg::IsForcedInterpreterNeededForCalling(Thread::Current(), method)) {
+    // Force the use of interpreter when it is required by the debugger.
+    return true;
+  }
+
+  if (runtime->UseJit() && runtime->GetJit()->JitAtFirstUse()) {
+    // The force JIT uses the interpreter entry point to execute the JIT.
+    return true;
+  }
+
+  return false;
 }
 
 void ClassLinker::FixupStaticTrampolines(mirror::Class* klass) {
@@ -2677,15 +2695,12 @@
       OatFile::OatMethod oat_method = oat_class.GetOatMethod(method_index);
       quick_code = oat_method.GetQuickCode();
     }
-    const bool enter_interpreter = NeedsInterpreter(method, quick_code);
-    if (enter_interpreter) {
+    // Check whether the method is native, in which case it's generic JNI.
+    if (quick_code == nullptr && method->IsNative()) {
+      quick_code = GetQuickGenericJniStub();
+    } else if (ShouldUseInterpreterEntrypoint(method, quick_code)) {
       // Use interpreter entry point.
-      // Check whether the method is native, in which case it's generic JNI.
-      if (quick_code == nullptr && method->IsNative()) {
-        quick_code = GetQuickGenericJniStub();
-      } else {
-        quick_code = GetQuickToInterpreterBridge();
-      }
+      quick_code = GetQuickToInterpreterBridge();
     }
     runtime->GetInstrumentation()->UpdateMethodsCode(method, quick_code);
   }
@@ -2716,7 +2731,8 @@
   }
 
   // Install entry point from interpreter.
-  bool enter_interpreter = NeedsInterpreter(method, method->GetEntryPointFromQuickCompiledCode());
+  const void* quick_code = method->GetEntryPointFromQuickCompiledCode();
+  bool enter_interpreter = ShouldUseInterpreterEntrypoint(method, quick_code);
 
   if (!method->IsInvokable()) {
     EnsureThrowsInvocationError(method);
@@ -2728,20 +2744,18 @@
     // It will be replaced by the proper entry point by ClassLinker::FixupStaticTrampolines
     // after initializing class (see ClassLinker::InitializeClass method).
     method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
+  } else if (quick_code == nullptr && method->IsNative()) {
+    method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniStub());
   } else if (enter_interpreter) {
-    if (!method->IsNative()) {
-      // Set entry point from compiled code if there's no code or in interpreter only mode.
-      method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
-    } else {
-      method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniStub());
-    }
+    // Set entry point from compiled code if there's no code or in interpreter only mode.
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
   }
 
   if (method->IsNative()) {
     // Unregistering restores the dlsym lookup stub.
     method->UnregisterNative();
 
-    if (enter_interpreter) {
+    if (enter_interpreter || quick_code == nullptr) {
       // We have a native method here without code. Then it should have either the generic JNI
       // trampoline as entrypoint (non-static), or the resolution trampoline (static).
       // TODO: this doesn't handle all the cases where trampolines may be installed.
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 56a868a..a9448f7 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -592,6 +592,9 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static bool ShouldUseInterpreterEntrypoint(ArtMethod* method, const void* quick_code)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
     // not work properly.
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 9269339..0c06c38 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -305,12 +305,6 @@
   std::string output_image_filename_arg("--output-image-file=");
   output_image_filename_arg += dest_filename;
 
-  std::string input_oat_location_arg("--input-oat-location=");
-  input_oat_location_arg += ImageHeader::GetOatLocationFromImageLocation(image_location);
-
-  std::string output_oat_filename_arg("--output-oat-file=");
-  output_oat_filename_arg += ImageHeader::GetOatLocationFromImageLocation(dest_filename);
-
   std::string instruction_set_arg("--instruction-set=");
   instruction_set_arg += GetInstructionSetString(isa);
 
@@ -324,9 +318,6 @@
   argv.push_back(input_image_location_arg);
   argv.push_back(output_image_filename_arg);
 
-  argv.push_back(input_oat_location_arg);
-  argv.push_back(output_oat_filename_arg);
-
   argv.push_back(instruction_set_arg);
   argv.push_back(base_offset_arg);
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index e3cbf53..56aeefc 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -290,14 +290,6 @@
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
-        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
-        have_branch_listeners_ || have_invoke_virtual_or_interface_listeners_;
-  }
-
-  // Any instrumentation *other* than what is needed for Jit profiling active?
-  bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
-        have_field_read_listeners_ || have_field_write_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 01498a2..4fd3c78 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -27,6 +27,7 @@
 #include "unstarted_runtime.h"
 #include "mterp/mterp.h"
 #include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 
 namespace art {
 namespace interpreter {
@@ -293,9 +294,10 @@
                                         method, 0);
     }
 
-    if (UNLIKELY(Runtime::Current()->GetJit() != nullptr &&
-                 Runtime::Current()->GetJit()->JitAtFirstUse() &&
-                 method->HasAnyCompiledCode())) {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (UNLIKELY(jit != nullptr &&
+                 jit->JitAtFirstUse() &&
+                 jit->GetCodeCache()->ContainsMethod(method))) {
       JValue result;
 
       // Pop the shadow frame before calling into compiled code.
@@ -322,14 +324,8 @@
         const instrumentation::Instrumentation* const instrumentation =
             Runtime::Current()->GetInstrumentation();
         while (true) {
-          // Mterp does not support all instrumentation.
-          bool unhandled_instrumentation;
-          if ((kRuntimeISA == kArm64) || (kRuntimeISA == kArm)) {
-            unhandled_instrumentation = instrumentation->NonJitProfilingActive();
-          } else {
-            unhandled_instrumentation = instrumentation->IsActive();
-          }
-          if (unhandled_instrumentation || !Runtime::Current()->IsStarted()) {
+          if (instrumentation->IsActive() || !Runtime::Current()->IsStarted()) {
+            // TODO: allow JIT profiling instrumentation.  Now, just punt on all instrumentation.
 #if !defined(__clang__)
             return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
 #else
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 09d8601..cbaa817 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -20,6 +20,7 @@
 
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "jit/jit.h"
 #include "mirror/array-inl.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
@@ -501,23 +502,6 @@
                                 uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) ALWAYS_INLINE;
 
-SHARED_REQUIRES(Locks::mutator_lock_)
-static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) ALWAYS_INLINE;
-
-static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) {
-  ArtMethod* target = new_shadow_frame->GetMethod();
-  if (UNLIKELY(target->IsNative() || target->IsProxyMethod())) {
-    return false;
-  }
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  return runtime->GetInstrumentation()->IsForcedInterpretOnly() ||
-        // Doing this check avoids doing compiled/interpreter transitions.
-        class_linker->IsQuickToInterpreterBridge(target->GetEntryPointFromQuickCompiledCode()) ||
-        // Force the use of interpreter when it is required by the debugger.
-        Dbg::IsForcedInterpreterNeededForCalling(self, target);
-}
-
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                         const DexFile::CodeItem* code_item,
                                         ShadowFrame* shadow_frame,
@@ -736,7 +720,10 @@
 
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    if (NeedsInterpreter(self, new_shadow_frame)) {
+    ArtMethod* target = new_shadow_frame->GetMethod();
+    if (ClassLinker::ShouldUseInterpreterEntrypoint(
+        target,
+        target->GetEntryPointFromQuickCompiledCode())) {
       ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
     } else {
       ArtInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 774e167..474bc3c 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -6,29 +6,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    b${revcmp} .L_${opcode}_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_${opcode}_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -37,10 +25,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    mov${revcmp} rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    mov${revcmp} r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 9a433e3..1dba856 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -12,6 +12,7 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
+#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -123,18 +124,6 @@
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /*
- * On-stack replacement pending.
- * Branch offset in rINST on entry.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov r0, rSELF
-    add r1, rFP, #OFF_FP_SHADOWFRAME
-    mov r2, rINST
-    bl MterpLogOSR
-#endif
-    b MterpFallback                     @ Let the reference interpreter deal with it.
-/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 298af8a..b2370bf 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -85,9 +85,6 @@
  */
 #include "asm_support.h"
 
-#define MTERP_PROFILE_BRANCHES 1
-#define MTERP_LOGGING 0
-
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -112,6 +109,14 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
+ *
+ * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index eb1d429..9b3632a 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -6,28 +6,20 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    add     r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
        @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    add     r2, rINST, rINST            @ r2<- byte offset, set flags
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 91639ca..2231acd 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,25 +5,17 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index e730b52..6b72ff5 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,29 +10,21 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 4c369cb..1e3370e 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,7 +9,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -17,18 +17,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -39,9 +30,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 800804d..6e9ef55 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -6,37 +6,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    b${revcmp} .L_${opcode}_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_${opcode}_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    mov${revcmp} rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    mov${revcmp} r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index ed850fc..ecab2ce 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -6,28 +6,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.${condition} .L_${opcode}_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_${opcode}_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    mov${condition} w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -36,11 +25,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, ${condition}    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 867e927..b360539 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -10,6 +10,7 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
+#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -123,19 +124,6 @@
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
- * On-stack replacement pending.
- * Branch offset in wINST on entry.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
-    bl MterpLogOSR
-#endif
-    b MterpFallback                     // Let the reference interpreter deal with it.
-
-/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 7223750..351a607 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -87,9 +87,6 @@
  */
 #include "asm_support.h"
 
-#define MTERP_PROFILE_BRANCHES 1
-#define MTERP_LOGGING 0
-
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -117,6 +114,14 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
+ *
+ * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index 7e2f6a9..db98a45 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -6,20 +6,23 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
+    add     w2, w1, w1                  // w2<- byte offset, set flags
+       // If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
+    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index b2b9924..ff66a23 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -5,18 +5,19 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index b785857..8a6980e 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -10,20 +10,23 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/op_iget.S b/runtime/interpreter/mterp/arm64/op_iget.S
index 88533bd..165c730 100644
--- a/runtime/interpreter/mterp/arm64/op_iget.S
+++ b/runtime/interpreter/mterp/arm64/op_iget.S
@@ -1,4 +1,4 @@
-%default { "extend":"", "is_object":"0", "helper":"artGet32InstanceFromCode"}
+%default { "is_object":"0", "helper":"artGet32InstanceFromCode"}
     /*
      * General instance field get.
      *
@@ -12,7 +12,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       $helper
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    $extend
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index e8b4f04..f087d23 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -9,6 +9,20 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      $func                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -16,18 +30,10 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index e528d9f..d4856d2 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -6,37 +6,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.${condition} .L_${opcode}_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_${opcode}_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    mov${condition} w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, ${condition}    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 3feea50..0afd276 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,7 +20,6 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
-#include "jit/jit.h"
 
 namespace art {
 namespace interpreter {
@@ -489,14 +488,6 @@
             << self->IsExceptionPending();
 }
 
-extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
-  UNUSED(self);
-  const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
-  uint16_t inst_data = inst->Fetch16(0);
-  LOG(INFO) << "OSR: " << inst->Opcode(inst_data) << ", offset = " << offset;
-}
-
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
   SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
@@ -627,15 +618,5 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
-extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  uint32_t dex_pc = shadow_frame->GetDexPC();
-  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
-  instrumentation->Branch(self, method, dex_pc, offset);
-  JValue result;
-  return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result);
-}
-
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index b08427d..ee19559 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -92,9 +92,6 @@
  */
 #include "asm_support.h"
 
-#define MTERP_PROFILE_BRANCHES 1
-#define MTERP_LOGGING 0
-
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
 #define rPC     r4
@@ -119,6 +116,14 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
+ *
+ * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1106,28 +1111,20 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    add     r2, rINST, rINST            @ r2<- byte offset, set flags
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
        @ If backwards branch refresh rIBASE
-    bmi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
-    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    add     r2, rINST, rINST            @ r2<- byte offset, set flags
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    r1, r0, asr #24             @ r1<- ssssssAA (sign-extended)
+    add     r2, r1, r1                  @ r2<- byte offset, set flags
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
        @ If backwards branch refresh rIBASE
     bmi     MterpCheckSuspendAndContinue
@@ -1146,25 +1143,17 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+#if MTERP_SUSPEND
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
-    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    FETCH_S r0, 1                       @ r0<- ssssAAAA (sign-extended)
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    adds    r1, r0, r0                  @ r1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1187,29 +1176,21 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     FETCH r0, 1                         @ r0<- aaaa (lo)
     FETCH r1, 2                         @ r1<- AAAA (hi)
-    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset
+    orr     r0, r0, r1, lsl #16         @ r0<- AAAAaaaa
+    adds    r1, r0, r0                  @ r1<- byte offset
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1230,7 +1211,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1238,18 +1219,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1260,9 +1232,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1284,7 +1255,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1292,18 +1263,9 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
+    ldrle   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET] @ refresh handler base
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1314,9 +1276,8 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    mov     rINST, r0
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    adds    r1, r0, r0                  @ r1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     ble     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1534,29 +1495,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    bne .L_op_if_eq_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_eq_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1565,10 +1514,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movne rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    movne r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1589,29 +1538,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    beq .L_op_if_ne_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_ne_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1620,10 +1557,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    moveq rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    moveq r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1644,29 +1581,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    bge .L_op_if_lt_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_lt_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1675,10 +1600,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movge rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    movge r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1699,29 +1624,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    blt .L_op_if_ge_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_ge_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1730,10 +1643,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movlt rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    movlt r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1754,29 +1667,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    ble .L_op_if_gt_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_gt_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1785,10 +1686,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movle rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    movle r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1809,29 +1710,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    bgt .L_op_if_le_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_le_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
@@ -1840,10 +1729,10 @@
     GET_VREG r3, r1                     @ r3<- vB
     GET_VREG r2, r0                     @ r2<- vA
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, r3                      @ compare (vA, vB)
-    movgt rINST, #2              @ rINST<- BYTE branch dist for not-taken
-    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    movgt r1, #2                 @ r1<- BYTE branch dist for not-taken
+    adds    r2, r1, r1                  @ convert to bytes, check sign
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1864,37 +1753,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    bne .L_op_if_eqz_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_eqz_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movne rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movne r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1915,37 +1792,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    beq .L_op_if_nez_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_nez_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    moveq rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    moveq r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1966,37 +1831,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    bge .L_op_if_ltz_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_ltz_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movge rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movge r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -2017,37 +1870,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    blt .L_op_if_gez_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_gez_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movlt rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movlt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -2068,37 +1909,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    ble .L_op_if_gtz_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_gtz_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movle rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movle r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -2119,37 +1948,25 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
+#if MTERP_SUSPEND
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     cmp     r2, #0                      @ compare (vA, 0)
-    bgt .L_op_if_lez_not_taken
-    EXPORT_PC
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
-    bmi     MterpCheckSuspendAndContinue
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-.L_op_if_lez_not_taken:
-    FETCH_ADVANCE_INST 2                @ update rPC, load rINST
+    ldrmi   rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]   @ refresh table base
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 #else
     mov     r0, rINST, lsr #8           @ r0<- AA
     GET_VREG r2, r0                     @ r2<- vAA
-    FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
+    FETCH_S r1, 1                       @ r1<- branch offset, in code units
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     cmp     r2, #0                      @ compare (vA, 0)
-    movgt rINST, #2              @ rINST<- inst branch dist for not-taken
-    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    movgt r1, #2                 @ r1<- inst branch dist for not-taken
+    adds    r1, r1, r1                  @ convert to bytes & set flags
     FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
     bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -12281,6 +12098,7 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
+#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -12392,18 +12210,6 @@
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /*
- * On-stack replacement pending.
- * Branch offset in rINST on entry.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov r0, rSELF
-    add r1, rFP, #OFF_FP_SHADOWFRAME
-    mov r2, rINST
-    bl MterpLogOSR
-#endif
-    b MterpFallback                     @ Let the reference interpreter deal with it.
-/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 66598cc..e9d28ab 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -94,9 +94,6 @@
  */
 #include "asm_support.h"
 
-#define MTERP_PROFILE_BRANCHES 1
-#define MTERP_LOGGING 0
-
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
 #define xPC     x20
@@ -124,6 +121,14 @@
 #define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
+ *
+ * The reference interpreter performs explicit suspect checks, which is somewhat wasteful.
+ * Dalvik's interpreter folded suspend checks into the jump table mechanism, and eventually
+ * mterp should do so as well.
+ */
+#define MTERP_SUSPEND 0
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -1082,23 +1087,26 @@
      */
     /* goto +AA */
     /* tuning: use sbfx for 6t2+ targets */
-    lsl     w0, wINST, #16              // w0<- AAxx0000
-    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-#endif
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsl #16          // w0<- AAxx0000
+    movs    w1, w0, asr #24             // w1<- ssssssAA (sign-extended)
+    add     w2, w1, w1                  // w2<- byte offset, set flags
+       // If backwards branch refresh rIBASE
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
-    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     w0, w0, #24                 // w0<- ssssssAA (sign-extended)
+    adds    w1, w0, w0                  // Convert dalvik offset to byte offset, setting flags
     FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
        // If backwards branch refresh rIBASE
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1111,21 +1119,22 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
+#if MTERP_SUSPEND
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    ldrmi   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH_S w0, 1                       // w0<- ssssAAAA (sign-extended)
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    adds    w1, w0, w0                  // w1<- byte offset, flags set
     FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from rINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1143,23 +1152,26 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
+#if MTERP_SUSPEND
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
-    orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-#endif
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    ldrle   xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
+    FETCH w0, 1                         // w0<- aaaa (lo)
+    FETCH w1, 2                         // w1<- AAAA (hi)
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset
+    orr     w0, w0, w1, lsl #16         // w0<- AAAAaaaa
+    adds    w1, w0, w0                  // w1<- byte offset
     FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from xINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1175,6 +1187,20 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1182,21 +1208,13 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1213,6 +1231,20 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_SUSPEND
+    FETCH w0, 1                         // w0<- bbbb (lo)
+    FETCH w1, 2                         // w1<- BBBB (hi)
+    mov     w3, wINST, lsr #8           // w3<- AA
+    orr     w0, w0, w1, lsl #16         // w0<- BBBBbbbb
+    GET_VREG w1, w3                     // w1<- vAA
+    add     w0, rPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
+    ldrle   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh handler base
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+#else
     FETCH w0, 1                         // w0<- bbbb (lo)
     FETCH w1, 2                         // w1<- BBBB (hi)
     lsr     w3, wINST, #8               // w3<- AA
@@ -1220,21 +1252,13 @@
     GET_VREG w1, w3                     // w1<- vAA
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
-    sbfm    xINST, x0, 0, 31
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement
-#endif
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    adds    w1, w0, w0                  // w1<- byte offset; clear V
     FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
     b.le    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1372,28 +1396,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.eq .L_op_if_eq_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_eq_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    moveq w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1402,11 +1415,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, eq    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1427,28 +1440,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.ne .L_op_if_ne_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_ne_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    movne w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1457,11 +1459,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, ne    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1482,28 +1484,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.lt .L_op_if_lt_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_lt_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    movlt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1512,11 +1503,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, lt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1537,28 +1528,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.ge .L_op_if_ge_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_ge_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    movge w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1567,11 +1547,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, ge    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1592,28 +1572,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.gt .L_op_if_gt_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_gt_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    movgt w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1622,11 +1591,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, gt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1647,28 +1616,17 @@
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
     /* if-cmp vA, vB, +CCCC */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w1, wINST, #12              // w1<- B
+#if MTERP_SUSPEND
+    mov     w1, wINST, lsr #12          // w1<- B
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, w3                      // compare (vA, vB)
-    b.le .L_op_if_le_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_le_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    movle w1, #2                 // w1<- BYTE branch dist for not-taken
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi     MterpCheckSuspendAndContinue
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh rIBASE
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
@@ -1677,11 +1635,11 @@
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg.
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes, check sign
+    csel    w1, w1, w0, le    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes, check sign
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1702,37 +1660,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.eq .L_op_if_eqz_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_eqz_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    moveq w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, eq // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, eq    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1753,37 +1700,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.ne .L_op_if_nez_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_nez_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    movne w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ne // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, ne    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1804,37 +1740,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.lt .L_op_if_ltz_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_ltz_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    movlt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, lt // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, lt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1855,37 +1780,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.ge .L_op_if_gez_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_gez_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    movge w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, ge // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, ge    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1906,37 +1820,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.gt .L_op_if_gtz_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_gtz_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    movgt w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, gt // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, gt    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1957,37 +1860,26 @@
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
-    lsr     w0, wINST, #8               // w0<- AA
+#if MTERP_SUSPEND
+    mov     w0, wINST, lsr #8           // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
     cmp     w2, #0                      // compare (vA, 0)
-    b.le .L_op_if_lez_taken
-    FETCH_ADVANCE_INST 2                // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-.L_op_if_lez_taken:
-    EXPORT_PC
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm    x2, xINST, 0, 31
-    bl      MterpProfileBranch          // (self, shadow_frame, offset)
-    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    b.mi    MterpCheckSuspendAndContinue
+    movle w1, #2                 // w1<- inst branch dist for not-taken
+    adds    w1, w1, w1                  // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    ldrmi   rIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]   // refresh table base
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 #else
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S w1, 1                       // w1<- branch offset, in code units
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    csel    wINST, w1, w0, le // Branch if true, stashing result in callee save reg
-    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
-    adds    w2, wINST, wINST            // convert to bytes & set flags
+    csel    w1, w1, w0, le    // Branch if true
+    adds    w2, w1, w1                  // convert to bytes & set flags
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2509,7 +2401,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGet32InstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2566,7 +2457,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetObjInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2598,7 +2488,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetBooleanInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    uxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2630,7 +2519,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetByteInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    sxtb w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2662,7 +2550,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetCharInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    uxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -2694,7 +2581,6 @@
     mov      x3, xSELF                     // w3<- self
     bl       artGetShortInstanceFromCode
     ldr      x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    sxth w0, w0
     ubfx     w2, wINST, #8, #4             // w2<- A
     PREFETCH_INST 2
     cbnz     x3, MterpPossibleException    // bail out
@@ -11679,6 +11565,7 @@
  * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
  * TUNING: for consistency, we may want to just go ahead and handle these here.
  */
+#define MTERP_LOGGING 0
 common_errDivideByZero:
     EXPORT_PC
 #if MTERP_LOGGING
@@ -11792,19 +11679,6 @@
     GOTO_OPCODE ip                      // jump to next instruction
 
 /*
- * On-stack replacement pending.
- * Branch offset in wINST on entry.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sbfm x2, xINST, 0, 31
-    bl MterpLogOSR
-#endif
-    b MterpFallback                     // Let the reference interpreter deal with it.
-
-/*
  * Bail out to reference interpreter.
  */
 MterpFallback:
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index e38a684..8d3da37 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -36,7 +36,7 @@
 namespace art {
 namespace jit {
 
-static constexpr bool kEnableOnStackReplacement = false;
+static constexpr bool kEnableOnStackReplacement = true;
 
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
@@ -164,6 +164,7 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
   DCHECK(!method->IsRuntimeMethod());
+
   // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
@@ -177,12 +178,15 @@
     return false;
   }
 
-  if (!code_cache_->NotifyCompilationOf(method, self, osr)) {
+  // If we get a request to compile a proxy method, we pass the actual Java method
+  // of that proxy method, as the compiler does not expect a proxy method.
+  ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
     return false;
   }
-  bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr);
-  code_cache_->DoneCompiling(method, self);
+  bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
+  code_cache_->DoneCompiling(method_to_compile, self);
   return success;
 }
 
@@ -286,7 +290,15 @@
   }
 
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
-    VLOG(jit) << "OSR not supported on this platform";
+    VLOG(jit) << "OSR not supported on this platform: " << kRuntimeISA;
+    return false;
+  }
+
+  if (UNLIKELY(__builtin_frame_address(0) < thread->GetStackEnd())) {
+    // Don't attempt to do an OSR if we are close to the stack limit. Since
+    // the interpreter frames are still on stack, OSR has the potential
+    // to stack overflow even for a simple loop.
+    // b/27094810.
     return false;
   }
 
@@ -300,73 +312,93 @@
     return false;
   }
 
-  const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
-  if (osr_method == nullptr) {
-    // No osr method yet, just return to the interpreter.
-    return false;
-  }
-
+  // Fetch some data before looking up for an OSR method. We don't want thread
+  // suspension once we hold an OSR method, as the JIT code cache could delete the OSR
+  // method while we are being suspended.
   const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
-  CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
-  StackMapEncoding encoding = code_info.ExtractEncoding();
+  const char* shorty = method->GetShorty();
+  std::string method_name(VLOG_IS_ON(jit) ? PrettyMethod(method) : "");
+  void** memory = nullptr;
+  size_t frame_size = 0;
+  ShadowFrame* shadow_frame = nullptr;
+  const uint8_t* native_pc = nullptr;
 
-  // Find stack map starting at the target dex_pc.
-  StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
-  if (!stack_map.IsValid()) {
-    // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
-    // hope that the next branch has one.
-    return false;
-  }
-
-  // We found a stack map, now fill the frame with dex register values from the interpreter's
-  // shadow frame.
-  DexRegisterMap vreg_map =
-      code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
-
-  ShadowFrame* shadow_frame = thread->PopShadowFrame();
-
-  size_t frame_size = osr_method->GetFrameSizeInBytes();
-  void** memory = reinterpret_cast<void**>(malloc(frame_size));
-  memset(memory, 0, frame_size);
-
-  // Art ABI: ArtMethod is at the bottom of the stack.
-  memory[0] = method;
-
-  if (!vreg_map.IsValid()) {
-    // If we don't have a dex register map, then there are no live dex registers at
-    // this dex pc.
-  } else {
-    for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
-      DexRegisterLocation::Kind location =
-          vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
-      if (location == DexRegisterLocation::Kind::kNone) {
-        // Dex register is dead or unitialized.
-        continue;
-      }
-
-      if (location == DexRegisterLocation::Kind::kConstant) {
-        // We skip constants because the compiled code knows how to handle them.
-        continue;
-      }
-
-      DCHECK(location == DexRegisterLocation::Kind::kInStack);
-
-      int32_t vreg_value = shadow_frame->GetVReg(vreg);
-      int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
-                                                           number_of_vregs,
-                                                           code_info,
-                                                           encoding);
-      DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
-      DCHECK_GT(slot_offset, 0);
-      (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+  {
+    ScopedAssertNoThreadSuspension sts(thread, "Holding OSR method");
+    const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
+    if (osr_method == nullptr) {
+      // No osr method yet, just return to the interpreter.
+      return false;
     }
+
+    CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+
+    // Find stack map starting at the target dex_pc.
+    StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
+    if (!stack_map.IsValid()) {
+      // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
+      // hope that the next branch has one.
+      return false;
+    }
+
+    // We found a stack map, now fill the frame with dex register values from the interpreter's
+    // shadow frame.
+    DexRegisterMap vreg_map =
+        code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+    frame_size = osr_method->GetFrameSizeInBytes();
+
+    // Allocate memory to put shadow frame values. The osr stub will copy that memory to
+    // stack.
+    // Note that we could pass the shadow frame to the stub, and let it copy the values there,
+    // but that is engineering complexity not worth the effort for something like OSR.
+    memory = reinterpret_cast<void**>(malloc(frame_size));
+    CHECK(memory != nullptr);
+    memset(memory, 0, frame_size);
+
+    // Art ABI: ArtMethod is at the bottom of the stack.
+    memory[0] = method;
+
+    shadow_frame = thread->PopShadowFrame();
+    if (!vreg_map.IsValid()) {
+      // If we don't have a dex register map, then there are no live dex registers at
+      // this dex pc.
+    } else {
+      for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+        DexRegisterLocation::Kind location =
+            vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+        if (location == DexRegisterLocation::Kind::kNone) {
+          // Dex register is dead or uninitialized.
+          continue;
+        }
+
+        if (location == DexRegisterLocation::Kind::kConstant) {
+          // We skip constants because the compiled code knows how to handle them.
+          continue;
+        }
+
+        DCHECK(location == DexRegisterLocation::Kind::kInStack)
+            << DexRegisterLocation::PrettyDescriptor(location);
+
+        int32_t vreg_value = shadow_frame->GetVReg(vreg);
+        int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
+                                                             number_of_vregs,
+                                                             code_info,
+                                                             encoding);
+        DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
+        DCHECK_GT(slot_offset, 0);
+        (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+      }
+    }
+
+    native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
+    VLOG(jit) << "Jumping to "
+              << method_name
+              << "@"
+              << std::hex << reinterpret_cast<uintptr_t>(native_pc);
   }
 
-  const uint8_t* native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
-  VLOG(jit) << "Jumping to "
-            << PrettyMethod(method)
-            << "@"
-            << std::hex << reinterpret_cast<uintptr_t>(native_pc);
   {
     ManagedStack fragment;
     thread->PushManagedStackFragment(&fragment);
@@ -374,8 +406,9 @@
                           frame_size,
                           native_pc,
                           result,
-                          method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(),
+                          shorty,
                           thread);
+
     if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) {
       thread->DeoptimizeWithDeoptimizationException(result);
     }
@@ -383,7 +416,7 @@
   }
   free(memory);
   thread->PushShadowFrame(shadow_frame);
-  VLOG(jit) << "Done running OSR code for " << PrettyMethod(method);
+  VLOG(jit) << "Done running OSR code for " << method_name;
   return true;
 }
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 464c441..74ff741 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -269,6 +269,14 @@
       }
     }
   }
+  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+    if (alloc.ContainsUnsafe(it->first)) {
+      // Note that the code has already been removed in the loop above.
+      it = osr_code_map_.erase(it);
+    } else {
+      ++it;
+    }
+  }
   for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
     ProfilingInfo* info = *it;
     if (alloc.ContainsUnsafe(info->GetMethod())) {
@@ -578,7 +586,7 @@
       }
     }
 
-    // Empty osr method map, as osr compile code will be deleted (except the ones
+    // Empty osr method map, as osr compiled code will be deleted (except the ones
     // on thread stacks).
     osr_code_map_.clear();
   }
@@ -782,5 +790,24 @@
   return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr)));
 }
 
+void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
+                                             const OatQuickMethodHeader* header) {
+  if (method->GetEntryPointFromQuickCompiledCode() == header->GetEntryPoint()) {
+    // The entrypoint is the one to invalidate, so we just update
+    // it to the interpreter entry point and clear the counter to get the method
+    // Jitted again.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, GetQuickToInterpreterBridge());
+    method->ClearCounter();
+  } else {
+    MutexLock mu(Thread::Current(), lock_);
+    auto it = osr_code_map_.find(method);
+    if (it != osr_code_map_.end() && OatQuickMethodHeader::FromCodePointer(it->second) == header) {
+      // Remove the OSR method, to avoid using it again.
+      osr_code_map_.erase(it);
+    }
+  }
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 048f8d0..71f5cda 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -172,6 +172,10 @@
 
   size_t GetMemorySizeOfCodePointer(const void* ptr) REQUIRES(!lock_);
 
+  void InvalidateCompiledCodeFor(ArtMethod* method, const OatQuickMethodHeader* code)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index b4b872f..0aff1f7 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -33,6 +33,21 @@
 
 namespace art {
 
+// Transform the actual dex location into relative paths.
+// Note: this is OK because we don't store profiles of different apps into the same file.
+// Apps with split apks don't cause trouble because each split has a different name and will not
+// collide with other entries.
+std::string ProfileCompilationInfo::GetProfileDexFileKey(const std::string& dex_location) {
+  DCHECK(!dex_location.empty());
+  size_t last_sep_index = dex_location.find_last_of('/');
+  if (last_sep_index == std::string::npos) {
+    return dex_location;
+  } else {
+    DCHECK(last_sep_index < dex_location.size());
+    return dex_location.substr(last_sep_index + 1);
+  }
+}
+
 bool ProfileCompilationInfo::SaveProfilingInfo(const std::string& filename,
                                                const std::vector<ArtMethod*>& methods) {
   if (methods.empty()) {
@@ -58,7 +73,7 @@
     ScopedObjectAccess soa(Thread::Current());
     for (auto it = methods.begin(); it != methods.end(); it++) {
       const DexFile* dex_file = (*it)->GetDexFile();
-      if (!info.AddData(dex_file->GetLocation(),
+      if (!info.AddData(GetProfileDexFileKey(dex_file->GetLocation()),
                         dex_file->GetLocationChecksum(),
                         (*it)->GetDexMethodIndex())) {
         return false;
@@ -107,8 +122,8 @@
  *    dex_location1,dex_location_checksum1,method_id11,method_id12...
  *    dex_location2,dex_location_checksum2,method_id21,method_id22...
  * e.g.
- *    /system/priv-app/app/app.apk,131232145,11,23,454,54
- *    /system/priv-app/app/app.apk:classes5.dex,218490184,39,13,49,1
+ *    app.apk,131232145,11,23,454,54
+ *    app.apk:classes5.dex,218490184,39,13,49,1
  **/
 bool ProfileCompilationInfo::Save(uint32_t fd) {
   DCHECK_GE(fd, 0u);
@@ -270,7 +285,7 @@
 }
 
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
-  auto info_it = info_.find(method_ref.dex_file->GetLocation());
+  auto info_it = info_.find(GetProfileDexFileKey(method_ref.dex_file->GetLocation()));
   if (info_it != info_.end()) {
     if (method_ref.dex_file->GetLocationChecksum() != info_it->second.checksum) {
       return false;
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index ffd1433..c388c4a 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -66,6 +66,8 @@
 
   // For testing purposes.
   bool Equals(ProfileCompilationInfo& other);
+  // Exposed for testing purpose.
+  static std::string GetProfileDexFileKey(const std::string& dex_location);
 
  private:
   bool AddData(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index a7881ac..a092b9f 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -66,6 +66,7 @@
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
     DEBUG_GENERATE_DEBUG_INFO       = 1 << 5,
     DEBUG_ALWAYS_JIT                = 1 << 6,
+    DEBUG_NATIVE_DEBUGGABLE         = 1 << 7,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -117,6 +118,11 @@
     debug_flags &= ~DEBUG_ALWAYS_JIT;
   }
 
+  if ((debug_flags & DEBUG_NATIVE_DEBUGGABLE) != 0) {
+    runtime->AddCompilerOption("--native-debuggable");
+    debug_flags &= ~DEBUG_NATIVE_DEBUGGABLE;
+  }
+
   if (debug_flags != 0) {
     LOG(ERROR) << StringPrintf("Unknown bits set in debug_flags: %#x", debug_flags);
   }
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index e76e443..18cf81a 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -387,12 +387,16 @@
             runtime->GetHeap()->AddSpace(image_space.get());
           }
           added_image_space = true;
-          if (!runtime->GetClassLinker()->AddImageSpace(image_space.get(),
-                                                        h_loader,
-                                                        dex_elements,
-                                                        dex_location,
-                                                        /*out*/&dex_files,
-                                                        /*out*/&temp_error_msg)) {
+          if (runtime->GetClassLinker()->AddImageSpace(image_space.get(),
+                                                       h_loader,
+                                                       dex_elements,
+                                                       dex_location,
+                                                       /*out*/&dex_files,
+                                                       /*out*/&temp_error_msg)) {
+            // Successfully added image space to heap, release the map so that it does not get
+            // freed.
+            image_space.release();
+          } else {
             LOG(INFO) << "Failed to add image file " << temp_error_msg;
             dex_files.clear();
             {
@@ -406,7 +410,6 @@
             added_image_space = false;
             // Non-fatal, don't update error_msg.
           }
-          image_space.release();
         }
       }
     }
diff --git a/runtime/openjdkjvm/Android.mk b/runtime/openjdkjvm/Android.mk
new file mode 100644
index 0000000..9b7404e
--- /dev/null
+++ b/runtime/openjdkjvm/Android.mk
@@ -0,0 +1,20 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := openjdkjvm-phony
+include $(BUILD_PHONY_PACKAGE)
diff --git a/runtime/openjdkjvm/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION b/runtime/openjdkjvm/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/runtime/openjdkjvm/MODULE_LICENSE_GPL_WITH_CLASSPATH_EXCEPTION
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 2ea4b14..f9d916a 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -146,6 +146,10 @@
       .Define({"-XX:EnableHSpaceCompactForOOM", "-XX:DisableHSpaceCompactForOOM"})
           .WithValues({true, false})
           .IntoKey(M::EnableHSpaceCompactForOOM)
+      .Define("-XX:DumpNativeStackOnSigQuit:_")
+          .WithType<bool>()
+          .WithValueMap({{"false", false}, {"true", true}})
+          .IntoKey(M::DumpNativeStackOnSigQuit)
       .Define("-Xusejit:_")
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
@@ -667,6 +671,7 @@
   UsageMessage(stream, "  -XX:BackgroundGC=none\n");
   UsageMessage(stream, "  -XX:LargeObjectSpace={disabled,map,freelist}\n");
   UsageMessage(stream, "  -XX:LargeObjectThreshold=N\n");
+  UsageMessage(stream, "  -XX:DumpNativeStackOnSigQuit=booleanvalue\n");
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 786cf06..dd384c7 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -23,6 +23,8 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/throwable.h"
@@ -629,13 +631,17 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
-  // Compiled code made an explicit deoptimization. Transfer the code
-  // to interpreter and clear the counter to JIT the method again.
+  // Compiled code made an explicit deoptimization.
   ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
   DCHECK(deopt_method != nullptr);
-  deopt_method->ClearCounter();
-  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-      deopt_method, GetQuickToInterpreterBridge());
+  if (Runtime::Current()->UseJit()) {
+    Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+        deopt_method, handler_method_header_);
+  } else {
+    // Transfer the code to interpreter.
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        deopt_method, GetQuickToInterpreterBridge());
+  }
 
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1b59c6f..2aeb792 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -335,6 +335,7 @@
     os << "Runtime aborting...\n";
     if (Runtime::Current() == nullptr) {
       os << "(Runtime does not yet exist!)\n";
+      DumpNativeStack(os, GetTid(), nullptr, "  native: ", nullptr);
       return;
     }
     Thread* self = Thread::Current();
@@ -917,6 +918,7 @@
   is_explicit_gc_disabled_ = runtime_options.Exists(Opt::DisableExplicitGC);
   dex2oat_enabled_ = runtime_options.GetOrDefault(Opt::Dex2Oat);
   image_dex2oat_enabled_ = runtime_options.GetOrDefault(Opt::ImageDex2Oat);
+  dump_native_stack_on_sig_quit_ = runtime_options.GetOrDefault(Opt::DumpNativeStackOnSigQuit);
 
   vfprintf_ = runtime_options.GetOrDefault(Opt::HookVfprintf);
   exit_ = runtime_options.GetOrDefault(Opt::HookExit);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index bec26f8..1956bae 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -603,6 +603,10 @@
     safe_mode_ = mode;
   }
 
+  bool GetDumpNativeStackOnSigQuit() const {
+    return dump_native_stack_on_sig_quit_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -813,6 +817,9 @@
   // Whether the application should run in safe mode, that is, interpreter only.
   bool safe_mode_;
 
+  // Whether threads should dump their native stack on SIGQUIT.
+  bool dump_native_stack_on_sig_quit_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 097bccb..838d1a9 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -67,6 +67,7 @@
 RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        (kUseTlab || kUseReadBarrier))
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
+RUNTIME_OPTIONS_KEY (bool,                DumpNativeStackOnSigQuit,       true)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7a45594..2ee1605 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -936,9 +936,9 @@
      << "]";
 }
 
-void Thread::Dump(std::ostream& os, BacktraceMap* backtrace_map) const {
+void Thread::Dump(std::ostream& os, bool dump_native_stack, BacktraceMap* backtrace_map) const {
   DumpState(os);
-  DumpStack(os, backtrace_map);
+  DumpStack(os, dump_native_stack, backtrace_map);
 }
 
 mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
@@ -1497,7 +1497,9 @@
   }
 }
 
-void Thread::DumpStack(std::ostream& os, BacktraceMap* backtrace_map) const {
+void Thread::DumpStack(std::ostream& os,
+                       bool dump_native_stack,
+                       BacktraceMap* backtrace_map) const {
   // TODO: we call this code when dying but may not have suspended the thread ourself. The
   //       IsSuspended check is therefore racy with the use for dumping (normally we inhibit
   //       the race with the thread_suspend_count_lock_).
@@ -1510,7 +1512,7 @@
   }
   if (safe_to_dump) {
     // If we're currently in native code, dump that stack before dumping the managed stack.
-    if (dump_for_abort || ShouldShowNativeStack(this)) {
+    if (dump_native_stack && (dump_for_abort || ShouldShowNativeStack(this))) {
       DumpKernelStack(os, GetTid(), "  kernel: ", false);
       ArtMethod* method = GetCurrentMethod(nullptr, !dump_for_abort);
       DumpNativeStack(os, GetTid(), backtrace_map, "  native: ", method);
diff --git a/runtime/thread.h b/runtime/thread.h
index 3a5d72e..2726e91 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -187,7 +187,9 @@
   void ShortDump(std::ostream& os) const;
 
   // Dumps the detailed thread state and the thread stack (used for SIGQUIT).
-  void Dump(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
+  void Dump(std::ostream& os,
+            bool dump_native_stack = true,
+            BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -1111,7 +1113,9 @@
   void VerifyStackImpl() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpState(std::ostream& os) const SHARED_REQUIRES(Locks::mutator_lock_);
-  void DumpStack(std::ostream& os, BacktraceMap* backtrace_map = nullptr) const
+  void DumpStack(std::ostream& os,
+                 bool dump_native_stack = true,
+                 BacktraceMap* backtrace_map = nullptr) const
       REQUIRES(!Locks::thread_suspend_count_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index c8714a6..49d54fd 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -140,7 +140,7 @@
       suspend_all_historam_.PrintConfidenceIntervals(os, 0.99, data);  // Dump time to suspend.
     }
   }
-  Dump(os);
+  Dump(os, Runtime::Current()->GetDumpNativeStackOnSigQuit());
   DumpUnattachedThreads(os);
 }
 
@@ -189,8 +189,11 @@
 // A closure used by Thread::Dump.
 class DumpCheckpoint FINAL : public Closure {
  public:
-  explicit DumpCheckpoint(std::ostream* os)
-      : os_(os), barrier_(0), backtrace_map_(BacktraceMap::Create(getpid())) {}
+  DumpCheckpoint(std::ostream* os, bool dump_native_stack)
+      : os_(os),
+        barrier_(0),
+        backtrace_map_(dump_native_stack ? BacktraceMap::Create(getpid()) : nullptr),
+        dump_native_stack_(dump_native_stack) {}
 
   void Run(Thread* thread) OVERRIDE {
     // Note thread and self may not be equal if thread was already suspended at the point of the
@@ -199,7 +202,7 @@
     std::ostringstream local_os;
     {
       ScopedObjectAccess soa(self);
-      thread->Dump(local_os, backtrace_map_.get());
+      thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
     }
     local_os << "\n";
     {
@@ -228,14 +231,16 @@
   Barrier barrier_;
   // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
   std::unique_ptr<BacktraceMap> backtrace_map_;
+  // Whether we should dump the native stack.
+  const bool dump_native_stack_;
 };
 
-void ThreadList::Dump(std::ostream& os) {
+void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
   {
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
     os << "DALVIK THREADS (" << list_.size() << "):\n";
   }
-  DumpCheckpoint checkpoint(&os);
+  DumpCheckpoint checkpoint(&os, dump_native_stack);
   size_t threads_running_checkpoint = RunCheckpoint(&checkpoint);
   if (threads_running_checkpoint != 0) {
     checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 2e73f6a..363cab8 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -49,7 +49,7 @@
   void DumpForSigQuit(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);
   // For thread suspend timeout dumps.
-  void Dump(std::ostream& os)
+  void Dump(std::ostream& os, bool dump_native_stack = true)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
   pid_t GetLockOwner();  // For SignalCatcher.
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index c7ac172..a6cf9ea 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1960,8 +1960,8 @@
   // We need to ensure the work line is consistent while performing validation. When we spot a
   // peephole pattern we compute a new line for either the fallthrough instruction or the
   // branch target.
-  ArenaUniquePtr<RegisterLine> branch_line;
-  ArenaUniquePtr<RegisterLine> fallthrough_line;
+  RegisterLineArenaUniquePtr branch_line;
+  RegisterLineArenaUniquePtr fallthrough_line;
 
   switch (inst->Opcode()) {
     case Instruction::NOP:
@@ -2411,6 +2411,8 @@
       if (!reg_types_.JavaLangThrowable(false).IsAssignableFrom(res_type)) {
         if (res_type.IsUninitializedTypes()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "thrown exception not initialized";
+        } else if (!res_type.IsReferenceTypes()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "thrown value of non-reference type " << res_type;
         } else {
           Fail(res_type.IsUnresolvedTypes() ? VERIFY_ERROR_NO_CLASS : VERIFY_ERROR_BAD_CLASS_SOFT)
                 << "thrown class " << res_type << " not instanceof Throwable";
@@ -4524,6 +4526,19 @@
     if (UNLIKELY(have_pending_hard_failure_)) {
       return;
     }
+    if (should_adjust) {
+      if (field == nullptr) {
+        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Might be accessing a superclass instance field prior "
+                                          << "to the superclass being initialized in "
+                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+      } else if (field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access superclass instance field "
+                                          << PrettyField(field) << " of a not fully initialized "
+                                          << "object within the context of "
+                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+        return;
+      }
+    }
   }
   const RegType* field_type = nullptr;
   if (field != nullptr) {
@@ -4809,7 +4824,7 @@
       AdjustReturnLine(this, ret_inst, target_line);
     }
   } else {
-    ArenaUniquePtr<RegisterLine> copy;
+    RegisterLineArenaUniquePtr copy;
     if (kDebugVerify) {
       copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index c7d1e6b..b53a45c 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -30,6 +30,7 @@
 #include "handle.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
+#include "register_line.h"
 #include "reg_type_cache.h"
 
 namespace art {
@@ -45,6 +46,7 @@
 class DexPcToReferenceMap;
 class MethodVerifier;
 class RegisterLine;
+using RegisterLineArenaUniquePtr = std::unique_ptr<RegisterLine, RegisterLineArenaDelete>;
 class RegType;
 
 /*
@@ -127,7 +129,7 @@
   }
 
  private:
-  ScopedArenaVector<ArenaUniquePtr<RegisterLine>> register_lines_;
+  ScopedArenaVector<RegisterLineArenaUniquePtr> register_lines_;
 
   DISALLOW_COPY_AND_ASSIGN(PcToRegisterLineTable);
 };
@@ -771,14 +773,14 @@
   PcToRegisterLineTable reg_table_;
 
   // Storage for the register status we're currently working on.
-  ArenaUniquePtr<RegisterLine> work_line_;
+  RegisterLineArenaUniquePtr work_line_;
 
   // The address of the instruction we're currently working on, note that this is in 2 byte
   // quantities
   uint32_t work_insn_idx_;
 
   // Storage for the register status we're saving for later.
-  ArenaUniquePtr<RegisterLine> saved_line_;
+  RegisterLineArenaUniquePtr saved_line_;
 
   const uint32_t dex_method_idx_;  // The method we're working on.
   // Its object representation if known.
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 330c06a..bfbb78f 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -185,9 +185,12 @@
   }
 }
 
+inline size_t RegisterLine::ComputeSize(size_t num_regs) {
+  return OFFSETOF_MEMBER(RegisterLine, line_) + num_regs * sizeof(uint16_t);
+}
+
 inline RegisterLine* RegisterLine::Create(size_t num_regs, MethodVerifier* verifier) {
-  void* memory = verifier->GetArena().Alloc(OFFSETOF_MEMBER(RegisterLine, line_) +
-                                                (num_regs * sizeof(uint16_t)));
+  void* memory = verifier->GetArena().Alloc(ComputeSize(num_regs));
   return new (memory) RegisterLine(num_regs, verifier);
 }
 
@@ -200,6 +203,12 @@
   SetResultTypeToUnknown(verifier);
 }
 
+inline void RegisterLineArenaDelete::operator()(RegisterLine* ptr) const {
+  const size_t size = ptr != nullptr ? RegisterLine::ComputeSize(ptr->NumRegs()) : 0u;
+  ptr->~RegisterLine();
+  ProtectMemory(ptr, size);
+}
+
 }  // namespace verifier
 }  // namespace art
 
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index b2f5555..d508454 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -197,6 +197,9 @@
     return num_regs_;
   }
 
+  // Return how many bytes of memory a register line uses.
+  ALWAYS_INLINE static size_t ComputeSize(size_t num_regs);
+
   /*
    * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
    * caller can decide whether it needs the reference to be initialized or not. (Can also return
@@ -401,6 +404,13 @@
   DISALLOW_COPY_AND_ASSIGN(RegisterLine);
 };
 
+class RegisterLineArenaDelete : public ArenaDelete<RegisterLine> {
+ public:
+  void operator()(RegisterLine* ptr) const;
+};
+
+
+
 }  // namespace verifier
 }  // namespace art
 
diff --git a/test/003-omnibus-opcodes/build b/test/003-omnibus-opcodes/build
index faa2983..56e8784 100644
--- a/test/003-omnibus-opcodes/build
+++ b/test/003-omnibus-opcodes/build
@@ -23,8 +23,8 @@
 ${JAVAC} -d classes `find src2 -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
   fi
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 057b351..93bee50 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -29,8 +29,8 @@
 rm 'classes/android/test/anno/ClassWithInnerAnnotationClass$MissingInnerAnnotationClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/022-interface/build b/test/022-interface/build
index 3f8915c..5cfc7f2 100644
--- a/test/022-interface/build
+++ b/test/022-interface/build
@@ -20,8 +20,8 @@
 # Use classes that are compiled with ecj that exposes an invokeinterface
 # issue when interfaces override methods in Object
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/085-old-style-inner-class/build b/test/085-old-style-inner-class/build
index 6f50a76..21dc662 100644
--- a/test/085-old-style-inner-class/build
+++ b/test/085-old-style-inner-class/build
@@ -23,8 +23,8 @@
 ${JAVAC} -source 1.4 -target 1.4 -d classes `find src -name '*.java'`
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   # Suppress stderr to keep the inner class warnings out of the expected output.
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes 2>/dev/null
diff --git a/test/091-override-package-private-method/build b/test/091-override-package-private-method/build
index 5a340dc..073a4ba 100755
--- a/test/091-override-package-private-method/build
+++ b/test/091-override-package-private-method/build
@@ -24,14 +24,12 @@
 mv classes/OverridePackagePrivateMethodSuper.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/097-duplicate-method/build b/test/097-duplicate-method/build
index a855873..4525549 100644
--- a/test/097-duplicate-method/build
+++ b/test/097-duplicate-method/build
@@ -23,10 +23,10 @@
   ${JACK} --output-jack src.jack src
 
   ${JASMIN} -d classes src/*.j
-  ${JILL} classes --output jasmin.jack
+  jar cf jasmin.jill.jar -C classes .
 
   # We set jack.import.type.policy=keep-first to consider class definitions from jasmin first.
-  ${JACK} --import jasmin.jack --import src.jack -D jack.import.type.policy=keep-first --output-dex .
+  ${JACK} --import jasmin.jill.jar --import src.jack -D jack.import.type.policy=keep-first --output-dex .
 else
   ${JAVAC} -d classes src/*.java
   ${JASMIN} -d classes src/*.j
diff --git a/test/111-unresolvable-exception/build b/test/111-unresolvable-exception/build
index e772fb8..58ac26d 100644
--- a/test/111-unresolvable-exception/build
+++ b/test/111-unresolvable-exception/build
@@ -22,8 +22,8 @@
 rm classes/TestException.class
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/113-multidex/build b/test/113-multidex/build
index 8ef5c0e..4557ccd 100644
--- a/test/113-multidex/build
+++ b/test/113-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Second.class classes2/FillerA.class classes2/FillerB.class classes2/Inf*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index b003307..852ec2e 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -1,4 +1,3 @@
-Code cache exists: './code_cache'.
 Native bridge initialized.
 Checking for getEnvValues.
 Ready for native bridge tests.
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index b70ca4f..aca356b 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -269,16 +269,12 @@
   struct stat st;
   if (app_code_cache_dir != nullptr) {
     if (stat(app_code_cache_dir, &st) == 0) {
-      if (S_ISDIR(st.st_mode)) {
-        printf("Code cache exists: '%s'.\n", app_code_cache_dir);
-      } else {
+      if (!S_ISDIR(st.st_mode)) {
         printf("Code cache is not a directory.\n");
       }
     } else {
       perror("Error when stat-ing the code_cache:");
     }
-  } else {
-    printf("app_code_cache_dir is null.\n");
   }
 
   if (art_cbs != nullptr) {
diff --git a/test/121-modifiers/build b/test/121-modifiers/build
index 85b69e9..771dd51 100644
--- a/test/121-modifiers/build
+++ b/test/121-modifiers/build
@@ -31,9 +31,9 @@
 # mv Main.class A.class A\$B.class A\$C.class classes/
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
+  jar cf classes.jill.jar -C classes .
   # Workaround b/19561685: disable sanity checks to produce a DEX file with invalid modifiers.
-  ${JACK} --sanity-checks off --import classes.jack --output-dex .
+  ${JACK} --sanity-checks off --import classes.jill.jar --output-dex .
 else
   ${DX} --debug --dex --dump-to=classes.lst --output=classes.dex classes
 fi
diff --git a/test/124-missing-classes/build b/test/124-missing-classes/build
index b92ecf9..0a340a2 100644
--- a/test/124-missing-classes/build
+++ b/test/124-missing-classes/build
@@ -27,8 +27,8 @@
 rm 'classes/Main$MissingInnerClass.class'
 
 if [ ${USE_JACK} = "true" ]; then
-  ${JILL} classes --output classes.jack
-  ${JACK} --import classes.jack --output-dex .
+  jar cf classes.jill.jar -C classes .
+  ${JACK} --import classes.jill.jar --output-dex .
 else
   ${DX} -JXmx256m --debug --dex --output=classes.dex classes
 fi
diff --git a/test/126-miranda-multidex/build b/test/126-miranda-multidex/build
index b7f2118..00b9ba0 100644
--- a/test/126-miranda-multidex/build
+++ b/test/126-miranda-multidex/build
@@ -28,14 +28,12 @@
 rm classes2/Main.class classes2/MirandaAbstract.class classes2/MirandaClass*.class classes2/MirandaInterface2*.class
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes2 --output classes2.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes2.jill.jar -C classes2 .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   mv classes.dex classes-1.dex
-  ${JACK} --import classes2.jack --output-dex .
+  ${JACK} --import classes2.jill.jar --output-dex .
   mv classes.dex classes2.dex
   mv classes-1.dex classes.dex
 else
diff --git a/test/127-checker-secondarydex/build b/test/127-checker-secondarydex/build
index 0d9f4d6..7ce46ac 100755
--- a/test/127-checker-secondarydex/build
+++ b/test/127-checker-secondarydex/build
@@ -24,14 +24,12 @@
 mv classes/Super.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/127-checker-secondarydex/src/Test.java b/test/127-checker-secondarydex/src/Test.java
index 266ed19..438e854 100644
--- a/test/127-checker-secondarydex/src/Test.java
+++ b/test/127-checker-secondarydex/src/Test.java
@@ -23,7 +23,7 @@
         System.out.println("Test");
     }
 
-    /// CHECK-START: java.lang.Integer Test.toInteger() ssa_builder (after)
+    /// CHECK-START: java.lang.Integer Test.toInteger() builder (after)
     /// CHECK:         LoadClass needs_access_check:false klass:java.lang.Integer
 
     public Integer toInteger() {
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 87656bc..45251b8 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -53,6 +53,7 @@
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_sleep(JNIEnv*, jobject, jint, jboolean, jdouble) {
   // Keep pausing.
+  printf("Going to sleep\n");
   for (;;) {
     pause();
   }
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index 8db7853..6a5618e 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -1,2 +1 @@
 JNI_OnLoad called
-JNI_OnLoad called
diff --git a/test/137-cfi/run b/test/137-cfi/run
index 6f4bcfe..8ec98c1 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -20,4 +20,5 @@
 
 # Test with minimal compressed debugging information.
 # Check only method names (parameters are omitted to save space).
-${RUN} "$@" -Xcompiler-option --generate-mini-debug-info
+# Temporarily disable due to bug 27172087 (leak/race in libunwind).
+# ${RUN} "$@" -Xcompiler-option --generate-mini-debug-info
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index 7755338..d60a4eb 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -16,10 +16,7 @@
 
 import java.io.BufferedReader;
 import java.io.FileReader;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
+import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.Comparator;
 
@@ -98,9 +95,12 @@
               throw new RuntimeException("Couldn't parse process");
           }
 
-          // Wait a bit, so the forked process has time to run until its sleep phase.
+          // Wait until the forked process had time to run until its sleep phase.
           try {
-              Thread.sleep(5000);
+              InputStreamReader stdout = new InputStreamReader(p.getInputStream(), "UTF-8");
+              BufferedReader lineReader = new BufferedReader(stdout);
+              while (!lineReader.readLine().contains("Going to sleep")) {
+              }
           } catch (Exception e) {
               throw new RuntimeException(e);
           }
diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java
index 865355c..c96b18c 100644
--- a/test/444-checker-nce/src/Main.java
+++ b/test/444-checker-nce/src/Main.java
@@ -27,7 +27,7 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.thisTest() ssa_builder (after)
+  /// CHECK-START: Main Main.thisTest() builder (after)
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
 
@@ -38,7 +38,7 @@
     return g();
   }
 
-  /// CHECK-START: Main Main.newInstanceRemoveTest() ssa_builder (after)
+  /// CHECK-START: Main Main.newInstanceRemoveTest() builder (after)
   /// CHECK:         NewInstance
   /// CHECK:         NullCheck
   /// CHECK:         InvokeStaticOrDirect
@@ -52,7 +52,7 @@
     return m.g();
   }
 
-  /// CHECK-START: Main Main.newArrayRemoveTest() ssa_builder (after)
+  /// CHECK-START: Main Main.newArrayRemoveTest() builder (after)
   /// CHECK:         NewArray
   /// CHECK:         NullCheck
   /// CHECK:         ArrayGet
@@ -178,7 +178,7 @@
     return n.g();
   }
 
-  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) ssa_builder (after)
+  /// CHECK-START: Main Main.scopeRemoveTest(int, Main) builder (after)
   /// CHECK:         NullCheck
 
   /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after)
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 8f9a32a..31bb94c 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -122,8 +122,9 @@
   /// CHECK: ArraySet
 
   static void constantIndexing1(int[] array) {
-    array[5] = 1;
-    array[4] = 1;
+    // Decreasing order: bc for 5 but not for 4.
+    array[5] = 11;
+    array[4] = 11;
   }
 
 
@@ -136,17 +137,18 @@
   /// CHECK: ArraySet
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
 
   /// CHECK-START: void Main.$opt$noinline$constantIndexing2(int[]) BCE (after)
-  /// CHECK: LessThanOrEqual
-  /// CHECK: Deoptimize
-  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  /// CHECK: BoundsCheck
   /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK: ArraySet
-  /// CHECK-NOT: BoundsCheck
+  /// CHECK: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
@@ -156,12 +158,39 @@
     array[2] = 1;
     array[3] = 1;
     array[4] = 1;
-    array[-1] = 1;
+    array[-1] = 1;  // prevents the whole opt on [-1:4]
     if (array[1] == 1) {
       throw new Error("");
     }
   }
 
+  /// CHECK-START: void Main.constantIndexing2b(int[]) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing2b(int[]) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing2b(int[] array) {
+    array[0] = 7;
+    array[1] = 7;
+    array[2] = 7;
+    array[3] = 7;
+  }
 
   /// CHECK-START: int[] Main.constantIndexing3(int[], int[], boolean) BCE (before)
   /// CHECK: BoundsCheck
@@ -182,11 +211,9 @@
   /// CHECK: ArraySet
 
   /// CHECK-START: int[] Main.constantIndexing3(int[], int[], boolean) BCE (after)
-  /// CHECK: LessThanOrEqual
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  /// CHECK: LessThanOrEqual
   /// CHECK: Deoptimize
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
@@ -220,14 +247,14 @@
   /// CHECK: ArraySet
 
   /// CHECK-START: void Main.constantIndexing4(int[]) BCE (after)
-  /// CHECK-NOT: LessThanOrEqual
+  /// CHECK-NOT: Deoptimize
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
 
   // There is only one array access. It's not beneficial
   // to create a compare with deoptimization instruction.
   static void constantIndexing4(int[] array) {
-    array[0] = 1;
+    array[0] = -1;
   }
 
 
@@ -260,10 +287,221 @@
 
   /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after)
   /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
 
   static void constantIndexing6(int[] array) {
-    array[3] = 1;
-    array[4] = 1;
+    array[3] = 111;
+    array[4] = 111;
+  }
+
+  /// CHECK-START: void Main.constantIndexing7(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing7(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing7(int[] array, int base) {
+    // With constant offsets to symbolic base.
+    array[base]     = 10;
+    array[base + 1] = 20;
+    array[base + 2] = 30;
+    array[base + 3] = 40;
+  }
+
+  /// CHECK-START: void Main.constantIndexing8(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing8(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+
+  static void constantIndexing8(int[] array, int base) {
+    // With constant offsets "both ways" to symbolic base.
+    array[base - 1] = 100;
+    array[base]     = 200;
+    array[base + 1] = 300;
+    array[base + 2] = 400;
+  }
+
+  /// CHECK-START: void Main.constantIndexing9(int[], int) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing9(int[], int) BCE (after)
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK-NOT: BoundsCheck
+
+  static void constantIndexing9(int[] array, int base) {
+    // Final range is base..base+3 so conditional
+    // references may be included in the end.
+    array[base] = 0;
+    if (base != 12345)
+      array[base + 2] = 2;
+    array[base + 3] = 3;
+    if (base != 67890)
+      array[base + 1] = 1;
+  }
+
+  static void runAllConstantIndices() {
+    int[] a1 = { 0 };
+    int[] a6 = { 0, 0, 0, 0, 0, 0 };
+
+    boolean caught = false;
+    try {
+      constantIndexing1(a1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("constant indices 1 failed!");
+    }
+
+    constantIndexing1(a6);
+    if (a6[4] != 11 || a6[5] != 11) {
+      System.out.println("constant indices 1 failed!");
+    }
+
+    caught = false;
+    try {
+      $opt$noinline$constantIndexing2(a6);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 0 || a6[1] != 1 || a6[2] != 1 ||
+                   a6[3] != 1 || a6[4] != 1 || a6[5] != 11) {
+      System.out.println("constant indices 2 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing2b(a1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a1[0] != 7) {
+      System.out.println("constant indices 2b failed!");
+    }
+
+    constantIndexing2b(a6);
+    if (a6[0] != 7 || a6[1] != 7 || a6[2] != 7 ||
+        a6[3] != 7 || a6[4] != 1 || a6[5] != 11) {
+      System.out.println("constant indices 2b failed!");
+    }
+
+    int[] b4 = new int[4];
+    constantIndexing3(a6, b4, true);
+    if (b4[0] != 7 || b4[1] != 7 || b4[2] != 7 || b4[3] != 7) {
+      System.out.println("constant indices 3 failed!");
+    }
+
+    constantIndexing4(a1);
+    if (a1[0] != -1) {
+      System.out.println("constant indices 4 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing5(a6);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught) {
+      System.out.println("constant indices 5 failed!");
+    }
+
+    constantIndexing6(a6);
+    if (a6[0] != 7   || a6[1] != 7   || a6[2] != 7 ||
+        a6[3] != 111 || a6[4] != 111 || a6[5] != 11) {
+      System.out.println("constant indices 6 failed!");
+    }
+
+    constantIndexing7(a6, 1);
+    if (a6[0] != 7  || a6[1] != 10 || a6[2] != 20 ||
+        a6[3] != 30 || a6[4] != 40 || a6[5] != 11) {
+      System.out.println("constant indices 7 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing7(a6, 5);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 7  || a6[1] != 10 || a6[2] != 20 ||
+                   a6[3] != 30 || a6[4] != 40 || a6[5] != 10) {
+      System.out.println("constant indices 7 failed!");
+    }
+
+    constantIndexing8(a6, 1);
+    if (a6[0] != 100 || a6[1] != 200 || a6[2] != 300 ||
+        a6[3] != 400 || a6[4] != 40  || a6[5] != 10) {
+      System.out.println("constant indices 8 failed!");
+    }
+
+    caught = false;
+    try {
+      constantIndexing8(a6, 0);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      caught = true;
+    }
+    if (!caught || a6[0] != 100) {
+      System.out.println("constant indices 8 failed!");
+    }
+
+    constantIndexing9(a6, 0);
+    if (a6[0] != 0 || a6[1] != 1  || a6[2] != 2  ||
+        a6[3] != 3 || a6[4] != 40 || a6[5] != 10) {
+      System.out.println("constant indices 9 failed!");
+    }
   }
 
   // A helper into which the actual throwing function should be inlined.
@@ -1102,6 +1340,9 @@
 
   static void testUnknownBounds() {
     boolean caught = false;
+
+    runAllConstantIndices();
+
     Main main = new Main();
     main.foo1(new int[10], 0, 10, false);
     if (main.sum != 10) {
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index d48b30e..027a9d9 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -205,7 +205,7 @@
   public static boolean $inline$InstanceofSubclassB(Object o) { return o instanceof SubclassB; }
   public static boolean $inline$InstanceofSubclassC(Object o) { return o instanceof SubclassC; }
 
-  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: void Main.testInstanceOf_NotInlined(java.lang.Object) builder (after)
   /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
   /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
@@ -229,7 +229,7 @@
     }
   }
 
-  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: void Main.testNotInstanceOf_NotInlined(java.lang.Object) builder (after)
   /// CHECK-DAG:     <<Cst0:i\d+>> IntConstant 0
   /// CHECK-DAG:     <<Cst1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<IOf1:z\d+>> InstanceOf
@@ -487,7 +487,7 @@
     ((SubclassA)a[0]).$noinline$g();
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) ssa_builder (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchNonExact(int, int) builder (after)
   /// CHECK:         LoadException klass:java.lang.ArithmeticException can_be_null:false exact:false
   public int testLoadExceptionInCatchNonExact(int x, int y) {
     try {
@@ -497,7 +497,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) ssa_builder (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchExact(int) builder (after)
   /// CHECK:         LoadException klass:FinalException can_be_null:false exact:true
   public int testLoadExceptionInCatchExact(int x) {
     try {
@@ -511,7 +511,7 @@
     }
   }
 
-  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) ssa_builder (after)
+  /// CHECK-START: int Main.testLoadExceptionInCatchAll(int, int) builder (after)
   /// CHECK:         LoadException klass:java.lang.Throwable can_be_null:false exact:false
   public int testLoadExceptionInCatchAll(int x, int y) {
     try {
@@ -532,7 +532,7 @@
     return genericFinal.get();
   }
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() ssa_builder (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -544,7 +544,7 @@
     return c;
   }
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() ssa_builder (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() builder (after)
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
@@ -586,7 +586,7 @@
     return new SubclassA();
   }
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) ssa_builder (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) builder (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:Super
   /// CHECK:                   NullCheck [<<Phi>>] klass:Super
 
@@ -620,7 +620,7 @@
   }
 
 
-  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) ssa_builder (after)
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) builder (after)
   /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
   /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
   /// CHECK:      ParameterValue
@@ -636,7 +636,7 @@
 
   private int mainField = 0;
 
-  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) ssa_builder (after)
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
@@ -692,7 +692,7 @@
     getSuper();
   }
 
-  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) ssa_builder (after)
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
@@ -705,7 +705,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() ssa_builder (after)
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() builder (after)
   /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
   /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
   /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 2f80470..8d6bb65 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -46,6 +46,12 @@
     }
   }
 
+  public static void assertStringEquals(String expected, String result) {
+    if (expected == null ? result != null : !expected.equals(result)) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   /**
    * Tiny programs exercising optimizations of arithmetic identities.
    */
@@ -1401,7 +1407,7 @@
 
   // Test that conditions on float/double are not flipped.
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) ssa_builder (after)
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) builder (after)
   /// CHECK:                            LessThanOrEqual
 
   /// CHECK-START: int Main.floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
@@ -1417,7 +1423,7 @@
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) ssa_builder (after)
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) builder (after)
   /// CHECK:                            LessThanOrEqual
 
   /// CHECK-START: int Main.doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
@@ -1433,6 +1439,337 @@
     return ((d > 42.0) != false) ? 13 : 54;
   }
 
+  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int intToDoubleToInt(int value) {
+    // Lossless conversion followed by a conversion back.
+    return (int) (double) value;
+  }
+
+  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      {{d\d+}}          TypeConversion [<<Arg>>]
+
+  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static String intToDoubleToIntPrint(int value) {
+    // Lossless conversion followed by a conversion back
+    // with another use of the intermediate result.
+    double d = (double) value;
+    int i = (int) d;
+    return "d=" + d + ", i=" + i;
+  }
+
+  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int byteToDoubleToInt(byte value) {
+    // Lossless conversion followed by another conversion, use implicit conversion.
+    return (int) (double) value;
+  }
+
+  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static int floatToDoubleToInt(float value) {
+    // Lossless conversion followed by another conversion.
+    return (int) (double) value;
+  }
+
+  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
+
+  public static String floatToDoubleToIntPrint(float value) {
+    // Lossless conversion followed by another conversion with
+    // an extra use of the intermediate result.
+    double d = (double) value;
+    int i = (int) d;
+    return "d=" + d + ", i=" + i;
+  }
+
+  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static short byteToDoubleToShort(byte value) {
+    // Originally, this is byte->double->int->short. The first conversion is lossless,
+    // so we merge this with the second one to byte->int which we omit as it's an implicit
+    // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
+    return (short) (double) value;
+  }
+
+  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-DAG:                        TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static short charToDoubleToShort(char value) {
+    // Originally, this is char->double->int->short. The first conversion is lossless,
+    // so we merge this with the second one to char->int which we omit as it's an implicit
+    // conversion. Then we are left with the resulting char->short conversion.
+    return (short) (double) value;
+  }
+
+  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  public static short floatToIntToShort(float value) {
+    // Lossy FP to integral conversion followed by another conversion: no simplification.
+    return (short) value;
+  }
+
+  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  public static int intToFloatToInt(int value) {
+    // Lossy integral to FP conversion followed another conversion: no simplification.
+    return (int) (float) value;
+  }
+
+  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Double>>]
+
+  public static double longToIntToDouble(long value) {
+    // Lossy long-to-int conversion followed an integral to FP conversion: no simplification.
+    return (double) (int) value;
+  }
+
+  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Long>>]
+
+  public static long longToIntToLong(long value) {
+    // Lossy long-to-int conversion followed an int-to-long conversion: no simplification.
+    return (long) (int) value;
+  }
+
+  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Char>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  public static short shortToCharToShort(short value) {
+    // Integral conversion followed by non-widening integral conversion to original type.
+    return (short) (char) value;
+  }
+
+  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Long>>]
+  /// CHECK-DAG:                        Return [<<Int>>]
+
+  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:                        Return [<<Arg>>]
+
+  public static int shortToLongToInt(short value) {
+    // Integral conversion followed by non-widening integral conversion, use implicit conversion.
+    return (int) (long) value;
+  }
+
+  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Char>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  public static byte shortToCharToByte(short value) {
+    // Integral conversion followed by non-widening integral conversion losing bits
+    // from the original type. Simplify to use only one conversion.
+    return (byte) (char) value;
+  }
+
+  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
+
+  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
+
+  public static String shortToCharToBytePrint(short value) {
+    // Integral conversion followed by non-widening integral conversion losing bits
+    // from the original type with an extra use of the intermediate result.
+    char c = (char) value;
+    byte b = (byte) c;
+    return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
+  }
+
+  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
+  /// CHECK-DAG:      <<And:j\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<And>>]
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Int>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Byte>>]
+
+  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static byte longAnd0xffToByte(long value) {
+    return (byte) (value & 0xff);
+  }
+
+  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 131071
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Char>>]
+
+  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
+  /// CHECK-DAG:                        Return [<<Char>>]
+
+  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-NOT:                        And
+
+  public static char intAnd0x1ffffToChar(int value) {
+    // Keeping all significant bits and one more.
+    return (char) (value & 0x1ffff);
+  }
+
+  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
+  /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
+  /// CHECK-DAG:                        Return [<<Short>>]
+
+  public static short intAnd0x17fffToShort(int value) {
+    // No simplification: clearing a significant bit.
+    return (short) (value & 0x17fff);
+  }
+
   public static void main(String[] args) {
     int arg = 123456;
 
@@ -1518,6 +1855,57 @@
     assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
     assertIntEquals(doubleConditionEqualZero(6.0), 54);
     assertIntEquals(doubleConditionEqualZero(43.0), 13);
+
+    assertIntEquals(intToDoubleToInt(1234567), 1234567);
+    assertIntEquals(intToDoubleToInt(Integer.MIN_VALUE), Integer.MIN_VALUE);
+    assertIntEquals(intToDoubleToInt(Integer.MAX_VALUE), Integer.MAX_VALUE);
+    assertStringEquals(intToDoubleToIntPrint(7654321), "d=7654321.0, i=7654321");
+    assertIntEquals(byteToDoubleToInt((byte) 12), 12);
+    assertIntEquals(byteToDoubleToInt(Byte.MIN_VALUE), Byte.MIN_VALUE);
+    assertIntEquals(byteToDoubleToInt(Byte.MAX_VALUE), Byte.MAX_VALUE);
+    assertIntEquals(floatToDoubleToInt(11.3f), 11);
+    assertStringEquals(floatToDoubleToIntPrint(12.25f), "d=12.25, i=12");
+    assertIntEquals(byteToDoubleToShort((byte) 123), 123);
+    assertIntEquals(byteToDoubleToShort(Byte.MIN_VALUE), Byte.MIN_VALUE);
+    assertIntEquals(byteToDoubleToShort(Byte.MAX_VALUE), Byte.MAX_VALUE);
+    assertIntEquals(charToDoubleToShort((char) 1234), 1234);
+    assertIntEquals(charToDoubleToShort(Character.MIN_VALUE), Character.MIN_VALUE);
+    assertIntEquals(charToDoubleToShort(Character.MAX_VALUE), /* sign-extended */ -1);
+    assertIntEquals(floatToIntToShort(12345.75f), 12345);
+    assertIntEquals(floatToIntToShort((float)(Short.MIN_VALUE - 1)), Short.MAX_VALUE);
+    assertIntEquals(floatToIntToShort((float)(Short.MAX_VALUE + 1)), Short.MIN_VALUE);
+    assertIntEquals(intToFloatToInt(-54321), -54321);
+    assertDoubleEquals(longToIntToDouble(0x1234567812345678L), (double) 0x12345678);
+    assertDoubleEquals(longToIntToDouble(Long.MIN_VALUE), 0.0);
+    assertDoubleEquals(longToIntToDouble(Long.MAX_VALUE), -1.0);
+    assertLongEquals(longToIntToLong(0x1234567812345678L), 0x0000000012345678L);
+    assertLongEquals(longToIntToLong(0x1234567887654321L), 0xffffffff87654321L);
+    assertLongEquals(longToIntToLong(Long.MIN_VALUE), 0L);
+    assertLongEquals(longToIntToLong(Long.MAX_VALUE), -1L);
+    assertIntEquals(shortToCharToShort((short) -5678), (short) -5678);
+    assertIntEquals(shortToCharToShort(Short.MIN_VALUE), Short.MIN_VALUE);
+    assertIntEquals(shortToCharToShort(Short.MAX_VALUE), Short.MAX_VALUE);
+    assertIntEquals(shortToLongToInt((short) 5678), 5678);
+    assertIntEquals(shortToLongToInt(Short.MIN_VALUE), Short.MIN_VALUE);
+    assertIntEquals(shortToLongToInt(Short.MAX_VALUE), Short.MAX_VALUE);
+    assertIntEquals(shortToCharToByte((short) 0x1234), 0x34);
+    assertIntEquals(shortToCharToByte((short) 0x12f0), -0x10);
+    assertIntEquals(shortToCharToByte(Short.MIN_VALUE), 0);
+    assertIntEquals(shortToCharToByte(Short.MAX_VALUE), -1);
+    assertStringEquals(shortToCharToBytePrint((short) 1025), "c=1025, b=1");
+    assertStringEquals(shortToCharToBytePrint((short) 1023), "c=1023, b=-1");
+    assertStringEquals(shortToCharToBytePrint((short) -1), "c=65535, b=-1");
+
+    assertIntEquals(longAnd0xffToByte(0x1234432112344321L), 0x21);
+    assertIntEquals(longAnd0xffToByte(Long.MIN_VALUE), 0);
+    assertIntEquals(longAnd0xffToByte(Long.MAX_VALUE), -1);
+    assertIntEquals(intAnd0x1ffffToChar(0x43211234), 0x1234);
+    assertIntEquals(intAnd0x1ffffToChar(Integer.MIN_VALUE), 0);
+    assertIntEquals(intAnd0x1ffffToChar(Integer.MAX_VALUE), Character.MAX_VALUE);
+    assertIntEquals(intAnd0x17fffToShort(0x87654321), 0x4321);
+    assertIntEquals(intAnd0x17fffToShort(0x88888888), 0x0888);
+    assertIntEquals(intAnd0x17fffToShort(Integer.MIN_VALUE), 0);
+    assertIntEquals(intAnd0x17fffToShort(Integer.MAX_VALUE), Short.MAX_VALUE);
   }
 
   public static boolean booleanField;
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 2222e0f..3f25635 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -39,7 +39,7 @@
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenHelperInvoke() ssa_builder (after)
+  /// CHECK-START: int Main.inlineSharpenHelperInvoke() builder (after)
   /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual {{.*\.getFoo.*}}
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
diff --git a/test/477-checker-bound-type/src/Main.java b/test/477-checker-bound-type/src/Main.java
index 0f65e44..2504ab2 100644
--- a/test/477-checker-bound-type/src/Main.java
+++ b/test/477-checker-bound-type/src/Main.java
@@ -17,7 +17,7 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForIf(java.lang.Object) builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForIf(Object a) {
     if (a != null) {
@@ -27,7 +27,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.boundTypeForInstanceOf(java.lang.Object) builder (after)
   /// CHECK:     BoundType
   public static Object boundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
@@ -37,7 +37,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForIf(java.lang.Object) builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForIf(Object a) {
     if (a == null) {
@@ -47,7 +47,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.noBoundTypeForInstanceOf(java.lang.Object) builder (after)
   /// CHECK-NOT: BoundType
   public static Object noBoundTypeForInstanceOf(Object a) {
     if (a instanceof Main) {
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 3106ce4..a919690 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,7 +31,7 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
+  /// CHECK-START: void Main.main(java.lang.String[]) builder (after)
   /// CHECK:           InvokeStaticOrDirect {{.*Main.<init>.*}}
   /// CHECK:           InvokeInterface
 
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 1fde5ed..8ec840d 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -41,28 +41,35 @@
 ## CHECK:  predecessors     "<<BEnterTry2>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  DivZeroCheck
+## CHECK:  <<Div:i\d+>> Div
 
-## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>" "<<BCatch3:B\d+>>"
+## CHECK:  name             "<<BAfterTry2:B\d+>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BAfterTry2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>" "<<BCatch3:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>,<<Minus3>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnterTry2>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch3>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus3>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -84,7 +91,7 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BAfterTry2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>" "<<BCatch3>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -105,6 +112,8 @@
     .catch Ljava/lang/OutOfMemoryError; {:try_start_2 .. :try_end_2} :catch_mem
     .catchall {:try_start_2 .. :try_end_2} :catch_other
 
+    nop
+
     :return
     return p0
 
@@ -131,7 +140,7 @@
 
 ## CHECK:  name             "<<BIf>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BEnterTry2:B\d+>>" "<<BThen:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BThen:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BThen>>"
@@ -145,19 +154,19 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit3:B\d+>>" "<<BCatch:B\d+>>"
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "<<BThen>>"
@@ -166,23 +175,38 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BIf>>" "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BSplit2:B\d+>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  successors       "<<BSplit2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BIf>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testMultipleEntries(IIII)I
     .registers 4
 
@@ -220,23 +244,24 @@
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
 ## CHECK:  successors       "<<BExitTry1:B\d+>>" "<<BExitTry2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
 ## CHECK:  If
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BThen:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BThen:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BThen>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
 ## CHECK:  successors       "<<BReturn>>"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry>>"
 ## CHECK:  predecessors     "B0"
@@ -252,10 +277,15 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testMultipleExits(II)I
     .registers 2
 
@@ -295,23 +325,25 @@
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter2:B\d+>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit2>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BExit1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "B0"
@@ -333,10 +365,15 @@
 
 ## CHECK:  name             "<<BExit2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testSharedBoundary(III)I
     .registers 3
 
@@ -378,28 +415,31 @@
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
 ## CHECK:  successors       "<<BExit1:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter2>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit1>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch1:B\d+>>" "<<BCatch2:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch1>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BExit1>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatch2>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "<<BExit2>>"
@@ -415,7 +455,7 @@
 
 ## CHECK:  name             "<<BExit1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -425,6 +465,11 @@
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit1>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testSharedBoundary_Reverse(III)I
     .registers 3
 
@@ -472,26 +517,30 @@
 ## CHECK:  predecessors     "<<BEnter2:B\d+>>"
 ## CHECK:  successors       "<<BExit2:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry3:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter3:B\d+>>"
 ## CHECK:  successors       "<<BExit3:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExit3>>" "<<BCatchArith:B\d+>>" "<<BCatchAll:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatchArith:B\d+>>" "<<BCatchAll:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>,<<Minus2>>]
+## CHECK:  Return
 
 ## CHECK:  name             "<<BCatchArith>>"
 ## CHECK:  predecessors     "<<BEnter2>>" "<<BExit2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BCatchAll>>"
 ## CHECK:  predecessors     "<<BEnter1>>" "<<BEnter2>>" "<<BEnter3>>" "<<BExit1>>" "<<BExit2>>" "<<BExit3>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus2>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnter1>>"
 ## CHECK:  predecessors     "B0"
@@ -525,10 +574,15 @@
 
 ## CHECK:  name             "<<BExit3>>"
 ## CHECK:  predecessors     "<<BTry3>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatchAll>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExit3>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testNestedTry(IIII)I
     .registers 4
 
@@ -567,14 +621,18 @@
 ## CHECK:  predecessors     "<<BEnterTry1:B\d+>>"
 ## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
-## CHECK:  Div
+## CHECK:  <<Div:i\d+>> Div
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch:B\d+>>"
+## CHECK:  Phi [<<Div>>,<<Minus1>>]
+## CHECK:  Return
 
 ## CHECK:  name             "<<BOutside:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
@@ -585,7 +643,7 @@
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -607,10 +665,15 @@
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BReturn>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  Goto
+
 .method public static testIncontinuousTry(IIII)I
     .registers 4
 
@@ -642,12 +705,12 @@
 
 ## CHECK:  name             "<<BPSwitch0>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BEnterTry2:B\d+>>" "<<BPSwitch1:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BPSwitch1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BPSwitch1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
-## CHECK:  successors       "<<BOutside:B\d+>>" "<<BEnterTry1:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>" "<<BEnterTry1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
@@ -656,44 +719,68 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BOutside>>"
-## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry2>>"
-## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  name             "<<BOutside:B\d+>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BSplit4:B\d+>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  Return
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "<<BPSwitch1>>"
 ## CHECK:  successors       "<<BTry1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BSplit3:B\d+>>"
 ## CHECK:  successors       "<<BTry2>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BOutside>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit4>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit4>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
 .method public static testSwitchTryEnter(IIII)I
     .registers 4
 
@@ -728,58 +815,78 @@
 
 ## CHECK:  name             "<<BPSwitch0:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BTry2:B\d+>>" "<<BExitTry1:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>" "<<BExitTry1:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BPSwitch1:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BOutside:B\d+>>" "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>" "<<BEnterTry2:B\d+>>"
 ## CHECK:  If
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BTry2>>"
+## CHECK:  successors       "<<BTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BTry2>>"
-## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BTry1>>"
 ## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BOutside>>"
-## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry2>>"
-## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  name             "<<BOutside:B\d+>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BSplit3:B\d+>>"
+## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  Return
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
 ## CHECK:  successors       "<<BPSwitch0>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BPSwitch1>>"
 ## CHECK:  successors       "<<BTry1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BPSwitch1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BOutside>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  successors       "<<BSplit3>>"
+## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  Goto
+
 .method public static testSwitchTryExit(IIII)I
     .registers 4
 
@@ -825,7 +932,7 @@
 ## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
 ## CHECK:  flags            "catch_block"
-## CHECK:  StoreLocal       [v0,<<Minus1>>]
+## CHECK:  Return [<<Minus1>>]
 
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  predecessors     "<<BExitTry>>" "<<BCatch>>"
@@ -861,18 +968,21 @@
 ## CHECK-START: int Builder.testCatchLoop(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>"
 
-## CHECK:  name             "<<BCatch>>"
-## CHECK:  predecessors     "B0" "<<BEnterTry:B\d+>>" "<<BExitTry:B\d+>>"
-## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>" "<<BExitTry:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 ## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
+## CHECK:  predecessors     "<<BReturn>>"
+## CHECK:  Exit
 
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
@@ -880,7 +990,7 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "<<BCatch>>"
+## CHECK:  predecessors     "<<BSplit1>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
@@ -891,6 +1001,16 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  Goto
+
 .method public static testCatchLoop(III)I
     .registers 4
 
@@ -918,14 +1038,16 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry1>>" "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 ## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
 
-## CHECK:  name             "{{B\d+}}"
+## CHECK:  name             "<<BExit>>"
+## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
 ## CHECK:  name             "<<BTry2:B\d+>>"
@@ -940,14 +1062,14 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BCatch>>"
+## CHECK:  predecessors     "<<BSplit1>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -957,6 +1079,16 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  Goto
+
 .method public static testHandlerEdge1(III)I
     .registers 4
 
@@ -977,16 +1109,16 @@
 ## CHECK-START: int Builder.testHandlerEdge2(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BCatch1:B\d+>>"
+## CHECK:  successors       "<<BSplit4:B\d+>>"
 
-## CHECK:  name             "<<BCatch1>>"
-## CHECK:  predecessors     "B0" "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
+## CHECK:  name             "<<BCatch1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 ## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BCatch2:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry1:B\d+>>" "<<BEnterTry1>>" "<<BExitTry1>>"
-## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BEnterTry1:B\d+>>" "<<BExitTry1:B\d+>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>"
 ## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
@@ -995,6 +1127,7 @@
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
@@ -1007,20 +1140,20 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BEnterTry1>>"
-## CHECK:  predecessors     "<<BCatch1>>"
+## CHECK:  predecessors     "<<BSplit1>>"
 ## CHECK:  successors       "<<BTry1>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BCatch2>>"
+## CHECK:  predecessors     "<<BSplit2>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch2>>"
+## CHECK:  successors       "<<BSplit3:B\d+>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1030,6 +1163,26 @@
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit4>>" "<<BCatch1>>"
+## CHECK:  successors       "<<BEnterTry1>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BCatch2>>" "<<BSplit3>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit3>>"
+## CHECK:  predecessors     "<<BExitTry1>>"
+## CHECK:  successors       "<<BSplit2>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit4>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  Goto
+
 .method public static testHandlerEdge2(III)I
     .registers 4
 
@@ -1053,10 +1206,10 @@
 ## CHECK-START: int Builder.testTryInLoop(int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
+## CHECK:  successors       "<<BSplit1:B\d+>>"
 
 ## CHECK:  name             "<<BTry:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>"
 ## CHECK:  successors       "<<BExitTry:B\d+>>"
 ## CHECK:  Div
 
@@ -1065,22 +1218,28 @@
 ## CHECK:  successors       "<<BEnterTry>>"
 ## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BExit:B\d+>>"
-## CHECK-NOT: predecessors  "{{B\d+}}"
-## CHECK:  end_block
-
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "B0"
+## CHECK:  predecessors     "<<BSplit1>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry>>"
 ## CHECK:  predecessors     "<<BTry>>"
-## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  successors       "<<BSplit2:B\d+>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
+## CHECK:  name             "<<BSplit1>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
+## CHECK:  name             "<<BSplit2>>"
+## CHECK:  predecessors     "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  Goto
+
 .method public static testTryInLoop(II)I
     .registers 3
 
@@ -1098,9 +1257,10 @@
 # INVOKE it follows, even if there is a try boundary between them.
 
 ## CHECK-START: int Builder.testMoveResult_Invoke(int, int, int) builder (after)
-
-## CHECK:       <<Res:i\d+>> InvokeStaticOrDirect
-## CHECK-NEXT:  StoreLocal   [v0,<<Res>>]
+## CHECK-DAG:     <<M1:i\d+>>  IntConstant -1
+## CHECK-DAG:     <<Res:i\d+>> InvokeStaticOrDirect
+## CHECK-DAG:     <<Phi:i\d+>> Phi [<<Res>>,<<M1>>]
+## CHECK-DAG:                  Return [<<Phi>>]
 
 .method public static testMoveResult_Invoke(III)I
     .registers 3
@@ -1124,16 +1284,16 @@
 # FILLED_NEW_ARRAY it follows, even if there is a try boundary between them.
 
 ## CHECK-START: int[] Builder.testMoveResult_FilledNewArray(int, int, int) builder (after)
-
-## CHECK:      <<Res:l\d+>>     NewArray
-## CHECK-NEXT:                  Temporary
-## CHECK-NEXT: <<Local1:i\d+>>  LoadLocal  [v0]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local1>>]
-## CHECK-NEXT: <<Local2:i\d+>>  LoadLocal  [v1]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local2>>]
-## CHECK-NEXT: <<Local3:i\d+>>  LoadLocal  [v2]
-## CHECK-NEXT:                  ArraySet   [<<Res>>,{{i\d+}},<<Local3>>]
-## CHECK-NEXT:                  StoreLocal [v0,<<Res>>]
+## CHECK-DAG:     <<Arg1:i\d+>> ParameterValue
+## CHECK-DAG:     <<Arg2:i\d+>> ParameterValue
+## CHECK-DAG:     <<Arg3:i\d+>> ParameterValue
+## CHECK-DAG:     <<Null:l\d+>> NullConstant
+## CHECK-DAG:     <<Res:l\d+>>  NewArray
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg1>>]
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg2>>]
+## CHECK-DAG:                   ArraySet   [<<Res>>,{{i\d+}},<<Arg3>>]
+## CHECK-DAG:     <<Phi:l\d+>>  Phi [<<Res>>,<<Null>>]
+## CHECK-DAG:                   Return [<<Phi>>]
 
 .method public static testMoveResult_FilledNewArray(III)[I
     .registers 3
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index a6a5bfe..1fd5fb2 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -19,7 +19,7 @@
 # Tests that catch blocks with both normal and exceptional predecessors are
 # split in two.
 
-## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testSimplifyCatchBlock(int, int, int) builder (after)
 
 ## CHECK:      name             "B1"
 ## CHECK-NEXT: from_bci
@@ -62,7 +62,7 @@
 
 # Should be rejected because :catch_all is a loop header.
 
-## CHECK-START: int SsaBuilder.testCatchLoopHeader(int, int, int) ssa_builder (after, bad_state)
+## CHECK-START: int SsaBuilder.testCatchLoopHeader(int, int, int) builder (after, bad_state)
 
 .method public static testCatchLoopHeader(III)I
     .registers 4
@@ -84,7 +84,7 @@
 
 # Tests creation of catch Phis.
 
-## CHECK-START: int SsaBuilder.testPhiCreation(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiCreation(int, int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
@@ -127,7 +127,7 @@
 # Tests that phi elimination does not remove catch phis where the value does
 # not dominate the phi.
 
-## CHECK-START: int SsaBuilder.testPhiElimination_Domination(int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiElimination_Domination(int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<Cst5:i\d+>> IntConstant 5
@@ -168,7 +168,7 @@
 
 # Tests that phi elimination loops until no more phis can be removed.
 
-## CHECK-START: int SsaBuilder.testPhiElimination_Dependencies(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testPhiElimination_Dependencies(int, int, int) builder (after)
 ## CHECK-NOT:     Phi
 
 .method public static testPhiElimination_Dependencies(III)I
@@ -200,10 +200,7 @@
 
 # Tests that dead catch blocks are removed.
 
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (before)
-## CHECK:                       Mul
-
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) builder (after)
 ## CHECK-DAG:     <<P0:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P1:i\d+>>   ParameterValue
 ## CHECK-DAG:     <<P2:i\d+>>   ParameterValue
@@ -211,7 +208,7 @@
 ## CHECK-DAG:     <<Add2:i\d+>> Add [<<Add1>>,<<P2>>]
 ## CHECK-DAG:                   Return [<<Add2>>]
 
-## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) ssa_builder (after)
+## CHECK-START: int SsaBuilder.testDeadCatchBlock(int, int, int) builder (after)
 ## CHECK-NOT:                   flags "catch_block"
 ## CHECK-NOT:                   Mul
 
diff --git a/test/517-checker-builder-fallthrough/smali/TestCase.smali b/test/517-checker-builder-fallthrough/smali/TestCase.smali
index bc9502b..946f169 100644
--- a/test/517-checker-builder-fallthrough/smali/TestCase.smali
+++ b/test/517-checker-builder-fallthrough/smali/TestCase.smali
@@ -25,8 +25,8 @@
 
 ## CHECK:  name            "B1"
 ## CHECK:  successors      "B5" "B2"
-## CHECK:  StoreLocal      [v0,<<Const0>>]
-## CHECK:  If
+## CHECK:  <<Cond:z\d+>>   Equal [<<Const0>>,<<Const0>>]
+## CHECK:  If [<<Cond>>]
 
 ## CHECK:  name            "B2"
 ## CHECK:  successors      "B4"
diff --git a/test/523-checker-can-throw-regression/smali/Test.smali b/test/523-checker-can-throw-regression/smali/Test.smali
index 87192ea..4b737a9 100644
--- a/test/523-checker-can-throw-regression/smali/Test.smali
+++ b/test/523-checker-can-throw-regression/smali/Test.smali
@@ -46,8 +46,10 @@
   div-int/2addr p0, p1
   :else
   div-int/2addr p0, p2
-  return p0
   :try_end_2
-  .catchall {:try_start_2 .. :try_end_2} :catchall
+  .catchall {:try_start_2 .. :try_end_2} :catchall2
+
+  :catchall2
+  return p0
 
 .end method
diff --git a/test/529-checker-unresolved/build b/test/529-checker-unresolved/build
index 8c3c4f8..d85035b 100644
--- a/test/529-checker-unresolved/build
+++ b/test/529-checker-unresolved/build
@@ -29,14 +29,12 @@
 mv classes/UnresolvedSuperClass.class classes-ex
 
 if [ ${USE_JACK} = "true" ]; then
-  # Create .jack files from classes generated with javac.
-  ${JILL} classes --output classes.jack
-  ${JILL} classes-ex --output classes-ex.jack
+  jar cf classes.jill.jar -C classes .
+  jar cf classes-ex.jill.jar -C classes-ex .
 
-  # Create DEX files from .jack files.
-  ${JACK} --import classes.jack --output-dex .
+  ${JACK} --import classes.jill.jar --output-dex .
   zip $TEST_NAME.jar classes.dex
-  ${JACK} --import classes-ex.jack --output-dex .
+  ${JACK} --import classes-ex.jill.jar --output-dex .
   zip ${TEST_NAME}-ex.jar classes.dex
 else
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index d647683..4d6ea06 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -664,28 +664,19 @@
     System.out.println("testFinalizableByForcingGc() failed to force gc.");
   }
 
-  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (before)
+  /// CHECK-START: int Main.$noinline$testHSelect(boolean) load_store_elimination (before)
   /// CHECK: InstanceFieldSet
   /// CHECK: Select
 
-  /// CHECK-START: int Main.testHSelect(boolean) load_store_elimination (after)
+  /// CHECK-START: int Main.$noinline$testHSelect(boolean) load_store_elimination (after)
   /// CHECK: InstanceFieldSet
   /// CHECK: Select
 
   // Test that HSelect creates alias.
-  public static int testHSelect(boolean b) {
-    // Disable inlining.
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-    System.out.print("");
-
+  public static int $noinline$testHSelect(boolean b) {
+    if (sFlag) {
+      throw new Error();
+    }
     TestClass obj = new TestClass();
     TestClass obj2 = null;
     obj.i = 0xdead;
@@ -754,6 +745,8 @@
     assertIntEquals(test23(false), 5);
     assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
-    assertIntEquals(testHSelect(true), 0xdead);
+    assertIntEquals($noinline$testHSelect(true), 0xdead);
   }
+
+  static boolean sFlag;
 }
diff --git a/test/537-checker-debuggable/smali/TestCase.smali b/test/537-checker-debuggable/smali/TestCase.smali
index 8e6c7ef..5714d3a 100644
--- a/test/537-checker-debuggable/smali/TestCase.smali
+++ b/test/537-checker-debuggable/smali/TestCase.smali
@@ -20,10 +20,10 @@
 # be eliminated in normal mode but kept live in debuggable mode. Test that
 # Checker runs the correct test for each compilation mode.
 
-## CHECK-START: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-START: int TestCase.deadPhi(int, int, int) builder (after)
 ## CHECK-NOT:         Phi
 
-## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) builder (after)
 ## CHECK:             Phi
 
 .method public static deadPhi(III)I
diff --git a/test/540-checker-rtp-bug/src/Main.java b/test/540-checker-rtp-bug/src/Main.java
index 9a9f0b6..17b11db 100644
--- a/test/540-checker-rtp-bug/src/Main.java
+++ b/test/540-checker-rtp-bug/src/Main.java
@@ -21,7 +21,7 @@
 }
 
 public class Main {
-  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) ssa_builder (after)
+  /// CHECK-START: Final Main.testKeepCheckCast(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     CheckCast [<<Phi>>,<<Class>>]
@@ -43,7 +43,7 @@
     return (Final) x;
   }
 
-  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) ssa_builder (after)
+  /// CHECK-START: void Main.testKeepInstanceOf(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<Class:l\d+>>   LoadClass
   /// CHECK:                     InstanceOf [<<Phi>>,<<Class>>]
@@ -65,7 +65,7 @@
     }
   }
 
-  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) ssa_builder (after)
+  /// CHECK-START: java.lang.String Main.testNoInline(java.lang.Object, boolean) builder (after)
   /// CHECK:    <<Phi:l\d+>>     Phi klass:java.lang.Object
   /// CHECK:    <<NC:l\d+>>      NullCheck [<<Phi>>]
   /// CHECK:    <<Ret:l\d+>>     InvokeVirtual [<<NC>>] method_name:java.lang.Object.toString
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
index 917073b..51af3cf 100644
--- a/test/549-checker-types-merge/src/Main.java
+++ b/test/549-checker-types-merge/src/Main.java
@@ -38,14 +38,14 @@
 
 public class Main {
 
-  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeNullContant(boolean) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeNullContant(boolean cond) {
     return cond ? null : new Main();
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
@@ -53,7 +53,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
@@ -61,7 +61,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
@@ -69,7 +69,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
@@ -77,7 +77,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
@@ -85,7 +85,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
@@ -93,7 +93,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
@@ -101,7 +101,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
@@ -109,7 +109,7 @@
     return cond ? a : b;
   }
 
-  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) ssa_builder (after)
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
@@ -117,7 +117,7 @@
     return cond ? a : b;
   }
 
-    /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) ssa_builder (after)
+    /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) builder (after)
   /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
   /// CHECK:                         Return [<<Phi>>]
   private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali
index 7974d56..9133c82 100644
--- a/test/550-checker-regression-wide-store/smali/TestCase.smali
+++ b/test/550-checker-regression-wide-store/smali/TestCase.smali
@@ -25,7 +25,7 @@
 # Test storing into the high vreg of a wide pair. This scenario has runtime
 # behaviour implications so we run it from Main.main.
 
-## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after)
+## CHECK-START: int TestCase.invalidateLow(long) builder (after)
 ## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0
 ## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
 ## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>]
@@ -53,7 +53,7 @@
 # Test that storing a wide invalidates the value in the high vreg. This
 # cannot be detected from runtime so we only test the environment with Checker.
 
-## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after)
+## CHECK-START: void TestCase.invalidateHigh1(long) builder (after)
 ## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
 ## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]]
 
@@ -67,7 +67,7 @@
 
 .end method
 
-## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after)
+## CHECK-START: void TestCase.invalidateHigh2(long) builder (after)
 ## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
 ## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]]
 
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
index decdd1f..8d73d69 100644
--- a/test/551-checker-shifter-operand/src/Main.java
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -241,13 +241,14 @@
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
   /// CHECK:                            Arm64DataProcWithShifterOp
-  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
   /// CHECK-NOT:                        TypeConversion
 
   public static void $opt$validateExtendByteInt1(int a, byte b) {
     assertIntEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    // Conversions byte->short and short->int are implicit; nothing to merge.
     assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
   }
 
@@ -266,17 +267,24 @@
   /// CHECK:                            Arm64DataProcWithShifterOp
   /// CHECK:                            Arm64DataProcWithShifterOp
   /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
 
   /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
   /// CHECK:                            TypeConversion
-  /// CHECK:                            TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
   public static void $opt$validateExtendByteLong(long a, byte b) {
-    // The first two tests have a type conversion.
+    // In each of the following tests, there will be a merge on the LHS.
+
+    // The first test has an explicit byte->char conversion on RHS,
+    // followed by a conversion that is merged with the Add.
     assertLongEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    // Since conversions byte->short and byte->int are implicit, the RHS
+    // for the two tests below is the same and one is eliminated by GVN.
+    // The other is then merged to a shifter operand instruction.
     assertLongEquals(a + $noinline$byteToShort(b), a + (short)b);
-    // This test does not because the conversion to `int` is optimized away.
     assertLongEquals(a + $noinline$byteToInt  (b), a +  (int)b);
   }
 
diff --git a/test/552-checker-primitive-typeprop/smali/ArrayGet.smali b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
index 042fa0c..de32290 100644
--- a/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
+++ b/test/552-checker-primitive-typeprop/smali/ArrayGet.smali
@@ -19,10 +19,10 @@
 # Test phi with fixed-type ArrayGet as an input and a matching second input.
 # The phi should be typed accordingly.
 
-## CHECK-START: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-START: void ArrayGet.matchingFixedType(float[], float) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFixedType(float[], float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFixedType(float[], float) builder (after)
 ## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
 ## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
 ## CHECK-DAG:  {{f\d+}}      Phi [<<Aget>>,<<Arg1>>] reg:0
@@ -49,10 +49,10 @@
 # Test phi with fixed-type ArrayGet as an input and a conflicting second input.
 # The phi should be eliminated due to the conflict.
 
-## CHECK-START: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-START: void ArrayGet.conflictingFixedType(float[], int) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType(float[], int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType(float[], int) builder (after)
 ## CHECK-NOT: Phi
 .method public static conflictingFixedType([FI)V
   .registers 8
@@ -76,13 +76,13 @@
 # Same test as the one above, only this time tests that type of ArrayGet is not
 # changed.
 
-## CHECK-START: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-START: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFixedType2(int[], float) builder (after)
 ## CHECK:     {{i\d+}} ArrayGet
 .method public static conflictingFixedType2([IF)V
   .registers 8
@@ -107,10 +107,10 @@
 # Test phi with free-type ArrayGet as an input and a matching second input.
 # The phi should be typed accordingly.
 
-## CHECK-START: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-START: void ArrayGet.matchingFreeType(float[], float) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFreeType(float[], float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.matchingFreeType(float[], float) builder (after)
 ## CHECK-DAG:  <<Arg1:f\d+>> ParameterValue
 ## CHECK-DAG:  <<Aget:f\d+>> ArrayGet
 ## CHECK-DAG:                ArraySet [{{l\d+}},{{i\d+}},<<Aget>>]
@@ -139,10 +139,10 @@
 # The phi will be kept and typed according to the second input despite the
 # conflict.
 
-## CHECK-START: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-START: void ArrayGet.conflictingFreeType(int[], float) builder (after)
 ## CHECK-NOT: Phi
 
-## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFreeType(int[], float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void ArrayGet.conflictingFreeType(int[], float) builder (after)
 ## CHECK-NOT: Phi
 
 .method public static conflictingFreeType([IF)V
@@ -169,7 +169,7 @@
 # case uses ArrayGet indirectly through two phis. It also creates an unused
 # conflicting phi which should not be preserved.
 
-## CHECK-START: void ArrayGet.conflictingPhiUses(int[], float, boolean, boolean, boolean) ssa_builder (after)
+## CHECK-START: void ArrayGet.conflictingPhiUses(int[], float, boolean, boolean, boolean) builder (after)
 ## CHECK:         InvokeStaticOrDirect env:[[{{i\d+}},{{i\d+}},_,{{i\d+}},{{.*}}
 
 .method public static conflictingPhiUses([IFZZZ)V
@@ -209,10 +209,10 @@
 # another. The situation needs to be resolved so that only one instruction
 # remains.
 
-## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) builder (after)
 ## CHECK:         {{f\d+}} ArrayGet
 
-## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) ssa_builder (after)
+## CHECK-START: void ArrayGet.typedVsUntypedPhiUse(float[], float, boolean, boolean) builder (after)
 ## CHECK-NOT:     {{i\d+}} ArrayGet
 
 .method public static typedVsUntypedPhiUse([FFZZ)V
diff --git a/test/552-checker-primitive-typeprop/smali/ArraySet.smali b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
index 57d8606..087460a 100644
--- a/test/552-checker-primitive-typeprop/smali/ArraySet.smali
+++ b/test/552-checker-primitive-typeprop/smali/ArraySet.smali
@@ -19,7 +19,7 @@
 # Note that the input is a Phi to make sure primitive type propagation is re-run
 # on the replaced inputs.
 
-## CHECK-START: void ArraySet.ambiguousSet(int[], float[], boolean) ssa_builder (after)
+## CHECK-START: void ArraySet.ambiguousSet(int[], float[], boolean) builder (after)
 ## CHECK-DAG:     <<IntArray:l\d+>>    ParameterValue klass:int[]
 ## CHECK-DAG:     <<IntA:i\d+>>        IntConstant 0
 ## CHECK-DAG:     <<IntB:i\d+>>        IntConstant 1073741824
diff --git a/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
index 395feaa..0d067ed 100644
--- a/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
+++ b/test/552-checker-primitive-typeprop/smali/SsaBuilder.smali
@@ -22,7 +22,7 @@
 # otherwise running the code with an array short enough to throw will crash at
 # runtime because v0 is undefined.
 
-## CHECK-START: int SsaBuilder.environmentPhi(boolean, int[]) ssa_builder (after)
+## CHECK-START: int SsaBuilder.environmentPhi(boolean, int[]) builder (after)
 ## CHECK-DAG:     <<Cst0:f\d+>>  FloatConstant 0
 ## CHECK-DAG:     <<Cst2:f\d+>>  FloatConstant 2
 ## CHECK-DAG:     <<Phi:f\d+>>   Phi [<<Cst0>>,<<Cst2>>]
diff --git a/test/552-checker-primitive-typeprop/smali/TypePropagation.smali b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
index 58682a1..d34e43e 100644
--- a/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
+++ b/test/552-checker-primitive-typeprop/smali/TypePropagation.smali
@@ -15,7 +15,7 @@
 .class public LTypePropagation;
 .super Ljava/lang/Object;
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDeadPhi(boolean, boolean, int, float, float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDeadPhi(boolean, boolean, int, float, float) builder (after)
 ## CHECK-NOT: Phi
 .method public static mergeDeadPhi(ZZIFF)V
   .registers 8
@@ -34,7 +34,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeSameType(boolean, int, int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeSameType(boolean, int, int) builder (after)
 ## CHECK:     {{i\d+}} Phi
 ## CHECK-NOT:          Phi
 .method public static mergeSameType(ZII)V
@@ -47,7 +47,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeVoidInput(boolean, boolean, int, int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeVoidInput(boolean, boolean, int, int) builder (after)
 ## CHECK:     {{i\d+}} Phi
 ## CHECK:     {{i\d+}} Phi
 ## CHECK-NOT:          Phi
@@ -64,7 +64,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDifferentSize(boolean, int, long) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeDifferentSize(boolean, int, long) builder (after)
 ## CHECK-NOT: Phi
 .method public static mergeDifferentSize(ZIJ)V
   .registers 8
@@ -76,7 +76,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeRefFloat(boolean, float, java.lang.Object) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeRefFloat(boolean, float, java.lang.Object) builder (after)
 ## CHECK-NOT: Phi
 .method public static mergeRefFloat(ZFLjava/lang/Object;)V
   .registers 8
@@ -88,7 +88,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Success(boolean, float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Success(boolean, float) builder (after)
 ## CHECK:     {{f\d+}} Phi
 ## CHECK-NOT:          Phi
 .method public static mergeIntFloat_Success(ZF)V
@@ -101,7 +101,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Fail(boolean, int, float) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.mergeIntFloat_Fail(boolean, int, float) builder (after)
 ## CHECK-NOT: Phi
 .method public static mergeIntFloat_Fail(ZIF)V
   .registers 8
@@ -113,7 +113,7 @@
   return-void
 .end method
 
-## CHECK-START-DEBUGGABLE: void TypePropagation.updateAllUsersOnConflict(boolean, boolean, int, float, int) ssa_builder (after)
+## CHECK-START-DEBUGGABLE: void TypePropagation.updateAllUsersOnConflict(boolean, boolean, int, float, int) builder (after)
 ## CHECK-NOT: Phi
 .method public static updateAllUsersOnConflict(ZZIFI)V
   .registers 8
diff --git a/test/554-checker-rtp-checkcast/src/Main.java b/test/554-checker-rtp-checkcast/src/Main.java
index 607f71a..5bf766f 100644
--- a/test/554-checker-rtp-checkcast/src/Main.java
+++ b/test/554-checker-rtp-checkcast/src/Main.java
@@ -19,7 +19,7 @@
 
   public static Object returnIntArray() { return new int[10]; }
 
-  /// CHECK-START: void Main.boundTypeForMergingPhi() ssa_builder (after)
+  /// CHECK-START: void Main.boundTypeForMergingPhi() builder (after)
   /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
   /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
   /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
@@ -32,7 +32,7 @@
     array[0] = 14;
   }
 
-  /// CHECK-START: void Main.boundTypeForLoopPhi() ssa_builder (after)
+  /// CHECK-START: void Main.boundTypeForLoopPhi() builder (after)
   /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
   /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
   /// CHECK-DAG:     <<Phi>>  Phi klass:int[]
@@ -50,7 +50,7 @@
     array[0] = 14;
   }
 
-  /// CHECK-START: void Main.boundTypeForCatchPhi() ssa_builder (after)
+  /// CHECK-START: void Main.boundTypeForCatchPhi() builder (after)
   /// CHECK-DAG:              ArraySet [<<NC:l\d+>>,{{i\d+}},{{i\d+}}]
   /// CHECK-DAG:     <<NC>>   NullCheck [<<Phi:l\d+>>]
   /// CHECK-DAG:     <<Phi>>  Phi is_catch_phi:true klass:int[]
diff --git a/test/557-checker-ref-equivalent/smali/TestCase.smali b/test/557-checker-ref-equivalent/smali/TestCase.smali
index 2472957..1347554 100644
--- a/test/557-checker-ref-equivalent/smali/TestCase.smali
+++ b/test/557-checker-ref-equivalent/smali/TestCase.smali
@@ -16,7 +16,7 @@
 
 .super Ljava/lang/Object;
 
-## CHECK-START: void TestCase.testIntRefEquivalent() ssa_builder (after)
+## CHECK-START: void TestCase.testIntRefEquivalent() builder (after)
 ## CHECK-NOT: Phi
 .method public static testIntRefEquivalent()V
     .registers 4
diff --git a/test/557-checker-ref-equivalent/src/Main.java b/test/557-checker-ref-equivalent/src/Main.java
index a970af5..9323757 100644
--- a/test/557-checker-ref-equivalent/src/Main.java
+++ b/test/557-checker-ref-equivalent/src/Main.java
@@ -16,7 +16,7 @@
 
 public class Main {
 
-  /// CHECK-START: void Main.testRedundantPhiCycle(boolean) ssa_builder (after)
+  /// CHECK-START: void Main.testRedundantPhiCycle(boolean) builder (after)
   /// CHECK-NOT:  Phi
   private void testRedundantPhiCycle(boolean cond) {
     Object o = null;
@@ -28,7 +28,7 @@
     }
   }
 
-  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) ssa_builder (after)
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) builder (after)
   /// CHECK-NOT:  Phi
   private void testLoopPhisWithNullAndCrossUses(boolean cond) {
     Main a = null;
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 971ad84..7ce60a3 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -323,7 +323,7 @@
 #      -       /                 \-
 # irreducible_loop_back_edge    loop_within_back_edge
 #
-## CHECK-START: void IrreducibleLoop.analyze1(int) ssa_builder (after)
+## CHECK-START: void IrreducibleLoop.analyze1(int) builder (after)
 ## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
 ## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:false
 .method public static analyze1(I)V
@@ -371,7 +371,7 @@
 # exit          \-         /
 #          irreducible_loop_body
 #
-## CHECK-START: void IrreducibleLoop.analyze2(int) ssa_builder (after)
+## CHECK-START: void IrreducibleLoop.analyze2(int) builder (after)
 ## CHECK-DAG: Goto outer_loop:none irreducible:false
 ## CHECK-DAG: Goto outer_loop:none irreducible:true
 .method public static analyze2(I)V
@@ -418,7 +418,7 @@
 #             |
 #           exit
 #
-## CHECK-START: void IrreducibleLoop.analyze3(int) ssa_builder (after)
+## CHECK-START: void IrreducibleLoop.analyze3(int) builder (after)
 ## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
 ## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
 .method public static analyze3(I)V
@@ -467,7 +467,7 @@
 #             |
 #           exit
 #
-## CHECK-START: void IrreducibleLoop.analyze4(int) ssa_builder (after)
+## CHECK-START: void IrreducibleLoop.analyze4(int) builder (after)
 ## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
 ## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
 .method public static analyze4(I)V
@@ -519,7 +519,7 @@
 #                    |
 #                   exit
 #
-## CHECK-START: void IrreducibleLoop.analyze5(int) ssa_builder (after)
+## CHECK-START: void IrreducibleLoop.analyze5(int) builder (after)
 ## CHECK-DAG: Goto loop:<<OuterLoop:B\d+>> outer_loop:none irreducible:true
 ## CHECK-DAG: Goto outer_loop:<<OuterLoop>> irreducible:true
 .method public static analyze5(I)V
diff --git a/test/559-checker-rtp-ifnotnull/src/Main.java b/test/559-checker-rtp-ifnotnull/src/Main.java
index 8f40129..2dc5666 100644
--- a/test/559-checker-rtp-ifnotnull/src/Main.java
+++ b/test/559-checker-rtp-ifnotnull/src/Main.java
@@ -17,7 +17,7 @@
 
 public class Main {
 
-  /// CHECK-START: void Main.boundTypeForIfNotNull() ssa_builder (after)
+  /// CHECK-START: void Main.boundTypeForIfNotNull() builder (after)
   /// CHECK-DAG:     <<Method:(i|j)\d+>>  CurrentMethod
   /// CHECK-DAG:     <<Null:l\d+>>        NullConstant
   /// CHECK-DAG:     <<Cst5:i\d+>>        IntConstant 5
diff --git a/test/566-checker-codegen-select/src/Main.java b/test/566-checker-codegen-select/src/Main.java
index 3a1b3fc..e215ab0 100644
--- a/test/566-checker-codegen-select/src/Main.java
+++ b/test/566-checker-codegen-select/src/Main.java
@@ -20,13 +20,6 @@
   /// CHECK:         <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK-NEXT:                  Select [{{j\d+}},{{j\d+}},<<Cond>>]
 
-  // Condition must be materialized on X86 because it would need too many
-  // registers otherwise.
-  /// CHECK-START-X86: long Main.$noinline$longSelect(long) disassembly (after)
-  /// CHECK:             LessThanOrEqual
-  /// CHECK-NEXT:          cmp
-  /// CHECK:             Select
-
   public long $noinline$longSelect(long param) {
     if (doThrow) { throw new Error(); }
     long val_true = longB;
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
index 555c6a9..25fb220 100644
--- a/test/570-checker-osr/expected.txt
+++ b/test/570-checker-osr/expected.txt
@@ -1,5 +1,5 @@
 JNI_OnLoad called
-100000000
-200000000
-300000000
-400000000
+100000
+200000
+300000
+400000
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index fb84687..09e97ea 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -17,6 +17,7 @@
 #include "art_method.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -28,7 +29,8 @@
   explicit OsrVisitor(Thread* thread)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        in_osr_method_(false) {}
+        in_osr_method_(false),
+        in_interpreter_(false) {}
 
   bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
@@ -38,11 +40,15 @@
         (m_name.compare("$noinline$returnFloat") == 0) ||
         (m_name.compare("$noinline$returnDouble") == 0) ||
         (m_name.compare("$noinline$returnLong") == 0) ||
-        (m_name.compare("$noinline$deopt") == 0)) {
+        (m_name.compare("$noinline$deopt") == 0) ||
+        (m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
       const OatQuickMethodHeader* header =
           Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
       if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
         in_osr_method_ = true;
+      } else if (IsCurrentFrameInInterpreter()) {
+        in_interpreter_ = true;
       }
       return false;
     }
@@ -50,6 +56,7 @@
   }
 
   bool in_osr_method_;
+  bool in_interpreter_;
 };
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
@@ -64,4 +71,77 @@
   return visitor.in_osr_method_;
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInInterpreter(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    // The return value is irrelevant if we're not using JIT.
+    return false;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self());
+  visitor.WalkStack();
+  return visitor.in_interpreter_;
+}
+
+class ProfilingInfoVisitor : public StackVisitor {
+ public:
+  explicit ProfilingInfoVisitor(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
+      ProfilingInfo::Create(Thread::Current(), m, /* retry_allocation */ true);
+      return false;
+    }
+    return true;
+  }
+};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasProfilingInfo(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    return;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  ProfilingInfoVisitor visitor(soa.Self());
+  visitor.WalkStack();
+}
+
+class OsrCheckVisitor : public StackVisitor {
+ public:
+  explicit OsrCheckVisitor(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if ((m_name.compare("$noinline$inlineCache") == 0) ||
+        (m_name.compare("$noinline$stackOverflow") == 0)) {
+      while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
+        // Sleep to yield to the compiler thread.
+        sleep(0);
+        // Will either ensure it's compiled or do the compilation itself.
+        jit->CompileMethod(m, Thread::Current(), /* osr */ true);
+      }
+      return false;
+    }
+    return true;
+  }
+};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_ensureHasOsrCode(JNIEnv*, jclass) {
+  if (!Runtime::Current()->UseJit()) {
+    return;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  OsrCheckVisitor visitor(soa.Self());
+  visitor.WalkStack();
+}
+
 }  // namespace art
diff --git a/test/570-checker-osr/run b/test/570-checker-osr/run
new file mode 100755
index 0000000..24d69b4
--- /dev/null
+++ b/test/570-checker-osr/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure this test is not subject to code collection.
+exec ${RUN} "$@" --runtime-option -Xjitinitialsize:32M
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 7485163..1142d49 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -16,6 +16,7 @@
 
 public class Main {
   public static void main(String[] args) {
+    new SubMain();
     System.loadLibrary(args[0]);
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
@@ -33,12 +34,21 @@
     try {
       $noinline$deopt();
     } catch (Exception e) {}
+    DeoptimizationController.stopDeoptimization();
+
+    $noinline$inlineCache(new Main(), /* isSecondInvocation */ false);
+    if ($noinline$inlineCache(new SubMain(), /* isSecondInvocation */ true) != SubMain.class) {
+      throw new Error("Unexpected return value");
+    }
+
+    $noinline$stackOverflow(new Main(), /* isSecondInvocation */ false);
+    $noinline$stackOverflow(new SubMain(), /* isSecondInvocation */ true);
   }
 
   public static int $noinline$returnInt() {
     if (doThrow) throw new Error("");
     int i = 0;
-    for (; i < 100000000; ++i) {
+    for (; i < 100000; ++i) {
     }
     while (!ensureInOsrCode()) {}
     System.out.println(i);
@@ -48,7 +58,7 @@
   public static float $noinline$returnFloat() {
     if (doThrow) throw new Error("");
     int i = 0;
-    for (; i < 200000000; ++i) {
+    for (; i < 200000; ++i) {
     }
     while (!ensureInOsrCode()) {}
     System.out.println(i);
@@ -58,7 +68,7 @@
   public static double $noinline$returnDouble() {
     if (doThrow) throw new Error("");
     int i = 0;
-    for (; i < 300000000; ++i) {
+    for (; i < 300000; ++i) {
     }
     while (!ensureInOsrCode()) {}
     System.out.println(i);
@@ -67,8 +77,8 @@
 
   public static long $noinline$returnLong() {
     if (doThrow) throw new Error("");
-    int i = 1000000;
-    for (; i < 400000000; ++i) {
+    int i = 0;
+    for (; i < 400000; ++i) {
     }
     while (!ensureInOsrCode()) {}
     System.out.println(i);
@@ -78,15 +88,95 @@
   public static void $noinline$deopt() {
     if (doThrow) throw new Error("");
     int i = 0;
-    for (; i < 100000000; ++i) {
+    for (; i < 100000; ++i) {
     }
     while (!ensureInOsrCode()) {}
     DeoptimizationController.startDeoptimization();
   }
 
-  public static int[] array = new int[4];
+  public static Class $noinline$inlineCache(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!ensureInInterpreter()) {
+      return SubMain.class;
+    }
 
+    ensureHasProfilingInfo();
+
+    // Ensure that we have OSR code to jump to.
+    if (isSecondInvocation) {
+      ensureHasOsrCode();
+    }
+
+    // This call will be optimized in the OSR compiled code
+    // to check and deoptimize if m is not of type 'Main'.
+    Main other = m.inlineCache();
+
+    // Jump to OSR compiled code. The second run
+    // of this method will have 'm' as a SubMain, and the compiled
+    // code we are jumping to will have wrongly optimize other as being a
+    // 'Main'.
+    if (isSecondInvocation) {
+      while (!ensureInOsrCode()) {}
+    }
+
+    // We used to wrongly optimize this call and assume 'other' was a 'Main'.
+    return other.returnClass();
+  }
+
+  public Main inlineCache() {
+    return new Main();
+  }
+
+  public Class returnClass() {
+    return Main.class;
+  }
+
+  public void otherInlineCache() {
+    return;
+  }
+
+  public static void $noinline$stackOverflow(Main m, boolean isSecondInvocation) {
+    // If we are running in non-JIT mode, or were unlucky enough to get this method
+    // already JITted, just return the expected value.
+    if (!ensureInInterpreter()) {
+      return;
+    }
+
+    // We need a ProfilingInfo object to populate the 'otherInlineCache' call.
+    ensureHasProfilingInfo();
+
+    if (isSecondInvocation) {
+      // Ensure we have an OSR code and we jump to it.
+      while (!ensureInOsrCode()) {}
+    }
+
+    for (int i = 0; i < (isSecondInvocation ? 10000000 : 1); ++i) {
+      // The first invocation of $noinline$stackOverflow will populate the inline
+      // cache with Main. The second invocation of the method, will see a SubMain
+      // and will therefore trigger deoptimization.
+      m.otherInlineCache();
+    }
+  }
+
+  public static native boolean ensureInInterpreter();
   public static native boolean ensureInOsrCode();
+  public static native void ensureHasProfilingInfo();
+  public static native void ensureHasOsrCode();
 
   public static boolean doThrow = false;
 }
+
+class SubMain extends Main {
+  public Class returnClass() {
+    return SubMain.class;
+  }
+
+  public Main inlineCache() {
+    return new SubMain();
+  }
+
+  public void otherInlineCache() {
+    return;
+  }
+}
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index ec60240..59741d6 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -19,11 +19,21 @@
   /// CHECK-START: int Main.BoolCond_IntVarVar(boolean, int, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel ne
+
   /// CHECK-START-X86_64: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarVar(boolean, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarVar(boolean cond, int x, int y) {
     return cond ? x : y;
   }
@@ -31,11 +41,21 @@
   /// CHECK-START: int Main.BoolCond_IntVarCst(boolean, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc ne
+
   /// CHECK-START-X86_64: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntVarCst(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntVarCst(boolean cond, int x) {
     return cond ? x : 1;
   }
@@ -43,11 +63,21 @@
   /// CHECK-START: int Main.BoolCond_IntCstVar(boolean, int) register (after)
   /// CHECK:               Select [{{i\d+}},{{i\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc eq
+
   /// CHECK-START-X86_64: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/ne
 
+  /// CHECK-START-X86: int Main.BoolCond_IntCstVar(boolean, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+
   public static int BoolCond_IntCstVar(boolean cond, int y) {
     return cond ? 1 : y;
   }
@@ -55,11 +85,22 @@
   /// CHECK-START: long Main.BoolCond_LongVarVar(boolean, long, long) register (after)
   /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel ne
+
   /// CHECK-START-X86_64: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarVar(boolean, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarVar(boolean cond, long x, long y) {
     return cond ? x : y;
   }
@@ -67,11 +108,22 @@
   /// CHECK-START: long Main.BoolCond_LongVarCst(boolean, long) register (after)
   /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc ne
+
   /// CHECK-START-X86_64: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongVarCst(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongVarCst(boolean cond, long x) {
     return cond ? x : 1L;
   }
@@ -79,11 +131,22 @@
   /// CHECK-START: long Main.BoolCond_LongCstVar(boolean, long) register (after)
   /// CHECK:               Select [{{j\d+}},{{j\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csinc eq
+
   /// CHECK-START-X86_64: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> ParameterValue
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.BoolCond_LongCstVar(boolean, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> ParameterValue
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long BoolCond_LongCstVar(boolean cond, long y) {
     return cond ? 1L : y;
   }
@@ -91,6 +154,11 @@
   /// CHECK-START: float Main.BoolCond_FloatVarVar(boolean, float, float) register (after)
   /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatVarVar(boolean, float, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
   public static float BoolCond_FloatVarVar(boolean cond, float x, float y) {
     return cond ? x : y;
   }
@@ -98,6 +166,11 @@
   /// CHECK-START: float Main.BoolCond_FloatVarCst(boolean, float) register (after)
   /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatVarCst(boolean, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
   public static float BoolCond_FloatVarCst(boolean cond, float x) {
     return cond ? x : 1.0f;
   }
@@ -105,6 +178,11 @@
   /// CHECK-START: float Main.BoolCond_FloatCstVar(boolean, float) register (after)
   /// CHECK:               Select [{{f\d+}},{{f\d+}},{{z\d+}}]
 
+  /// CHECK-START-ARM64: float Main.BoolCond_FloatCstVar(boolean, float) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            fcsel ne
+
   public static float BoolCond_FloatCstVar(boolean cond, float y) {
     return cond ? 1.0f : y;
   }
@@ -113,11 +191,21 @@
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntNonmatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntNonmatCond_IntVarVar(int a, int b, int x, int y) {
     return a > b ? x : y;
   }
@@ -127,11 +215,23 @@
   /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},{{z\d+}}]
   /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
 
+  /// CHECK-START-ARM64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ng
 
+  /// CHECK-START-X86: int Main.IntMatCond_IntVarVar(int, int, int, int) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{i\d+}},{{i\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+
   public static int IntMatCond_IntVarVar(int a, int b, int x, int y) {
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
@@ -141,11 +241,22 @@
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovle/ngq
 
+  /// CHECK-START-X86: long Main.IntNonmatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK-NEXT:                     Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+
   public static long IntNonmatCond_LongVarVar(int a, int b, long x, long y) {
     return a > b ? x : y;
   }
@@ -156,6 +267,13 @@
   /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
 
+  /// CHECK-START-ARM64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -163,6 +281,15 @@
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          cmovnz/neq
 
+  /// CHECK-START-X86: long Main.IntMatCond_LongVarVar(int, int, long, long) disassembly (after)
+  /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{i\d+}},{{i\d+}}]
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK-NEXT:                     cmovle/ng
+  /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
+  /// CHECK:                          cmovnz/ne
+  /// CHECK-NEXT:                     cmovnz/ne
+
   public static long IntMatCond_LongVarVar(int a, int b, long x, long y) {
     long result = (a > b ? x : y);
     return result + (a > b ? 0L : 1L);
@@ -172,6 +299,11 @@
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: long Main.LongNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -187,6 +319,13 @@
   /// CHECK:            <<Sel2:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
   /// CHECK:                          Add [<<Sel2>>,<<Sel1>>]
 
+  /// CHECK-START-ARM64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            cmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
   /// CHECK-START-X86_64: long Main.LongMatCond_LongVarVar(long, long, long, long) disassembly (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:                          Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -203,6 +342,12 @@
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{f\d+}},{{f\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: int Main.FloatLtNonmatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            csel le
+
   public static int FloatLtNonmatCond_IntVarVar(float a, float b, int x, int y) {
     return a > b ? x : y;
   }
@@ -211,6 +356,12 @@
   /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
   /// CHECK-NEXT:                     Select [{{i\d+}},{{i\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: int Main.FloatGtNonmatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            csel hs
+
   public static int FloatGtNonmatCond_IntVarVar(float a, float b, int x, int y) {
     return a < b ? x : y;
   }
@@ -219,6 +370,12 @@
   /// CHECK:            <<Cond:z\d+>> GreaterThanOrEqual [{{f\d+}},{{f\d+}}]
   /// CHECK-NEXT:                     Select [{{f\d+}},{{f\d+}},<<Cond>>]
 
+  /// CHECK-START-ARM64: float Main.FloatGtNonmatCond_FloatVarVar(float, float, float, float) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            fcsel hs
+
   public static float FloatGtNonmatCond_FloatVarVar(float a, float b, float x, float y) {
     return a < b ? x : y;
   }
@@ -228,6 +385,13 @@
   /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
 
+  /// CHECK-START-ARM64: int Main.FloatLtMatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               LessThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset le
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel le
+
   public static int FloatLtMatCond_IntVarVar(float a, float b, int x, int y) {
     int result = (a > b ? x : y);
     return result + (a > b ? 0 : 1);
@@ -238,6 +402,13 @@
   /// CHECK-NEXT:       <<Sel:i\d+>>  Select [{{i\d+}},{{i\d+}},<<Cond>>]
   /// CHECK-NEXT:                     Add [<<Cond>>,<<Sel>>]
 
+  /// CHECK-START-ARM64: int Main.FloatGtMatCond_IntVarVar(float, float, int, int) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset hs
+  /// CHECK:               Select
+  /// CHECK-NEXT:            csel hs
+
   public static int FloatGtMatCond_IntVarVar(float a, float b, int x, int y) {
     int result = (a < b ? x : y);
     return result + (a < b ? 0 : 1);
@@ -248,6 +419,13 @@
   /// CHECK-NEXT:       <<Sel:f\d+>>  Select [{{f\d+}},{{f\d+}},<<Cond>>]
   /// CHECK-NEXT:                     TypeConversion [<<Cond>>]
 
+  /// CHECK-START-ARM64: float Main.FloatGtMatCond_FloatVarVar(float, float, float, float) disassembly (after)
+  /// CHECK:               GreaterThanOrEqual
+  /// CHECK-NEXT:            fcmp
+  /// CHECK-NEXT:            cset hs
+  /// CHECK:               Select
+  /// CHECK-NEXT:            fcsel hs
+
   public static float FloatGtMatCond_FloatVarVar(float a, float b, float x, float y) {
     float result = (a < b ? x : y);
     return result + (a < b ? 0 : 1);
diff --git a/test/572-checker-array-get-regression/expected.txt b/test/572-checker-array-get-regression/expected.txt
index c08d783..f7d1ad4 100644
--- a/test/572-checker-array-get-regression/expected.txt
+++ b/test/572-checker-array-get-regression/expected.txt
@@ -1 +1 @@
-1048575
+524287
diff --git a/test/572-checker-array-get-regression/info.txt b/test/572-checker-array-get-regression/info.txt
index 023d5b0..d06feee 100644
--- a/test/572-checker-array-get-regression/info.txt
+++ b/test/572-checker-array-get-regression/info.txt
@@ -1,3 +1,3 @@
 Regression test for the ARM64 Baker's read barrier fast path compiler
-instrumentatin of array loads with a large constant index, where we
+instrumentation of array loads with a large constant index, where we
 used to require too many scratch (temporary) registers.
diff --git a/test/572-checker-array-get-regression/src/Main.java b/test/572-checker-array-get-regression/src/Main.java
index 1392215..b55be70 100644
--- a/test/572-checker-array-get-regression/src/Main.java
+++ b/test/572-checker-array-get-regression/src/Main.java
@@ -20,11 +20,11 @@
     System.out.println(test().intValue());
   }
 
-  /// CHECK-START: java.lang.Integer Main.test() ssa_builder (after)
+  /// CHECK-START: java.lang.Integer Main.test() builder (after)
   /// CHECK-DAG:     <<Method:[ij]\d+>>    CurrentMethod
-  /// CHECK-DAG:     <<Const2P20:i\d+>>    IntConstant 1048576
+  /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
   /// CHECK-DAG:     <<ConstM1:i\d+>>      IntConstant -1
-  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P20>>,<<Method>>]
+  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
   /// CHECK-DAG:     <<NullCheck1:l\d+>>   NullCheck [<<Array>>]
   /// CHECK-DAG:     <<Length1:i\d+>>      ArrayLength [<<NullCheck1>>]
   /// CHECK-DAG:     <<Index:i\d+>>        Add [<<Length1>>,<<ConstM1>>]
@@ -34,16 +34,17 @@
   /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<NullCheck2>>,<<BoundsCheck>>]
   /// CHECK-DAG:                           Return [<<LastElement>>]
 
+
   /// CHECK-START: java.lang.Integer Main.test() register (before)
   /// CHECK-DAG:     <<Method:[ij]\d+>>    CurrentMethod
-  /// CHECK-DAG:     <<Const2P20:i\d+>>    IntConstant 1048576
-  /// CHECK-DAG:     <<Const2P20M1:i\d+>>  IntConstant 1048575
-  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P20>>,<<Method>>]
-  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<Const2P20M1>>]
+  /// CHECK-DAG:     <<Const2P19:i\d+>>    IntConstant 524288
+  /// CHECK-DAG:     <<Const2P19M1:i\d+>>  IntConstant 524287
+  /// CHECK-DAG:     <<Array:l\d+>>        NewArray [<<Const2P19>>,<<Method>>]
+  /// CHECK-DAG:     <<LastElement:l\d+>>  ArrayGet [<<Array>>,<<Const2P19M1>>]
   /// CHECK-DAG:                           Return [<<LastElement>>]
 
   public static Integer test() {
-    Integer[] integers = new Integer[1024 * 1024];
+    Integer[] integers = new Integer[1 << 19];
     initIntegerArray(integers);
     // Array load with a large constant index (after constant folding
     // and bounds check elimination).
diff --git a/test/573-checker-checkcast-regression/expected.txt b/test/573-checker-checkcast-regression/expected.txt
new file mode 100644
index 0000000..b8626c4
--- /dev/null
+++ b/test/573-checker-checkcast-regression/expected.txt
@@ -0,0 +1 @@
+4
diff --git a/test/573-checker-checkcast-regression/info.txt b/test/573-checker-checkcast-regression/info.txt
new file mode 100644
index 0000000..74a6d6e
--- /dev/null
+++ b/test/573-checker-checkcast-regression/info.txt
@@ -0,0 +1,4 @@
+Regression test for the x86-64 Baker's read barrier fast path compiler
+instrumentation of CheckCasts, where we used to use an
+art::x86_64::NearLabel, the range of which was sometimes too short
+with Baker's read barriers enabled.
diff --git a/test/573-checker-checkcast-regression/src/Main.java b/test/573-checker-checkcast-regression/src/Main.java
new file mode 100644
index 0000000..473a2b1
--- /dev/null
+++ b/test/573-checker-checkcast-regression/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    Object[] array = { new Integer(1), new Integer(2), new Integer(3) };
+    int result = test(array, 0, 2);
+    System.out.println(result);
+  }
+
+  // This test method uses two integers (`index1` and `index2`) to
+  // force the register allocator to use some high registers (R8-R15)
+  // on x86-64 in the code generated for the first CheckCast (which
+  // converts `new_array` to an `Object[]`), so as to produce code
+  // containing a conditional jump whose offset does not fit in a
+  // NearLabel when using Baker's read barrier fast path (because
+  // x86-64 instructions using these high registers have a larger
+  // encoding).
+  //
+  // The intent of this artifical constraint is to ensure the initial
+  // failure is properly tested by this regression test.
+
+  /// CHECK-START: int Main.test(java.lang.Object, int, int) register (after)
+  /// CHECK-DAG:     CheckCast check_kind:array_object_check
+  /// CHECK-DAG:     CheckCast check_kind:exact_check
+  /// CHECK-DAG:     CheckCast check_kind:exact_check
+
+  static public int test(Object new_array, int index1, int index2) {
+    Object[] objectArray = (Object[]) new_array;
+    Integer integer1 = (Integer) objectArray[index1];
+    Integer integer2 = (Integer) objectArray[index2];
+    return integer1.intValue() + integer2.intValue();
+  }
+
+}
diff --git a/test/574-irreducible-and-constant-area/expected.txt b/test/574-irreducible-and-constant-area/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/expected.txt
diff --git a/test/574-irreducible-and-constant-area/info.txt b/test/574-irreducible-and-constant-area/info.txt
new file mode 100644
index 0000000..e957a5a
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/info.txt
@@ -0,0 +1,3 @@
+Regression test for intrinsics on x86, which used to wrongly assume
+a HInvokeStaticOrDirect must have a special input (does not apply for irreducible
+loops).
diff --git a/test/574-irreducible-and-constant-area/run b/test/574-irreducible-and-constant-area/run
new file mode 100755
index 0000000..ffdbcc9
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Don't do relocation, as this affects this test.
+exec ${RUN} "$@" --no-relocate
diff --git a/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..d7d4346
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/smali/IrreducibleLoop.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+.method public static simpleLoop(I)I
+   .registers 5
+   const/16 v0, 42
+   const/16 v1, 42
+   const-wide/high16 v2, 0x4000000000000000L
+   if-eq p0, v0, :other_loop_entry
+   :loop_entry
+   invoke-static {v1, v1}, LMain;->$inline$foo(FF)V
+   invoke-static {v2, v3, v2, v3}, LMain;->$inline$foo(DD)V
+   if-ne p0, v0, :exit
+   add-int v0, v0, v0
+   :other_loop_entry
+   add-int v0, v0, v0
+   goto :loop_entry
+   :exit
+   return v0
+.end method
diff --git a/test/574-irreducible-and-constant-area/src/Main.java b/test/574-irreducible-and-constant-area/src/Main.java
new file mode 100644
index 0000000..3cdd924
--- /dev/null
+++ b/test/574-irreducible-and-constant-area/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    m.invoke(null, arguments);
+  }
+
+  public static void $inline$foo(float a, float b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+
+  public static void $inline$foo(double a, double b) {
+    Math.abs(a);
+    Math.max(a, b);
+    Math.min(a, b);
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 73ce307..8808a50 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -58,4 +58,6 @@
 b/26594149 (6)
 b/26594149 (7)
 b/26594149 (8)
+b/27148248
+b/26965384
 Done!
diff --git a/test/800-smali/smali/b_26965384.smali b/test/800-smali/smali/b_26965384.smali
new file mode 100644
index 0000000..47ed418
--- /dev/null
+++ b/test/800-smali/smali/b_26965384.smali
@@ -0,0 +1,20 @@
+.class public LB26965384;
+.super LB26965384Super;
+
+.method public constructor <init>()V
+    .locals 1
+    const v0, 0
+    iput v0, p0, LB26965384;->a:I
+    invoke-direct {p0}, LB26965384Super;-><init>()V
+    return-void
+.end method
+
+
+# Just by loading this class we should fail. It doesn't really matter what's in
+# this method.
+.method public static run()V
+    .registers 4
+    new-instance v0, LB26965384;
+    invoke-direct {v0}, LB26965384;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_26965384Super.smali b/test/800-smali/smali/b_26965384Super.smali
new file mode 100644
index 0000000..32faea7
--- /dev/null
+++ b/test/800-smali/smali/b_26965384Super.smali
@@ -0,0 +1,10 @@
+.class public LB26965384Super;
+.super Ljava/lang/Object;
+
+.field public a:I
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_27148248.smali b/test/800-smali/smali/b_27148248.smali
new file mode 100644
index 0000000..4601cc6
--- /dev/null
+++ b/test/800-smali/smali/b_27148248.smali
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB27148248;
+
+# Regression for dex2oatd crash during compilation of method which
+# used to throw with argument of non-reference type.
+
+.super Ljava/lang/Object;
+
+.method public static run()V
+   .registers 1
+   const v0, 0xbad
+   throw v0
+.end method
+
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index b0eff5d..4e6de46 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -160,6 +160,10 @@
                 null));
         testCases.add(new TestCase("b/26594149 (8)", "B26594149_8", "run", null, new VerifyError(),
                 null));
+        testCases.add(new TestCase("b/27148248", "B27148248", "run", null, new VerifyError(),
+                null));
+        testCases.add(new TestCase("b/26965384", "B26965384", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9dcd4dc..364be59 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -42,8 +42,7 @@
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
-    $(JACK) \
-    $(JILL_JAR)
+    $(JACK)
   TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
 endif
 
@@ -72,8 +71,8 @@
 	  DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK=$(abspath $(JACK)) \
+	  JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	  JACK_CLASSPATH=$(TARGET_JACK_CLASSPATH) \
-	  JILL_JAR=$(abspath $(JILL_JAR)) \
 	  $(LOCAL_PATH)/run-test $$(PRIVATE_RUN_TEST_OPTIONS) --output-path $$(abspath $$(dir $$@)) $(1)
 	$(hide) touch $$@
 
@@ -446,9 +445,7 @@
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
-# 570 is disabled while investigating osr flakiness.
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
-  570-checker-osr \
   137-cfi
 
 ifneq (,$(filter jit,$(COMPILER_TYPES)))
@@ -964,8 +961,8 @@
 	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
 	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK=$(abspath $(JACK)) \
+	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
 	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
-	    JILL_JAR=$(abspath $(JILL_JAR)) \
 	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \
 	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 8245ccf..2db1e6c 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -406,6 +406,9 @@
   TMP_DIR_OPTION="-Djava.io.tmpdir=/data/local/tmp"
 fi
 
+# We set DumpNativeStackOnSigQuit to false to avoid stressing libunwind.
+# b/27185632
+# b/24664297
 dalvikvm_cmdline="$INVOKE_WITH $GDB $ANDROID_ROOT/bin/$DALVIKVM \
                   $GDB_ARGS \
                   $FLAGS \
@@ -420,6 +423,7 @@
                   $DEBUGGER_OPTS \
                   $DALVIKVM_BOOT_OPT \
                   $TMP_DIR_OPTION \
+                  -XX:DumpNativeStackOnSigQuit:false \
                   -cp $DEX_LOCATION/$TEST_NAME.jar$SECONDARY_DEX $MAIN $ARGS"
 
 # Remove whitespace.
diff --git a/test/run-test b/test/run-test
index faa597e..f1875d7 100755
--- a/test/run-test
+++ b/test/run-test
@@ -88,13 +88,7 @@
   export JACK_CLASSPATH="${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-libart-hostdex_intermediates/classes.jack:${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/common/obj/JAVA_LIBRARIES/core-oj-hostdex_intermediates/classes.jack"
 fi
 
-# If JILL_JAR is not set, assume it is located in the prebuilts directory.
-if [ -z "$JILL_JAR" ]; then
-  export JILL_JAR="$ANDROID_BUILD_TOP/prebuilts/sdk/tools/jill.jar"
-fi
-
 export JACK="$JACK -g -cp $JACK_CLASSPATH"
-export JILL="java -jar $JILL_JAR"
 
 info="info.txt"
 build="build"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index da5225c..d9b26bc 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -78,6 +78,7 @@
 
 Release History:
  0.4 Pending
+   Annotate char[] objects with their string values.
    Show registered native allocations for heap dumps that support it.
 
  0.3 Dec 15, 2015
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 8b7f9ea..d7b64e2 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -76,11 +76,15 @@
    * If maxChars is negative, the returned string is not truncated.
    */
   public static String asString(Instance inst, int maxChars) {
-    if (!isInstanceOfClass(inst, "java.lang.String")) {
-      return null;
+    // The inst object could either be a java.lang.String or a char[]. If it
+    // is a char[], use that directly as the value, otherwise use the value
+    // field of the string object. The field accesses for count and offset
+    // later on will work okay regardless of what type the inst object is.
+    Object value = inst;
+    if (isInstanceOfClass(inst, "java.lang.String")) {
+      value = getField(inst, "value");
     }
 
-    Object value = getField(inst, "value");
     if (!(value instanceof ArrayInstance)) {
       return null;
     }
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 701d60e..d61a98d 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -35,6 +35,7 @@
   // class and reading the desired field.
   public static class DumpedStuff {
     public String basicString = "hello, world";
+    public char[] charArray = "char thing".toCharArray();
     public String nullString = null;
     public Object anObject = new Object();
     public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java
index 32f48ce..59b1c90 100644
--- a/tools/ahat/test/InstanceUtilsTest.java
+++ b/tools/ahat/test/InstanceUtilsTest.java
@@ -32,6 +32,13 @@
   }
 
   @Test
+  public void asStringCharArray() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str));
+  }
+
+  @Test
   public void asStringTruncated() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -39,6 +46,13 @@
   }
 
   @Test
+  public void asStringCharArrayTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char ", InstanceUtils.asString(str, 5));
+  }
+
+  @Test
   public void asStringExactMax() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -46,6 +60,13 @@
   }
 
   @Test
+  public void asStringCharArrayExactMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 10));
+  }
+
+  @Test
   public void asStringNotTruncated() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -53,6 +74,13 @@
   }
 
   @Test
+  public void asStringCharArrayNotTruncated() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, 50));
+  }
+
+  @Test
   public void asStringNegativeMax() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance str = (Instance)dump.getDumpedThing("basicString");
@@ -60,6 +88,13 @@
   }
 
   @Test
+  public void asStringCharArrayNegativeMax() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+    Instance str = (Instance)dump.getDumpedThing("charArray");
+    assertEquals("char thing", InstanceUtils.asString(str, -3));
+  }
+
+  @Test
   public void asStringNull() throws IOException {
     TestDump dump = TestDump.getTestDump();
     Instance obj = (Instance)dump.getDumpedThing("nullString");
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 9e02ce2..2eb52bc 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar ${out_dir}/host/linux-x86/bin/jack"
+common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index f346239..45fb4b4d 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -20,13 +20,13 @@
 fi
 
 # Jar containing jsr166 tests.
-jsr166_test_jar=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/javalib.jar
+jsr166_test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/classes.jack
 
 # Jar containing all the other tests.
-test_jar=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/javalib.jar
+test_jack=${OUT_DIR-out}/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/classes.jack
 
 
-if [ ! -f $test_jar ]; then
+if [ ! -f $test_jack ]; then
   echo "Before running, you must build core-tests, jsr166-tests and vogar: \
         make core-tests jsr166-tests vogar vogar.jar"
   exit 1
@@ -108,7 +108,11 @@
 # the default timeout.
 vogar_args="$vogar_args --timeout 480"
 
+# Use Jack with "1.8" configuration.
+export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
+vogar_args="$vogar_args --toolchain jack --language JN"
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $vogar_args --vm-arg -Xusejit:true $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}