Merge "[optimizing] Tune some x86_64 moves"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 5a3236d..730e61d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -26,6 +26,7 @@
   AllFields \
   ExceptionHandle \
   GetMethodSignature \
+  Instrumentation \
   Interfaces \
   Main \
   MultiDex \
@@ -64,6 +65,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
+ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
 ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex Nested
@@ -157,6 +159,7 @@
   runtime/handle_scope_test.cc \
   runtime/indenter_test.cc \
   runtime/indirect_reference_table_test.cc \
+  runtime/instrumentation_test.cc \
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 0acdd42..b78b3d7 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -172,7 +172,6 @@
   kMirOpRangeCheck,
   kMirOpDivZeroCheck,
   kMirOpCheck,
-  kMirOpCheckPart2,
   kMirOpSelect,
 
   // Vector opcodes:
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 4f0e9d1..a4319c3 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -533,7 +533,7 @@
 
   // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
   // same dalvik reg to keep consistency with subsequent instructions. However, if there's no
-  // defining MIR for that dalvik reg, the preserved valus must come from its predecessors
+  // defining MIR for that dalvik reg, the preserved values must come from its predecessors
   // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
   if (def_change == kNPos) {
     if (wide) {
@@ -541,7 +541,21 @@
       DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
       CreatePhi(new_s_reg + 1);  // High word Phi.
     }
-    return CreatePhi(new_s_reg);
+    MIR* phi = CreatePhi(new_s_reg);
+    // If this is a degenerate Phi with all inputs being the same SSA reg, we rename its uses.
+    DCHECK_NE(phi->ssa_rep->num_uses, 0u);
+    int old_s_reg = phi->ssa_rep->uses[0];
+    bool all_same = true;
+    for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
+      if (phi->ssa_rep->uses[i] != old_s_reg) {
+        all_same = false;
+        break;
+      }
+    }
+    if (all_same) {
+      vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide);
+    }
+    return phi;
   } else {
     DCHECK_LT(def_change, last_change);
     DCHECK_LE(last_change, vreg_chains_.NumMIRs());
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index f9f0882..efb32bb 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -1629,6 +1629,52 @@
 }
 
 TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_MOVE(4, Instruction::MOVE, 1u, 0u),
+      DEF_CONST(4, Instruction::CONST, 2u, 1000),
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  static const bool eliminated[] = {
+      false, false, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+  // Check that we've created a single-input Phi to replace the CONST 3u.
+  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+  MIR* phi = bb4->first_mir_insn;
+  ASSERT_TRUE(phi != nullptr);
+  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+  ASSERT_EQ(1, phi->ssa_rep->num_uses);
+  EXPECT_EQ(0, phi->ssa_rep->uses[0]);
+  ASSERT_EQ(1, phi->ssa_rep->num_defs);
+  EXPECT_EQ(2, phi->ssa_rep->defs[0]);
+  EXPECT_EQ(0u, phi->dalvikInsn.vA);
+  MIR* move = phi->next;
+  ASSERT_TRUE(move != nullptr);
+  ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode);
+  ASSERT_EQ(1, move->ssa_rep->num_uses);
+  EXPECT_EQ(2, move->ssa_rep->uses[0]);
+  ASSERT_EQ(1, move->ssa_rep->num_defs);
+  EXPECT_EQ(1, move->ssa_rep->defs[0]);
+  EXPECT_EQ(1u, move->dalvikInsn.vA);
+  EXPECT_EQ(0u, move->dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false, kDexMemAccessWord },
   };
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index b4aec98..a7ba061 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -834,9 +834,6 @@
   // 10B MIR_CHECK
   0,
 
-  // 10C MIR_CHECKPART2
-  0,
-
   // 10D MIR_SELECT
   DF_DA | DF_UB,
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9e3fbbc..1871f07 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -52,8 +52,7 @@
   "OpNullCheck",
   "OpRangeCheck",
   "OpDivZeroCheck",
-  "Check1",
-  "Check2",
+  "Check",
   "Select",
   "ConstVector",
   "MoveVector",
@@ -1508,7 +1507,7 @@
   Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
 
   // Handle special cases that recover the original dalvik instruction.
-  if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
+  if (opcode == kMirOpCheck) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
     str.append(": ");
     // Recover the original Dex instruction.
@@ -2517,8 +2516,6 @@
       return Instruction::kContinue | Instruction::kThrow;
     case kMirOpCheck:
       return Instruction::kContinue | Instruction::kThrow;
-    case kMirOpCheckPart2:
-      return Instruction::kContinue;
     case kMirOpSelect:
       return Instruction::kContinue;
     case kMirOpConstVector:
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 5654604..94be1fd 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -169,7 +169,8 @@
         ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
             (kInvokeTypeMask << kBitSharpTypeBegin));
     it->flags_ = other_flags |
-        (fast_path_flags != 0 ? kFlagFastPath : 0u) |
+        // String init path is a special always-fast path.
+        (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) |
         ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
         ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
         (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fb68335..86bb69d 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1391,22 +1391,6 @@
       }
     }
   }
-  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
-      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
-    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
-    // instruction to the previous block. However, the MIRGraph data used above
-    // doesn't reflect that, so we still need to process that MIR insn here.
-    MIR* mir = nullptr;
-    BasicBlock* pred_bb = bb;
-    // Traverse empty blocks.
-    while (mir == nullptr && pred_bb->predecessors.size() == 1u) {
-      pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
-      DCHECK(pred_bb != nullptr);
-      mir = pred_bb->last_mir_insn;
-    }
-    DCHECK(mir != nullptr);
-    UpdateReferenceVRegsLocal(nullptr, mir, references);
-  }
 }
 
 bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e9e9161..e3e87ec 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1187,7 +1187,6 @@
     case kMirOpRangeCheck:
     case kMirOpDivZeroCheck:
     case kMirOpCheck:
-    case kMirOpCheckPart2:
       // Ignore these known opcodes
       break;
     default:
@@ -1276,20 +1275,6 @@
       head_lir->u.m.def_mask = &kEncodeAll;
     }
 
-    if (opcode == kMirOpCheck) {
-      // Combine check and work halves of throwing instruction.
-      MIR* work_half = mir->meta.throw_insn;
-      mir->dalvikInsn = work_half->dalvikInsn;
-      mir->optimization_flags = work_half->optimization_flags;
-      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
-      opcode = work_half->dalvikInsn.opcode;
-      SSARepresentation* ssa_rep = work_half->ssa_rep;
-      work_half->ssa_rep = mir->ssa_rep;
-      mir->ssa_rep = ssa_rep;
-      work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
-      work_half->meta.throw_insn = mir;
-    }
-
     if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       HandleExtendedMethodMIR(bb, mir);
       continue;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 73cfe92..7ca4382 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -403,7 +403,6 @@
     kMirOpRangeCheck,
     kMirOpDivZeroCheck,
     kMirOpCheck,
-    kMirOpCheckPart2,
     kMirOpSelect,
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 92fa6db..b2b5496 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -281,15 +281,22 @@
     return false;
   }
 
+  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
+    for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) {
+      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i);
+      if (!block->Dominates(back_edge)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   void Run() {
     HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    // Must be simplified loop.
-    DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U);
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* block = it_loop.Current();
       DCHECK(block->IsInLoop());
-      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0);
-      if (!block->Dominates(back_edge)) {
+      if (!DominatesAllBackEdges(block, loop_info)) {
         // In order not to trigger deoptimization unnecessarily, make sure
         // that all array accesses collected are really executed in the loop.
         // For array accesses in a branch inside the loop, don't collect the
@@ -1151,9 +1158,26 @@
     bounds_check->GetBlock()->RemoveInstruction(bounds_check);
   }
 
+  static bool HasSameInputAtBackEdges(HPhi* phi) {
+    DCHECK(phi->IsLoopHeaderPhi());
+    // Start with input 1. Input 0 is from the incoming block.
+    HInstruction* input1 = phi->InputAt(1);
+    DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+        *phi->GetBlock()->GetPredecessors().Get(1)));
+    for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+      DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+          *phi->GetBlock()->GetPredecessors().Get(i)));
+      if (input1 != phi->InputAt(i)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   void VisitPhi(HPhi* phi) {
-    if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
-      DCHECK_EQ(phi->InputCount(), 2U);
+    if (phi->IsLoopHeaderPhi()
+        && (phi->GetType() == Primitive::kPrimInt)
+        && HasSameInputAtBackEdges(phi)) {
       HInstruction* instruction = phi->InputAt(1);
       HInstruction *left;
       int32_t increment;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 0f44af0..a5c6f23 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -282,7 +282,10 @@
 
   // To avoid splitting blocks, we compute ahead of time the instructions that
   // start a new block, and create these blocks.
-  ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
+  if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+    return false;
+  }
 
   // Note that the compiler driver is null when unit testing.
   if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
@@ -349,7 +352,7 @@
   current_block_ = block;
 }
 
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
   branch_targets_.SetSize(code_end - code_ptr);
@@ -374,7 +377,14 @@
       }
       dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+
+      if (code_ptr >= code_end) {
+        if (instruction.CanFlowThrough()) {
+          // In the normal case we should never hit this but someone can artificially forge a dex
+          // file to fall through out of the method code. In this case we bail out of compilation.
+          return false;
+        }
+      } else if (FindBlockStartingAt(dex_pc) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, dex_pc);
         branch_targets_.Put(dex_pc, block);
       }
@@ -406,7 +416,12 @@
       // Fall-through. Add a block if there is more code afterwards.
       dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+      if (code_ptr >= code_end) {
+        // In the normal case we should never hit this but someone can artificially forge a dex
+        // file to fall through out of the method code. In this case we bail out of compilation.
+        // (A switch can fall through, so we don't need to check CanFlowThrough().)
+        return false;
+      } else if (FindBlockStartingAt(dex_pc) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, dex_pc);
         branch_targets_.Put(dex_pc, block);
       }
@@ -415,6 +430,7 @@
       dex_pc += instruction.SizeInCodeUnits();
     }
   }
+  return true;
 }
 
 HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const {
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index dc6d97e..36503ce 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -88,7 +88,10 @@
   // the newly created blocks.
   // As a side effect, also compute the number of dex instructions, blocks, and
   // branches.
-  void ComputeBranchTargets(const uint16_t* start,
+  // Returns true if all the branches fall inside the method code, false otherwise.
+  // (In normal cases this should always return true but someone can artificially
+  // create a code unit in which branches fall through out of it).
+  bool ComputeBranchTargets(const uint16_t* start,
                             const uint16_t* end,
                             size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t index);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e4c37de..f56e446 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -112,6 +112,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -3539,8 +3543,18 @@
 void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathARM* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
 
   __ LoadFromOffset(
       kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9e02a1d..0222f93 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -285,6 +285,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -1034,8 +1038,19 @@
 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathARM64* slow_path =
-    new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8aa7796..cfb8702 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -153,6 +153,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -809,7 +813,6 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
@@ -3993,8 +3996,19 @@
 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathX86* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ fs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
   if (successor == nullptr) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9cf5ecb..e633970 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -136,6 +136,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -771,7 +775,6 @@
 
   HLoopInformation* info = block->GetLoopInformation();
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
     return;
   }
@@ -3864,8 +3867,19 @@
 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                           HBasicBlock* successor) {
   SuspendCheckSlowPathX86_64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ gs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
   if (successor == nullptr) {
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 2bfecc6..8f69f4d 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -235,14 +235,13 @@
 
   TestBlock(graph, 0, false, -1);            // entry block
   TestBlock(graph, 1, false, -1);            // pre header
-  const int blocks2[] = {2, 3, 4, 5, 8};
-  TestBlock(graph, 2, true, 2, blocks2, 5);  // loop header
+  const int blocks2[] = {2, 3, 4, 5};
+  TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2));  // loop header
   TestBlock(graph, 3, false, 2);             // block in loop
-  TestBlock(graph, 4, false, 2);             // original back edge
-  TestBlock(graph, 5, false, 2);             // original back edge
+  TestBlock(graph, 4, false, 2);             // back edge
+  TestBlock(graph, 5, false, 2);             // back edge
   TestBlock(graph, 6, false, -1);            // return block
   TestBlock(graph, 7, false, -1);            // exit block
-  TestBlock(graph, 8, false, 2);             // synthesized back edge
 }
 
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 8ea8f3c..bb27a94 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -288,6 +288,7 @@
 
 void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
+  HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
   // Ensure the pre-header block is first in the list of
   // predecessors of a loop header.
@@ -297,57 +298,48 @@
         id));
   }
 
-  // Ensure the loop header has only two predecessors and that only the
-  // second one is a back edge.
+  // Ensure the loop header has only one incoming branch and the remaining
+  // predecessors are back edges.
   size_t num_preds = loop_header->GetPredecessors().Size();
   if (num_preds < 2) {
     AddError(StringPrintf(
         "Loop header %d has less than two predecessors: %zu.",
         id,
         num_preds));
-  } else if (num_preds > 2) {
-    AddError(StringPrintf(
-        "Loop header %d has more than two predecessors: %zu.",
-        id,
-        num_preds));
   } else {
-    HLoopInformation* loop_information = loop_header->GetLoopInformation();
     HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
     if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
-    HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
-    if (!loop_information->IsBackEdge(*second_predecessor)) {
-      AddError(StringPrintf(
-          "Second predecessor of loop header %d is not a back edge.",
-          id));
+    for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) {
+      HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i);
+      if (!loop_information->IsBackEdge(*predecessor)) {
+        AddError(StringPrintf(
+            "Loop header %d has multiple incoming (non back edge) blocks.",
+            id));
+      }
     }
   }
 
-  const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks();
+  const ArenaBitVector& loop_blocks = loop_information->GetBlocks();
 
-  // Ensure there is only one back edge per loop.
-  size_t num_back_edges =
-    loop_header->GetLoopInformation()->GetBackEdges().Size();
+  // Ensure back edges belong to the loop.
+  size_t num_back_edges = loop_information->GetBackEdges().Size();
   if (num_back_edges == 0) {
     AddError(StringPrintf(
         "Loop defined by header %d has no back edge.",
         id));
-  } else if (num_back_edges > 1) {
-    AddError(StringPrintf(
-        "Loop defined by header %d has several back edges: %zu.",
-        id,
-        num_back_edges));
   } else {
-    DCHECK_EQ(num_back_edges, 1u);
-    int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId();
-    if (!loop_blocks.IsBitSet(back_edge_id)) {
-      AddError(StringPrintf(
-          "Loop defined by header %d has an invalid back edge %d.",
-          id,
-          back_edge_id));
+    for (size_t i = 0; i < num_back_edges; ++i) {
+      int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId();
+      if (!loop_blocks.IsBitSet(back_edge_id)) {
+        AddError(StringPrintf(
+            "Loop defined by header %d has an invalid back edge %d.",
+            id,
+            back_edge_id));
+      }
     }
   }
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 8a96ee9..1914339 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -445,44 +445,40 @@
 
 TEST(LivenessTest, Loop6) {
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 2, phi in block 8)
+  // (constant0, constant4, constant5, phi in block 2)
   const char* expected =
     "Block 0\n"
-    "  live in: (00000)\n"
-    "  live out: (11100)\n"
-    "  kill: (11100)\n"
+    "  live in: (0000)\n"
+    "  live out: (1110)\n"
+    "  kill: (1110)\n"
     "Block 1\n"
-    "  live in: (11100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (1110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 2\n"  // loop header
-    "  live in: (01100)\n"
-    "  live out: (01110)\n"
-    "  kill: (00010)\n"
+    "  live in: (0110)\n"
+    "  live out: (0111)\n"
+    "  kill: (0001)\n"
     "Block 3\n"
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
-    "Block 4\n"  // original back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
-    "Block 5\n"  // original back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00000)\n"
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
+    "Block 4\n"  // back edge
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
+    "Block 5\n"  // back edge
+    "  live in: (0110)\n"
+    "  live out: (0110)\n"
+    "  kill: (0000)\n"
     "Block 6\n"  // return block
-    "  live in: (00010)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
+    "  live in: (0001)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n"
     "Block 7\n"  // exit block
-    "  live in: (00000)\n"
-    "  live out: (00000)\n"
-    "  kill: (00000)\n"
-    "Block 8\n"  // synthesized back edge
-    "  live in: (01100)\n"
-    "  live out: (01100)\n"
-    "  kill: (00001)\n";
+    "  live in: (0000)\n"
+    "  live out: (0000)\n"
+    "  kill: (0000)\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d3ee770..85c0361 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -191,24 +191,6 @@
 void HGraph::SimplifyLoop(HBasicBlock* header) {
   HLoopInformation* info = header->GetLoopInformation();
 
-  // If there are more than one back edge, make them branch to the same block that
-  // will become the only back edge. This simplifies finding natural loops in the
-  // graph.
-  // Also, if the loop is a do/while (that is the back edge is an if), change the
-  // back edge to be a goto. This simplifies code generation of suspend cheks.
-  if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) {
-    HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc());
-    AddBlock(new_back_edge);
-    new_back_edge->AddInstruction(new (arena_) HGoto());
-    for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
-      HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
-      back_edge->ReplaceSuccessor(header, new_back_edge);
-    }
-    info->ClearBackEdges();
-    info->AddBackEdge(new_back_edge);
-    new_back_edge->AddSuccessor(header);
-  }
-
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
   // loop.
@@ -218,11 +200,9 @@
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto());
 
-    ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
-    HBasicBlock* back_edge = info->GetBackEdges().Get(0);
     for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
       HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
-      if (predecessor != back_edge) {
+      if (!info->IsBackEdge(*predecessor)) {
         predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
       }
@@ -230,9 +210,17 @@
     pre_header->AddSuccessor(header);
   }
 
-  // Make sure the second predecessor of a loop header is the back edge.
-  if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
-    header->SwapPredecessors();
+  // Make sure the first predecessor of a loop header is the incoming block.
+  if (info->IsBackEdge(*header->GetPredecessors().Get(0))) {
+    HBasicBlock* to_swap = header->GetPredecessors().Get(0);
+    for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+      if (!info->IsBackEdge(*predecessor)) {
+        header->predecessors_.Put(pred, to_swap);
+        header->predecessors_.Put(0, predecessor);
+        break;
+      }
+    }
   }
 
   // Place the suspend check at the beginning of the header, so that live registers
@@ -357,21 +345,22 @@
 }
 
 bool HLoopInformation::Populate() {
-  DCHECK_EQ(GetBackEdges().Size(), 1u);
-  HBasicBlock* back_edge = GetBackEdges().Get(0);
-  DCHECK(back_edge->GetDominator() != nullptr);
-  if (!header_->Dominates(back_edge)) {
-    // This loop is not natural. Do not bother going further.
-    return false;
-  }
+  for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) {
+    HBasicBlock* back_edge = GetBackEdges().Get(i);
+    DCHECK(back_edge->GetDominator() != nullptr);
+    if (!header_->Dominates(back_edge)) {
+      // This loop is not natural. Do not bother going further.
+      return false;
+    }
 
-  // Populate this loop: starting with the back edge, recursively add predecessors
-  // that are not already part of that loop. Set the header as part of the loop
-  // to end the recursion.
-  // This is a recursive implementation of the algorithm described in
-  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
-  blocks_.SetBit(header_->GetBlockId());
-  PopulateRecursive(back_edge);
+    // Populate this loop: starting with the back edge, recursively add predecessors
+    // that are not already part of that loop. Set the header as part of the loop
+    // to end the recursion.
+    // This is a recursive implementation of the algorithm described in
+    // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+    blocks_.SetBit(header_->GetBlockId());
+    PopulateRecursive(back_edge);
+  }
   return true;
 }
 
@@ -387,6 +376,14 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
+size_t HLoopInformation::GetLifetimeEnd() const {
+  size_t last_position = 0;
+  for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+    last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position);
+  }
+  return last_position;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -503,6 +500,16 @@
   }
 }
 
+void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) {
+  for (size_t i = 0; i < locals.Size(); i++) {
+    HInstruction* instruction = locals.Get(i);
+    SetRawEnvAt(i, instruction);
+    if (instruction != nullptr) {
+      instruction->AddEnvUseAt(this, i);
+    }
+  }
+}
+
 void HEnvironment::CopyFrom(HEnvironment* env) {
   for (size_t i = 0; i < env->Size(); i++) {
     HInstruction* instruction = env->GetInstructionAt(i);
@@ -963,8 +970,9 @@
     HLoopInformation* loop_info = it.Current();
     loop_info->Remove(this);
     if (loop_info->IsBackEdge(*this)) {
-      // This deliberately leaves the loop in an inconsistent state and will
-      // fail SSAChecker unless the entire loop is removed during the pass.
+      // If this was the last back edge of the loop, we deliberately leave the
+      // loop in an inconsistent state and will fail SSAChecker unless the
+      // entire loop is removed during the pass.
       loop_info->RemoveBackEdge(this);
     }
   }
@@ -1075,8 +1083,7 @@
     HLoopInformation* loop_info = it.Current();
     loop_info->Remove(other);
     if (loop_info->IsBackEdge(*other)) {
-      loop_info->ClearBackEdges();
-      loop_info->AddBackEdge(this);
+      loop_info->ReplaceBackEdge(other, this);
     }
   }
 
@@ -1307,11 +1314,9 @@
         loop_it.Current()->Add(to);
       }
       if (info->IsBackEdge(*at)) {
-        // Only `at` can become a back edge, as the inlined blocks
-        // are predecessors of `at`.
-        DCHECK_EQ(1u, info->NumberOfBackEdges());
-        info->ClearBackEdges();
-        info->AddBackEdge(to);
+        // Only `to` can become a back edge, as the inlined blocks
+        // are predecessors of `to`.
+        info->ReplaceBackEdge(at, to);
       }
     }
   }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 63f3c95..5fc0470 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -48,6 +48,7 @@
 class HSuspendCheck;
 class LiveInterval;
 class LocationSummary;
+class SlowPathCode;
 class SsaBuilder;
 
 static const int kDefaultNumberOfBlocks = 8;
@@ -397,16 +398,21 @@
     return back_edges_;
   }
 
-  HBasicBlock* GetSingleBackEdge() const {
-    DCHECK_EQ(back_edges_.Size(), 1u);
-    return back_edges_.Get(0);
+  // Returns the lifetime position of the back edge that has the
+  // greatest lifetime position.
+  size_t GetLifetimeEnd() const;
+
+  void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) {
+    for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+      if (back_edges_.Get(i) == existing) {
+        back_edges_.Put(i, new_back_edge);
+        return;
+      }
+    }
+    UNREACHABLE();
   }
 
-  void ClearBackEdges() {
-    back_edges_.Reset();
-  }
-
-  // Find blocks that are part of this loop. Returns whether the loop is a natural loop,
+  // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
   // that is the header dominates the back edge.
   bool Populate();
 
@@ -1062,7 +1068,9 @@
     }
   }
 
-  void CopyFrom(HEnvironment* env);
+  void CopyFrom(const GrowableArray<HInstruction*>& locals);
+  void CopyFrom(HEnvironment* environment);
+
   // Copy from `env`. If it's a loop phi for `loop_header`, copy the first
   // input to the loop phi instead. This is for inserting instructions that
   // require an environment (like HDeoptimization) in the loop pre-header.
@@ -3247,19 +3255,25 @@
 class HSuspendCheck : public HTemplateInstruction<0> {
  public:
   explicit HSuspendCheck(uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {}
+      : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {}
 
   bool NeedsEnvironment() const OVERRIDE {
     return true;
   }
 
   uint32_t GetDexPc() const { return dex_pc_; }
+  void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
+  SlowPathCode* GetSlowPath() const { return slow_path_; }
 
   DECLARE_INSTRUCTION(SuspendCheck);
 
  private:
   const uint32_t dex_pc_;
 
+  // Only used for code generation, in order to share the same slow path between back edges
+  // of the same loop.
+  SlowPathCode* slow_path_;
+
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 65c84e6..b6b1bb1 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,25 +29,26 @@
   kCompiledBaseline,
   kCompiledOptimized,
   kCompiledQuick,
-  kInstructionSimplifications,
   kInlinedInvoke,
-  kNotCompiledUnsupportedIsa,
-  kNotCompiledPathological,
+  kInstructionSimplifications,
+  kNotCompiledBranchOutsideMethodCode,
+  kNotCompiledCannotBuildSSA,
+  kNotCompiledCantAccesType,
+  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
-  kNotCompiledCannotBuildSSA,
   kNotCompiledNoCodegen,
-  kNotCompiledUnresolvedMethod,
-  kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
+  kNotCompiledPathological,
   kNotCompiledSpaceFilter,
-  kNotOptimizedTryCatch,
-  kNotOptimizedDisabled,
-  kNotCompiledCantAccesType,
-  kNotOptimizedRegisterAllocator,
   kNotCompiledUnhandledInstruction,
+  kNotCompiledUnresolvedField,
+  kNotCompiledUnresolvedMethod,
+  kNotCompiledUnsupportedIsa,
   kNotCompiledVerifyAtRuntime,
-  kNotCompiledClassNotVerified,
+  kNotOptimizedDisabled,
+  kNotOptimizedRegisterAllocator,
+  kNotOptimizedTryCatch,
   kRemovedCheckedCast,
   kRemovedDeadInstruction,
   kRemovedNullCheck,
@@ -98,23 +99,24 @@
       case kCompiledQuick : return "kCompiledQuick";
       case kInlinedInvoke : return "kInlinedInvoke";
       case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
+      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
+      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
+      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
       case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
       case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
       case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
-      case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
       case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+      case kNotCompiledPathological : return "kNotCompiledPathological";
       case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+      case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
+      case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
+      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
       case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
+      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
+      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
+      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
       case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
       case kRemovedNullCheck: return "kRemovedNullCheck";
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index b66e655..2a713cc 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -332,7 +332,7 @@
 }
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  return GetLocalsFor(block)->GetInstructionAt(local);
+  return GetLocalsFor(block)->Get(local);
 }
 
 void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -349,7 +349,7 @@
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
         block->AddPhi(phi);
-        current_locals_->SetRawEnvAt(local, phi);
+        current_locals_->Put(local, phi);
       }
     }
     // Save the loop header so that the last phase of the analysis knows which
@@ -389,7 +389,7 @@
         block->AddPhi(phi);
         value = phi;
       }
-      current_locals_->SetRawEnvAt(local, value);
+      current_locals_->Put(local, value);
     }
   }
 
@@ -520,7 +520,7 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
+  HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
   // If the operation requests a specific type, we make sure its input is of that type.
   if (load->GetType() != value->GetType()) {
     if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
@@ -534,7 +534,7 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+  current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
   store->GetBlock()->RemoveInstruction(store);
 }
 
@@ -544,7 +544,7 @@
   }
   HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
       GetGraph()->GetArena(), current_locals_->Size());
-  environment->CopyFrom(current_locals_);
+  environment->CopyFrom(*current_locals_);
   instruction->SetRawEnvironment(environment);
 }
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 265e95b..1c83c4b 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,14 +58,15 @@
 
   void BuildSsa();
 
-  HEnvironment* GetLocalsFor(HBasicBlock* block) {
-    HEnvironment* env = locals_for_.Get(block->GetBlockId());
-    if (env == nullptr) {
-      env = new (GetGraph()->GetArena()) HEnvironment(
+  GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+    GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId());
+    if (locals == nullptr) {
+      locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>(
           GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
-      locals_for_.Put(block->GetBlockId(), env);
+      locals->SetSize(GetGraph()->GetNumberOfVRegs());
+      locals_for_.Put(block->GetBlockId(), locals);
     }
-    return env;
+    return locals;
   }
 
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -93,14 +94,14 @@
   static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
 
   // Locals for the current block being visited.
-  HEnvironment* current_locals_;
+  GrowableArray<HInstruction*>* current_locals_;
 
   // Keep track of loop headers found. The last phase of the analysis iterates
   // over these blocks to set the inputs of their phis.
   GrowableArray<HBasicBlock*> loop_headers_;
 
   // HEnvironment for each block.
-  GrowableArray<HEnvironment*> locals_for_;
+  GrowableArray<GrowableArray<HInstruction*>*> locals_for_;
 
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 1784168..09a6648 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -75,9 +75,7 @@
     HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().Size();
     if (block->IsLoopHeader()) {
-      // We rely on having simplified the CFG.
-      DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges());
-      number_of_forward_predecessors--;
+      number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
     }
     forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
   }
@@ -264,13 +262,12 @@
     }
 
     if (block->IsLoopHeader()) {
-      HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+      size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
       for (uint32_t idx : live_in->Indexes()) {
         HInstruction* current = instructions_from_ssa_index_.Get(idx);
-        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
-                                                 back_edge->GetLifetimeEnd());
+        current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position);
       }
     }
   }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 7b98c4e..b550d8a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -973,7 +973,11 @@
         break;
       }
 
-      size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd();
+      // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
+      // all back edges is not necessary: anything used in the loop will have its use at the
+      // last back edge. If we want branches in a loop to have better register allocation than
+      // another branch, then it is the linear order we should change.
+      size_t back_edge_use_position = current->GetLifetimeEnd();
       if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
         // There was a use already seen in this loop. Therefore the previous call to `AddUse`
         // already inserted the backedge use. We can stop going outward.
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 00c241b..4cc9c3e 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -373,30 +373,26 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [14, 8, 8]\n"
-    "  2: IntConstant 5 [14]\n"
+    "  1: IntConstant 4 [5, 8, 8]\n"
+    "  2: IntConstant 5 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
-    "  5: Phi(0, 14) [12, 6, 6]\n"
+    "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
+    "  5: Phi(0, 2, 1) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 5, 4\n"
     "  8: Equal(1, 1) [9]\n"
     "  9: If(8)\n"
-    "BasicBlock 4, pred: 3, succ: 8\n"
+    "BasicBlock 4, pred: 3, succ: 2\n"
     "  10: Goto\n"
-    "BasicBlock 5, pred: 3, succ: 8\n"
+    "BasicBlock 5, pred: 3, succ: 2\n"
     "  11: Goto\n"
     "BasicBlock 6, pred: 2, succ: 7\n"
     "  12: Return(5)\n"
     "BasicBlock 7, pred: 6\n"
-    "  13: Exit\n"
-    // Synthesized single back edge of loop.
-    "BasicBlock 8, pred: 5, 4, succ: 2\n"
-    "  14: Phi(1, 2) [5]\n"
-    "  15: Goto\n";
+    "  13: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 282ab96..5e9653d 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -272,6 +272,10 @@
   EmitI(0x25, rs, rt, imm16);
 }
 
+void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  EmitI(0x27, rs, rt, imm16);
+}
+
 void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) {
   EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -480,6 +484,9 @@
     case kLoadWord:
       Lw(reg, base, offset);
       break;
+    case kLoadUnsignedWord:
+      Lwu(reg, base, offset);
+      break;
     case kLoadDoubleword:
       // TODO: alignment issues ???
       Ld(reg, base, offset);
@@ -512,7 +519,6 @@
     CHECK_EQ(0u, size) << dst;
   } else if (dst.IsGpuRegister()) {
     if (size == 4) {
-      CHECK_EQ(4u, size) << dst;
       LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset);
     } else if (size == 8) {
       CHECK_EQ(8u, size) << dst;
@@ -740,14 +746,13 @@
 void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
-  LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value());
+  LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value());
 }
 
-void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base,
-                            MemberOffset offs) {
+void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
-  CHECK(dest.IsGpuRegister() && dest.IsGpuRegister());
-  LoadFromOffset(kLoadWord, dest.AsGpuRegister(),
+  CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
+  LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
   if (kPoisonHeapReferences) {
     Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
@@ -921,7 +926,7 @@
     // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
-      LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(),
+      LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(),
                      SP, handle_scope_offset.Int32Value());
       in_reg = out_reg;
     }
@@ -944,7 +949,7 @@
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
     Label null_arg;
-    LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP,
+    LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
                    handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
@@ -998,7 +1003,7 @@
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(kLoadWord, scratch.AsGpuRegister(),
+  LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(),
                  SP, base.Int32Value());
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  scratch.AsGpuRegister(), offset.Int32Value());
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index b7f6a9e..2d7c661 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -36,6 +36,7 @@
   kLoadSignedHalfword,
   kLoadUnsignedHalfword,
   kLoadWord,
+  kLoadUnsignedWord,
   kLoadDoubleword
 };
 
@@ -85,6 +86,7 @@
   void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lui(GpuRegister rt, uint16_t imm16);
   void Mfhi(GpuRegister rd);
   void Mflo(GpuRegister rd);
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 10976bb..2613777 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -27,7 +27,8 @@
 #define rSELF $s1
 
 
-    //  Declare a function called name, sets up $gp.
+    // Declare a function called name, sets up $gp.
+    // This macro modifies t8.
 .macro ENTRY name
     .type \name, %function
     .global \name
@@ -35,10 +36,11 @@
     .balign 16
 \name:
     .cfi_startproc
+    // Set up $gp and store the previous $gp value to $t8. It will be pushed to the
+    // stack after the frame has been constructed.
+    .cpsetup $t9, $t8, \name
     // Ensure we get a sane starting CFA.
     .cfi_def_cfa $sp,0
-    // Load $gp. We expect that ".set noreorder" is in effect.
-    .cpload $t9
     // Declare a local convenience label to be branched to when $gp is already set up.
 .L\name\()_gp_set:
 .endm
diff --git a/runtime/arch/mips64/jni_entrypoints_mips64.S b/runtime/arch/mips64/jni_entrypoints_mips64.S
index 1085666..70d7d97 100644
--- a/runtime/arch/mips64/jni_entrypoints_mips64.S
+++ b/runtime/arch/mips64/jni_entrypoints_mips64.S
@@ -44,8 +44,11 @@
     .cfi_rel_offset 5, 8
     sd     $a0, 0($sp)
     .cfi_rel_offset 4, 0
-    jal    artFindNativeMethod  # (Thread*)
     move   $a0, $s1             # pass Thread::Current()
+    jal    artFindNativeMethod  # (Thread*)
+    .cpreturn                   # Restore gp from t8 in branch delay slot. gp is not used
+                                # anymore, and t8 may be clobbered in artFindNativeMethod.
+
     ld     $a0, 0($sp)          # restore registers from stack
     .cfi_restore 4
     ld     $a1, 8($sp)
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index d781e76..ff79b5d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -27,6 +27,19 @@
     .extern artDeliverPendingExceptionFromCode
 
     /*
+     * Macro that sets up $gp and stores the previous $gp value to $t8.
+     * This macro modifies v1 and t8.
+     */
+.macro SETUP_GP
+    move $v1, $ra
+    bal 1f
+    nop
+1:
+    .cpsetup $ra, $t8, 1b
+    move $ra, $v1
+.endm
+
+    /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding
@@ -44,8 +57,8 @@
     .cfi_rel_offset 31, 152
     sd     $s8, 144($sp)
     .cfi_rel_offset 30, 144
-    sd     $gp, 136($sp)
-    .cfi_rel_offset 28, 136
+    sd     $t8, 136($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 136        # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 128($sp)
     .cfi_rel_offset 23, 128
     sd     $s6, 120($sp)
@@ -102,8 +115,8 @@
     .cfi_rel_offset 31, 72
     sd     $s8, 64($sp)
     .cfi_rel_offset 30, 64
-    sd     $gp, 56($sp)
-    .cfi_rel_offset 28, 56
+    sd     $t8, 56($sp)            # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 56         # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 48($sp)
     .cfi_rel_offset 23, 48
     sd     $s6, 40($sp)
@@ -130,7 +143,7 @@
     .cfi_restore 31
     ld     $s8, 64($sp)
     .cfi_restore 30
-    ld     $gp, 56($sp)
+    ld     $t8, 56($sp)            # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 48($sp)
     .cfi_restore 23
@@ -146,6 +159,7 @@
     .cfi_restore 18
     daddiu $sp, $sp, 80
     .cfi_adjust_cfa_offset -80
+    .cpreturn
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -153,7 +167,7 @@
     .cfi_restore 31
     ld     $s8, 64($sp)
     .cfi_restore 30
-    ld     $gp, 56($sp)
+    ld     $t8, 56($sp)            # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 48($sp)
     .cfi_restore 23
@@ -167,6 +181,7 @@
     .cfi_restore 19
     ld     $s2, 8($sp)
     .cfi_restore 18
+    .cpreturn
     jalr   $zero, $ra
     daddiu $sp, $sp, 80
     .cfi_adjust_cfa_offset -80
@@ -175,12 +190,6 @@
 // This assumes the top part of these stack frame types are identical.
 #define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
 
-    /*
-     * Macro that sets up the callee save frame to conform with
-     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
-     * non-moving GC.
-     * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method*
-     */
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
     daddiu  $sp, $sp, -208
     .cfi_adjust_cfa_offset 208
@@ -194,8 +203,8 @@
     .cfi_rel_offset 31, 200
     sd     $s8, 192($sp)
     .cfi_rel_offset 30, 192
-    sd     $gp, 184($sp)
-    .cfi_rel_offset 28, 184
+    sd     $t8, 184($sp)           # t8 holds caller's gp, now save it to the stack.
+    .cfi_rel_offset 28, 184        # Value from gp is pushed, so set the cfi offset accordingly.
     sd     $s7, 176($sp)
     .cfi_rel_offset 23, 176
     sd     $s6, 168($sp)
@@ -232,16 +241,15 @@
     s.d    $f14, 32($sp)
     s.d    $f13, 24($sp)           # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset
     s.d    $f12, 16($sp)           # This isn't necessary to store.
-
-    # 1x8 bytes paddig + Method*
-    ld      $v0, %got(_ZN3art7Runtime9instance_E)($gp)
-    ld      $v0, 0($v0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
-    lwu     $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0)
-    sw      $v0, 0($sp)                                # Place Method* at bottom of stack.
-    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+    # 1x8 bytes padding + Method*
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes
+     * non-moving GC.
+     * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 word padding + Method*
+     */
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
     # load appropriate callee-save-method
@@ -253,12 +261,18 @@
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
 
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    sw      $a0, 0($sp)                                # Place Method* at bottom of stack.
+    sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
+.endm
+
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     ld     $ra, 200($sp)
     .cfi_restore 31
     ld     $s8, 192($sp)
     .cfi_restore 30
-    ld     $gp, 184($sp)
+    ld     $t8, 184($sp)           # Restore gp back to its temp storage.
     .cfi_restore 28
     ld     $s7, 176($sp)
     .cfi_restore 23
@@ -297,6 +311,7 @@
     l.d    $f13, 24($sp)
     l.d    $f12, 16($sp)
 
+    .cpreturn
     daddiu $sp, $sp, 208
     .cfi_adjust_cfa_offset -208
 .endm
@@ -307,6 +322,7 @@
      * exception is Thread::Current()->exception_
      */
 .macro DELIVER_PENDING_EXCEPTION
+    SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME     # save callee saves for throw
     dla     $t9, artDeliverPendingExceptionFromCode
     jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
@@ -348,7 +364,7 @@
      * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
      * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
      */
-ENTRY art_quick_do_long_jump
+ENTRY_NO_GP art_quick_do_long_jump
     l.d     $f0, 0($a1)
     l.d     $f1, 8($a1)
     l.d     $f2, 16($a1)
@@ -605,7 +621,7 @@
      *   a4 = JValue* result
      *   a5 = shorty
      */
-ENTRY art_quick_invoke_stub
+ENTRY_NO_GP art_quick_invoke_stub
     # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra onto the stack
     daddiu $sp, $sp, -48
     .cfi_adjust_cfa_offset 48
@@ -707,7 +723,7 @@
      *   a4 = JValue* result
      *   a5 = shorty
      */
-ENTRY art_quick_invoke_static_stub
+ENTRY_NO_GP art_quick_invoke_static_stub
 
     # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra, onto the stack
     daddiu $sp, $sp, -48
@@ -851,7 +867,8 @@
     sd     $a1, 8($sp)
     sd     $a0, 0($sp)
     jal    artIsAssignableFromCode
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artIsAssignableFromCode.
     beq    $v0, $zero, .Lthrow_class_cast_exception
     ld     $ra, 24($sp)
     jalr   $zero, $ra
@@ -863,6 +880,7 @@
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
+    SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     dla  $t9, artThrowClassCastException
     jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
@@ -908,13 +926,13 @@
     daddu $t1, $t1, $t0
     sb   $t0, ($t1)
     jalr $zero, $ra
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 .Ldo_aput_null:
     dsll  $a1, $a1, 2
     daddu $t0, $a0, $a1
     sw   $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     jalr $zero, $ra
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 .Lcheck_assignability:
     daddiu $sp, $sp, -64
     .cfi_adjust_cfa_offset 64
@@ -927,7 +945,8 @@
     move   $a1, $t1
     move   $a0, $t0
     jal    artIsAssignableFromCode  # (Class*, Class*)
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artIsAssignableFromCode.
     ld     $ra, 56($sp)
     ld     $t9, 24($sp)
     ld     $a2, 16($sp)
@@ -935,6 +954,7 @@
     ld     $a0, 0($sp)
     daddiu $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
+    SETUP_GP
     bne    $v0, $zero, .Ldo_aput
     nop
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
@@ -1312,7 +1332,7 @@
     bne    $a0, $zero, 1f
     daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL   # reset rSUSPEND to SUSPEND_CHECK_INTERVAL
     jalr   $zero, $ra
-    nop
+    .cpreturn                                 # Restore gp from t8 in branch delay slot.
 1:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME         # save callee saves for stack crawl
     jal    artTestSuspendFromCode             # (Thread*)
@@ -1326,8 +1346,7 @@
      */
     .extern artQuickProxyInvokeHandler
 ENTRY art_quick_proxy_invoke_handler
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    sd      $a0, 0($sp)            # place proxy method at bottom of frame
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
     move    $a2, rSELF             # pass Thread::Current
     jal     artQuickProxyInvokeHandler  # (Method* proxy method, receiver, Thread*, SP)
     move    $a3, $sp               # pass $sp
@@ -1352,6 +1371,7 @@
     dsll    $t0, 2                 # convert target method offset to bytes
     daddu   $a0, $t0               # get address of target method
     dla     $t9, art_quick_invoke_interface_trampoline
+    .cpreturn
     jalr    $zero, $t9
     lwu     $a0, MIRROR_OBJECT_ARRAY_DATA_OFFSET($a0)  # load the target method
 END art_quick_imt_conflict_trampoline
@@ -1377,8 +1397,7 @@
     .extern artQuickGenericJniTrampoline
     .extern artQuickGenericJniEndTrampoline
 ENTRY art_quick_generic_jni_trampoline
-    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
-    sd      $a0, 0($sp)            # store native ArtMethod* to bottom of stack
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0
     move    $s8, $sp               # save $sp
 
     # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
@@ -1481,8 +1500,7 @@
     .global art_quick_instrumentation_exit
 art_quick_instrumentation_exit:
     .cfi_startproc
-    daddiu   $t9, $ra, 4       # put current address into $t9 to rebuild $gp
-    .cpload  $t9
+    SETUP_GP
     move     $ra, $zero        # link register is to here, so clobber with 0 for later checks
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     move     $t0, $sp          # remember bottom of caller's frame
@@ -1494,8 +1512,11 @@
     mov.d    $f15, $f0         # pass fpr result
     move     $a2, $v0          # pass gpr result
     move     $a1, $t0          # pass $sp
-    jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
     move     $a0, rSELF        # pass Thread::Current
+    jal      artInstrumentationMethodExitFromCode  # (Thread*, SP, gpr_res, fpr_res)
+    .cpreturn                  # Restore gp from t8 in branch delay slot. gp is not used anymore,
+                               # and t8 may be clobbered in artInstrumentationMethodExitFromCode.
+
     move     $t9, $v0          # set aside returned link register
     move     $ra, $v1          # set link register for deoptimization
     ld       $v0, 0($sp)       # restore return values
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index de7804f..a7d24b8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -261,6 +261,132 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
+#elif defined(__mips__) && !defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
+        "addiu $sp, $sp, -64\n\t"
+        "sw $a0, 0($sp)\n\t"
+        "sw $a1, 4($sp)\n\t"
+        "sw $a2, 8($sp)\n\t"
+        "sw $a3, 12($sp)\n\t"
+        "sw $t0, 16($sp)\n\t"
+        "sw $t1, 20($sp)\n\t"
+        "sw $t2, 24($sp)\n\t"
+        "sw $t3, 28($sp)\n\t"
+        "sw $t4, 32($sp)\n\t"
+        "sw $t5, 36($sp)\n\t"
+        "sw $t6, 40($sp)\n\t"
+        "sw $t7, 44($sp)\n\t"
+        // Spill gp register since it is caller save.
+        "sw $gp, 52($sp)\n\t"
+
+        "addiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sw %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "addiu $sp, $sp, -20\n\t"
+        "sw %[arg0], 0($sp)\n\t"
+        "sw %[arg1], 4($sp)\n\t"
+        "sw %[arg2], 8($sp)\n\t"
+        "sw %[code], 12($sp)\n\t"
+        "sw %[self], 16($sp)\n\t"
+
+        // Load call params into the right registers.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $t9, 12($sp)\n\t"
+        "lw $s1, 16($sp)\n\t"
+        "addiu $sp, $sp, 20\n\t"
+
+        "jalr $t9\n\t"             // Call the stub.
+        "nop\n\t"
+        "addiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $a3, 12($sp)\n\t"
+        "lw $t0, 16($sp)\n\t"
+        "lw $t1, 20($sp)\n\t"
+        "lw $t2, 24($sp)\n\t"
+        "lw $t3, 28($sp)\n\t"
+        "lw $t4, 32($sp)\n\t"
+        "lw $t5, 36($sp)\n\t"
+        "lw $t6, 40($sp)\n\t"
+        "lw $t7, 44($sp)\n\t"
+        // Restore gp.
+        "lw $gp, 52($sp)\n\t"
+        "addiu $sp, $sp, 64\n\t"   // Free stack space, now sp as on entry.
+
+        "move %[result], $v0\n\t"  // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
+          "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
+#elif defined(__mips__) && defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a7 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -64\n\t"
+        "sd $a0, 0($sp)\n\t"
+        "sd $a1, 8($sp)\n\t"
+        "sd $a2, 16($sp)\n\t"
+        "sd $a3, 24($sp)\n\t"
+        "sd $a4, 32($sp)\n\t"
+        "sd $a5, 40($sp)\n\t"
+        "sd $a6, 48($sp)\n\t"
+        "sd $a7, 56($sp)\n\t"
+
+        "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sd %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "daddiu $sp, $sp, -40\n\t"
+        "sd %[arg0], 0($sp)\n\t"
+        "sd %[arg1], 8($sp)\n\t"
+        "sd %[arg2], 16($sp)\n\t"
+        "sd %[code], 24($sp)\n\t"
+        "sd %[self], 32($sp)\n\t"
+
+        // Load call params into the right registers.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $t9, 24($sp)\n\t"
+        "ld $s1, 32($sp)\n\t"
+        "daddiu $sp, $sp, 40\n\t"
+
+        "jalr $t9\n\t"              // Call the stub.
+        "nop\n\t"
+        "daddiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $a3, 24($sp)\n\t"
+        "ld $a4, 32($sp)\n\t"
+        "ld $a5, 40($sp)\n\t"
+        "ld $a6, 48($sp)\n\t"
+        "ld $a7, 56($sp)\n\t"
+        "daddiu $sp, $sp, 64\n\t"
+
+        "move %[result], $v0\n\t"   // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+          "t8", "t9", "k0", "k1", "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -487,6 +613,136 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
+#elif defined(__mips__) && !defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args.
+        "addiu $sp, $sp, -64\n\t"
+        "sw $a0, 0($sp)\n\t"
+        "sw $a1, 4($sp)\n\t"
+        "sw $a2, 8($sp)\n\t"
+        "sw $a3, 12($sp)\n\t"
+        "sw $t0, 16($sp)\n\t"
+        "sw $t1, 20($sp)\n\t"
+        "sw $t2, 24($sp)\n\t"
+        "sw $t3, 28($sp)\n\t"
+        "sw $t4, 32($sp)\n\t"
+        "sw $t5, 36($sp)\n\t"
+        "sw $t6, 40($sp)\n\t"
+        "sw $t7, 44($sp)\n\t"
+        // Spill gp register since it is caller save.
+        "sw $gp, 52($sp)\n\t"
+
+        "addiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sw %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "addiu $sp, $sp, -24\n\t"
+        "sw %[arg0], 0($sp)\n\t"
+        "sw %[arg1], 4($sp)\n\t"
+        "sw %[arg2], 8($sp)\n\t"
+        "sw %[code], 12($sp)\n\t"
+        "sw %[self], 16($sp)\n\t"
+        "sw %[hidden], 20($sp)\n\t"
+
+        // Load call params into the right registers.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $t9, 12($sp)\n\t"
+        "lw $s1, 16($sp)\n\t"
+        "lw $t0, 20($sp)\n\t"
+        "addiu $sp, $sp, 24\n\t"
+
+        "jalr $t9\n\t"             // Call the stub.
+        "nop\n\t"
+        "addiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "lw $a0, 0($sp)\n\t"
+        "lw $a1, 4($sp)\n\t"
+        "lw $a2, 8($sp)\n\t"
+        "lw $a3, 12($sp)\n\t"
+        "lw $t0, 16($sp)\n\t"
+        "lw $t1, 20($sp)\n\t"
+        "lw $t2, 24($sp)\n\t"
+        "lw $t3, 28($sp)\n\t"
+        "lw $t4, 32($sp)\n\t"
+        "lw $t5, 36($sp)\n\t"
+        "lw $t6, 40($sp)\n\t"
+        "lw $t7, 44($sp)\n\t"
+        // Restore gp.
+        "lw $gp, 52($sp)\n\t"
+        "addiu $sp, $sp, 64\n\t"   // Free stack space, now sp as on entry.
+
+        "move %[result], $v0\n\t"  // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
+          "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
+#elif defined(__mips__) && defined(__LP64__)
+    __asm__ __volatile__ (
+        // Spill a0-a7 which we say we don't clobber. May contain args.
+        "daddiu $sp, $sp, -64\n\t"
+        "sd $a0, 0($sp)\n\t"
+        "sd $a1, 8($sp)\n\t"
+        "sd $a2, 16($sp)\n\t"
+        "sd $a3, 24($sp)\n\t"
+        "sd $a4, 32($sp)\n\t"
+        "sd $a5, 40($sp)\n\t"
+        "sd $a6, 48($sp)\n\t"
+        "sd $a7, 56($sp)\n\t"
+
+        "daddiu $sp, $sp, -16\n\t"  // Reserve stack space, 16B aligned.
+        "sd %[referrer], 0($sp)\n\t"
+
+        // Push everything on the stack, so we don't rely on the order.
+        "daddiu $sp, $sp, -48\n\t"
+        "sd %[arg0], 0($sp)\n\t"
+        "sd %[arg1], 8($sp)\n\t"
+        "sd %[arg2], 16($sp)\n\t"
+        "sd %[code], 24($sp)\n\t"
+        "sd %[self], 32($sp)\n\t"
+        "sd %[hidden], 40($sp)\n\t"
+
+        // Load call params into the right registers.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $t9, 24($sp)\n\t"
+        "ld $s1, 32($sp)\n\t"
+        "ld $t0, 40($sp)\n\t"
+        "daddiu $sp, $sp, 48\n\t"
+
+        "jalr $t9\n\t"              // Call the stub.
+        "nop\n\t"
+        "daddiu $sp, $sp, 16\n\t"   // Drop the quick "frame".
+
+        // Restore stuff not named clobbered.
+        "ld $a0, 0($sp)\n\t"
+        "ld $a1, 8($sp)\n\t"
+        "ld $a2, 16($sp)\n\t"
+        "ld $a3, 24($sp)\n\t"
+        "ld $a4, 32($sp)\n\t"
+        "ld $a5, 40($sp)\n\t"
+        "ld $a6, 48($sp)\n\t"
+        "ld $a7, 56($sp)\n\t"
+        "daddiu $sp, $sp, 64\n\t"
+
+        "move %[result], $v0\n\t"   // Store the call result.
+        : [result] "=r" (result)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+          "t8", "t9", "k0", "k1", "fp", "ra",
+          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
+          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
+          "f27", "f28", "f29", "f30", "f31",
+          "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -521,7 +777,8 @@
   // Method with 32b arg0, 64b arg1
   size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
                               mirror::ArtMethod* referrer) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
     // Just pass through.
     return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
 #else
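
The 64-bit targets can simply delegate because the whole uint64_t argument fits in one GPR, which is why mips64 now joins x86-64 and arm64 on the pass-through branch; the 32-bit fallback (not shown here) presumably has to hand the value over as two words. A small sketch of that distinction, with the split written only as an assumption:

    #include <cstdint>

    // Assumed illustration, not the actual #else body of Invoke3UWithReferrer.
    void ForwardArg(uint64_t arg1) {
    #if defined(__LP64__)
      uint64_t whole = arg1;                            // Fits in a single register.
      (void)whole;
    #else
      uint32_t lo = static_cast<uint32_t>(arg1);        // Low word.
      uint32_t hi = static_cast<uint32_t>(arg1 >> 32);  // High word.
      (void)lo; (void)hi;
    #endif
    }
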
@@ -549,7 +806,7 @@
 
 
 TEST_F(StubTest, Memcpy) {
-#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__)) || defined(__mips__)
   Thread* self = Thread::Current();
 
   uint32_t orig[20];
@@ -586,7 +843,8 @@
 }
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -659,7 +917,8 @@
 
 // NO_THREAD_SAFETY_ANALYSIS as we do not want to grab exclusive mutator lock for MonitorInfo.
 static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -809,12 +1068,14 @@
   TestUnlockObject(this);
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_check_cast(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
   const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast);
@@ -865,7 +1126,8 @@
 TEST_F(StubTest, APutObj) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
   // Do not check non-checked ones, we'd need handlers and stuff...
@@ -998,7 +1260,8 @@
 TEST_F(StubTest, AllocObject) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // This will lead to OOM  error messages in the log.
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
@@ -1123,7 +1386,8 @@
 TEST_F(StubTest, AllocObjectArray) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   // This will lead to OOM  error messages in the log.
@@ -1292,7 +1556,8 @@
 static void GetSetBooleanStatic(ArtField* f, Thread* self,
                                 mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   constexpr size_t num_values = 5;
   uint8_t values[num_values] = { 0, 1, 2, 128, 0xFF };
 
@@ -1322,7 +1587,8 @@
 static void GetSetByteStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                              StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1352,7 +1618,8 @@
 static void GetSetBooleanInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self,
                                   mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint8_t values[] = { 0, true, 2, 128, 0xFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1386,7 +1653,8 @@
 static void GetSetByteInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1420,7 +1688,8 @@
 static void GetSetCharStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                              StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1449,7 +1718,8 @@
 static void GetSetShortStatic(ArtField* f, Thread* self,
                               mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1479,7 +1749,8 @@
 static void GetSetCharInstance(Handle<mirror::Object>* obj, ArtField* f,
                                Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1512,7 +1783,8 @@
 static void GetSetShortInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1546,7 +1818,8 @@
 static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1563,7 +1836,11 @@
                                            self,
                                            referrer);
 
+#if defined(__mips__) && defined(__LP64__)
+    EXPECT_EQ(static_cast<uint32_t>(res), values[i]) << "Iteration " << i;
+#else
     EXPECT_EQ(res, values[i]) << "Iteration " << i;
+#endif
   }
 #else
   UNUSED(f, self, referrer, test);
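
The extra cast is needed because the test reads the result back through a size_t-returning invoker: MIPS64 keeps 32-bit values sign-extended in 64-bit registers, so a stored value such as 0xFFFFFFFF comes back as 0xFFFFFFFFFFFFFFFF and a raw comparison against the uint32_t test value fails. A self-contained illustration of the effect (ordinary C++, independent of the stub itself):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A 32-bit result sign-extended into a 64-bit register.
      int32_t stored = static_cast<int32_t>(0xFFFFFFFFu);
      uint64_t reg = static_cast<uint64_t>(static_cast<int64_t>(stored));  // 0xFFFFFFFFFFFFFFFF

      uint32_t expected = 0xFFFFFFFFu;
      assert(reg != expected);                         // Comparing the raw 64-bit value fails.
      assert(static_cast<uint32_t>(reg) == expected);  // Truncating first matches, as in the test.
      return 0;
    }
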
@@ -1577,7 +1854,8 @@
 static void GetSet32Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1611,7 +1889,8 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 
 static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
                                  mirror::ArtMethod* referrer, StubTest* test)
@@ -1636,7 +1915,8 @@
 static void GetSetObjStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                             StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_static(f->GetDexFieldIndex(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1653,7 +1933,8 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
 static void set_and_check_instance(ArtField* f, mirror::Object* trg,
                                    mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
                                    StubTest* test)
@@ -1681,7 +1962,8 @@
 static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f,
                               Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1703,7 +1985,8 @@
 static void GetSet64Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1733,7 +2016,8 @@
 static void GetSet64Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
+    defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
@@ -1933,7 +2217,8 @@
 }
 
 TEST_F(StubTest, IMT) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
+    (defined(__x86_64__) && !defined(__APPLE__))
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
   Thread* self = Thread::Current();
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index a115fbe..de4783a 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 120
+#define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
             art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
 
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 9917378..34fdd8d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -182,7 +182,7 @@
   }
 
 #define TEST_DISABLED_FOR_MIPS() \
-  if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { \
+  if (kRuntimeISA == kMips) { \
     printf("WARNING: TEST DISABLED FOR MIPS\n"); \
     return; \
   }
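
With the check narrowed to kMips, the tests guarded by this macro now also run on mips64. Usage is unchanged; a minimal sketch of how a test body invokes it (the test name is invented for illustration):

    TEST_F(CommonRuntimeTest, SomeMipsSensitiveBehavior) {  // Hypothetical test.
      TEST_DISABLED_FOR_MIPS();  // Prints a warning and returns only when kRuntimeISA == kMips.
      // ... body is now exercised on mips64 as well ...
    }
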
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index dc1b4f1..9b33e50 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -57,6 +57,9 @@
 
 namespace art {
 
+// The key used by the debugger to identify itself when requesting instrumentation updates.
+static constexpr const char* kDbgInstrumentationKey = "Debugger";
+
 static const size_t kMaxAllocRecordStackDepth = 16;  // Max 255.
 static const size_t kDefaultNumAllocRecords = 64*1024;  // Must be a power of 2. 2BE can hold 64k-1.
 
@@ -232,13 +235,29 @@
   virtual ~DebugInstrumentationListener() {}
 
   void MethodEntered(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+                     uint32_t dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
+    if (IsListeningToDexPcMoved()) {
+      // We also listen to kDexPcMoved instrumentation event so we know the DexPcMoved method is
+      // going to be called right after us. To avoid sending JDWP events twice for this location,
+      // we report the event in DexPcMoved. However, we must remember this is a method entry, so
+      // we still send the METHOD_ENTRY event and can group it with other events for this location
+      // like BREAKPOINT or SINGLE_STEP (or even METHOD_EXIT if this is a RETURN instruction).
+      thread->SetDebugMethodEntry();
+    } else if (IsListeningToMethodExit() && IsReturn(method, dex_pc)) {
+      // We also listen to kMethodExited instrumentation event and the current instruction is a
+      // RETURN so we know the MethodExited method is going to be called right after us. To avoid
+      // sending JDWP events twice for this location, we report the event(s) in MethodExited.
+      // However, we must remember this is a method entry, so we still send the METHOD_ENTRY event
+      // and can group it with other events for this location like BREAKPOINT or SINGLE_STEP.
+      thread->SetDebugMethodEntry();
+    } else {
+      Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
+    }
   }
 
   void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
@@ -248,14 +267,20 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, Dbg::kMethodExit, &return_value);
+    uint32_t events = Dbg::kMethodExit;
+    if (thread->IsDebugMethodEntry()) {
+      // It is also the method entry.
+      DCHECK(IsReturn(method, dex_pc));
+      events |= Dbg::kMethodEntry;
+      thread->ClearDebugMethodEntry();
+    }
+    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, events, &return_value);
   }
 
-  void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                    uint32_t dex_pc)
+  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method, uint32_t dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
-    UNUSED(thread, this_object, method, dex_pc);
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
                << " " << dex_pc;
   }
@@ -263,13 +288,27 @@
   void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
                   uint32_t new_dex_pc)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, 0, nullptr);
+    if (IsListeningToMethodExit() && IsReturn(method, new_dex_pc)) {
+      // We also listen to kMethodExited instrumentation event and the current instruction is a
+      // RETURN so we know the MethodExited method is going to be called right after us. Like in
+      // MethodEntered, we delegate event reporting to MethodExited.
+      // Besides, if this RETURN instruction is the only one in the method, we can send multiple
+      // JDWP events in the same packet: METHOD_ENTRY, METHOD_EXIT, BREAKPOINT and/or SINGLE_STEP.
+      // Therefore, we must not clear the debug method entry flag here.
+    } else {
+      uint32_t events = 0;
+      if (thread->IsDebugMethodEntry()) {
+        // It is also the method entry.
+        events = Dbg::kMethodEntry;
+        thread->ClearDebugMethodEntry();
+      }
+      Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, events, nullptr);
+    }
   }
 
-  void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                 uint32_t dex_pc, ArtField* field)
+  void FieldRead(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object,
+                 mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(thread);
     Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
   }
 
@@ -293,6 +332,26 @@
   }
 
  private:
+  static bool IsReturn(mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const DexFile::CodeItem* code_item = method->GetCodeItem();
+    const Instruction* instruction = Instruction::At(&code_item->insns_[dex_pc]);
+    return instruction->IsReturn();
+  }
+
+  static bool IsListeningToDexPcMoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsListeningTo(instrumentation::Instrumentation::kDexPcMoved);
+  }
+
+  static bool IsListeningToMethodExit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return IsListeningTo(instrumentation::Instrumentation::kMethodExited);
+  }
+
+  static bool IsListeningTo(instrumentation::Instrumentation::InstrumentationEvent event)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (Dbg::GetInstrumentationEvents() & event) != 0;
+  }
+
   DISALLOW_COPY_AND_ASSIGN(DebugInstrumentationListener);
 } gDebugInstrumentationListener;
 
@@ -677,7 +736,7 @@
       instrumentation_events_ = 0;
     }
     if (RequiresDeoptimization()) {
-      runtime->GetInstrumentation()->DisableDeoptimization();
+      runtime->GetInstrumentation()->DisableDeoptimization(kDbgInstrumentationKey);
     }
     gDebuggerActive = false;
   }
@@ -2998,12 +3057,12 @@
       break;
     case DeoptimizationRequest::kFullDeoptimization:
       VLOG(jdwp) << "Deoptimize the world ...";
-      instrumentation->DeoptimizeEverything();
+      instrumentation->DeoptimizeEverything(kDbgInstrumentationKey);
       VLOG(jdwp) << "Deoptimize the world DONE";
       break;
     case DeoptimizationRequest::kFullUndeoptimization:
       VLOG(jdwp) << "Undeoptimize the world ...";
-      instrumentation->UndeoptimizeEverything();
+      instrumentation->UndeoptimizeEverything(kDbgInstrumentationKey);
       VLOG(jdwp) << "Undeoptimize the world DONE";
       break;
     case DeoptimizationRequest::kSelectiveDeoptimization:
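
The net effect of the listener changes above is that several JDWP event kinds for the same location are folded into a single UpdateDebugger call by OR-ing Dbg event bits, instead of being posted one at a time. A condensed sketch of the grouping, lifted from the MethodExited path (the surrounding plumbing is omitted):

    // When MethodEntered() deferred its report, fold METHOD_ENTRY into the same
    // packet as METHOD_EXIT rather than posting two separate JDWP events.
    uint32_t events = Dbg::kMethodExit;
    if (thread->IsDebugMethodEntry()) {   // Flag set earlier by MethodEntered().
      events |= Dbg::kMethodEntry;
      thread->ClearDebugMethodEntry();
    }
    // One call now carries both bits, plus whatever breakpoint/single-step events
    // UpdateDebugger itself finds for this dex_pc.
    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, events, &return_value);
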
diff --git a/runtime/debugger.h b/runtime/debugger.h
index fe90eb6..789a0a4 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -714,6 +714,10 @@
 
   static JDWP::JdwpState* GetJdwpState();
 
+  static uint32_t GetInstrumentationEvents() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return instrumentation_events_;
+  }
+
  private:
   static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor,
                                        ScopedObjectAccessUnchecked& soa, int slot,
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 1068e90..55a8411 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -385,7 +385,7 @@
       LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root";
       mark_sweep_->VerifyRoots();
       PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
-      MemMap::DumpMaps(LOG(INTERNAL_FATAL));
+      MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
       LOG(FATAL) << "Can't mark invalid object";
     }
   }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index cbbc76c..4129d75 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -491,7 +491,7 @@
     bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(),
                                       non_moving_space_->GetMemMap());
     if (!no_gap) {
-      MemMap::DumpMaps(LOG(ERROR));
+      MemMap::DumpMaps(LOG(ERROR), true);
       LOG(FATAL) << "There's a gap between the image space and the non-moving space";
     }
   }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e6c333d..f810bc8 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -16,13 +16,10 @@
 
 #include "instrumentation.h"
 
-#include <sys/uio.h>
-
 #include <sstream>
 
 #include "arch/context.h"
 #include "atomic.h"
-#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
@@ -39,16 +36,13 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
-#include "os.h"
-#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "thread_list.h"
 
 namespace art {
-
 namespace instrumentation {
 
-const bool kVerboseInstrumentation = false;
+constexpr bool kVerboseInstrumentation = false;
 
 static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -64,7 +58,7 @@
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
       have_field_read_listeners_(false), have_field_write_listeners_(false),
-      have_exception_caught_listeners_(false),
+      have_exception_caught_listeners_(false), have_backward_branch_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
@@ -166,7 +160,7 @@
 // existing instrumentation frames.
 static void InstrumentationInstallStack(Thread* thread, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  struct InstallStackVisitor : public StackVisitor {
+  struct InstallStackVisitor FINAL : public StackVisitor {
     InstallStackVisitor(Thread* thread_in, Context* context, uintptr_t instrumentation_exit_pc)
         : StackVisitor(thread_in, context),
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
@@ -175,7 +169,7 @@
           last_return_pc_(0) {
     }
 
-    virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       mirror::ArtMethod* m = GetMethod();
       if (m == nullptr) {
         if (kVerboseInstrumentation) {
@@ -306,7 +300,7 @@
 // Removes the instrumentation exit pc as the return PC for every quick frame.
 static void InstrumentationRestoreStack(Thread* thread, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  struct RestoreStackVisitor : public StackVisitor {
+  struct RestoreStackVisitor FINAL : public StackVisitor {
     RestoreStackVisitor(Thread* thread_in, uintptr_t instrumentation_exit_pc,
                         Instrumentation* instrumentation)
         : StackVisitor(thread_in, nullptr), thread_(thread_in),
@@ -315,7 +309,7 @@
           instrumentation_stack_(thread_in->GetInstrumentationStack()),
           frames_removed_(0) {}
 
-    virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       if (instrumentation_stack_->size() == 0) {
         return false;  // Stop.
       }
@@ -390,25 +384,29 @@
   }
 }
 
+static bool HasEvent(Instrumentation::InstrumentationEvent expected, uint32_t events) {
+  return (events & expected) != 0;
+}
+
 void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  if ((events & kMethodEntered) != 0) {
+  if (HasEvent(kMethodEntered, events)) {
     method_entry_listeners_.push_back(listener);
     have_method_entry_listeners_ = true;
   }
-  if ((events & kMethodExited) != 0) {
+  if (HasEvent(kMethodExited, events)) {
     method_exit_listeners_.push_back(listener);
     have_method_exit_listeners_ = true;
   }
-  if ((events & kMethodUnwind) != 0) {
+  if (HasEvent(kMethodUnwind, events)) {
     method_unwind_listeners_.push_back(listener);
     have_method_unwind_listeners_ = true;
   }
-  if ((events & kBackwardBranch) != 0) {
+  if (HasEvent(kBackwardBranch, events)) {
     backward_branch_listeners_.push_back(listener);
     have_backward_branch_listeners_ = true;
   }
-  if ((events & kDexPcMoved) != 0) {
+  if (HasEvent(kDexPcMoved, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_dex_pc_listeners_) {
       modified = new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
@@ -419,7 +417,7 @@
     dex_pc_listeners_.reset(modified);
     have_dex_pc_listeners_ = true;
   }
-  if ((events & kFieldRead) != 0) {
+  if (HasEvent(kFieldRead, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_field_read_listeners_) {
       modified = new std::list<InstrumentationListener*>(*field_read_listeners_.get());
@@ -430,7 +428,7 @@
     field_read_listeners_.reset(modified);
     have_field_read_listeners_ = true;
   }
-  if ((events & kFieldWritten) != 0) {
+  if (HasEvent(kFieldWritten, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_field_write_listeners_) {
       modified = new std::list<InstrumentationListener*>(*field_write_listeners_.get());
@@ -441,7 +439,7 @@
     field_write_listeners_.reset(modified);
     have_field_write_listeners_ = true;
   }
-  if ((events & kExceptionCaught) != 0) {
+  if (HasEvent(kExceptionCaught, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_exception_caught_listeners_) {
       modified = new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
@@ -458,102 +456,104 @@
 void Instrumentation::RemoveListener(InstrumentationListener* listener, uint32_t events) {
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
 
-  if ((events & kMethodEntered) != 0) {
-    if (have_method_entry_listeners_) {
-      method_entry_listeners_.remove(listener);
-      have_method_entry_listeners_ = !method_entry_listeners_.empty();
-    }
+  if (HasEvent(kMethodEntered, events) && have_method_entry_listeners_) {
+    method_entry_listeners_.remove(listener);
+    have_method_entry_listeners_ = !method_entry_listeners_.empty();
   }
-  if ((events & kMethodExited) != 0) {
-    if (have_method_exit_listeners_) {
-      method_exit_listeners_.remove(listener);
-      have_method_exit_listeners_ = !method_exit_listeners_.empty();
-    }
+  if (HasEvent(kMethodExited, events) && have_method_exit_listeners_) {
+    method_exit_listeners_.remove(listener);
+    have_method_exit_listeners_ = !method_exit_listeners_.empty();
   }
-  if ((events & kMethodUnwind) != 0) {
-    if (have_method_unwind_listeners_) {
+  if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
       method_unwind_listeners_.remove(listener);
       have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
-    }
   }
-  if ((events & kDexPcMoved) != 0) {
+  if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
+    backward_branch_listeners_.remove(listener);
+    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
+  }
+  if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
+    modified->remove(listener);
+    have_dex_pc_listeners_ = !modified->empty();
     if (have_dex_pc_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
-      modified->remove(listener);
-      have_dex_pc_listeners_ = !modified->empty();
-      if (have_dex_pc_listeners_) {
-        dex_pc_listeners_.reset(modified);
-      } else {
-        dex_pc_listeners_.reset();
-        delete modified;
-      }
+      dex_pc_listeners_.reset(modified);
+    } else {
+      dex_pc_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kFieldRead) != 0) {
+  if (HasEvent(kFieldRead, events) && have_field_read_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*field_read_listeners_.get());
+    modified->remove(listener);
+    have_field_read_listeners_ = !modified->empty();
     if (have_field_read_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*field_read_listeners_.get());
-      modified->remove(listener);
-      have_field_read_listeners_ = !modified->empty();
-      if (have_field_read_listeners_) {
-        field_read_listeners_.reset(modified);
-      } else {
-        field_read_listeners_.reset();
-        delete modified;
-      }
+      field_read_listeners_.reset(modified);
+    } else {
+      field_read_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kFieldWritten) != 0) {
+  if (HasEvent(kFieldWritten, events) && have_field_write_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*field_write_listeners_.get());
+    modified->remove(listener);
+    have_field_write_listeners_ = !modified->empty();
     if (have_field_write_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*field_write_listeners_.get());
-      modified->remove(listener);
-      have_field_write_listeners_ = !modified->empty();
-      if (have_field_write_listeners_) {
-        field_write_listeners_.reset(modified);
-      } else {
-        field_write_listeners_.reset();
-        delete modified;
-      }
+      field_write_listeners_.reset(modified);
+    } else {
+      field_write_listeners_.reset();
+      delete modified;
     }
   }
-  if ((events & kExceptionCaught) != 0) {
+  if (HasEvent(kExceptionCaught, events) && have_exception_caught_listeners_) {
+    std::list<InstrumentationListener*>* modified =
+        new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
+    modified->remove(listener);
+    have_exception_caught_listeners_ = !modified->empty();
     if (have_exception_caught_listeners_) {
-      std::list<InstrumentationListener*>* modified =
-          new std::list<InstrumentationListener*>(*exception_caught_listeners_.get());
-      modified->remove(listener);
-      have_exception_caught_listeners_ = !modified->empty();
-      if (have_exception_caught_listeners_) {
-        exception_caught_listeners_.reset(modified);
-      } else {
-        exception_caught_listeners_.reset();
-        delete modified;
-      }
+      exception_caught_listeners_.reset(modified);
+    } else {
+      exception_caught_listeners_.reset();
+      delete modified;
     }
   }
   UpdateInterpreterHandlerTable();
 }
 
-void Instrumentation::ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter) {
-  interpret_only_ = require_interpreter || forced_interpret_only_;
-  // Compute what level of instrumentation is required and compare to current.
-  int desired_level, current_level;
-  if (require_interpreter) {
-    desired_level = 2;
-  } else if (require_entry_exit_stubs) {
-    desired_level = 1;
-  } else {
-    desired_level = 0;
-  }
+Instrumentation::InstrumentationLevel Instrumentation::GetCurrentInstrumentationLevel() const {
   if (interpreter_stubs_installed_) {
-    current_level = 2;
+    return InstrumentationLevel::kInstrumentWithInterpreter;
   } else if (entry_exit_stubs_installed_) {
-    current_level = 1;
+    return InstrumentationLevel::kInstrumentWithInstrumentationStubs;
   } else {
-    current_level = 0;
+    return InstrumentationLevel::kInstrumentNothing;
   }
-  if (desired_level == current_level) {
+}
+
+void Instrumentation::ConfigureStubs(const char* key, InstrumentationLevel desired_level) {
+  // Store the instrumentation level for this key or remove it.
+  if (desired_level == InstrumentationLevel::kInstrumentNothing) {
+    // The client no longer needs instrumentation.
+    requested_instrumentation_levels_.erase(key);
+  } else {
+    // The client needs instrumentation.
+    requested_instrumentation_levels_.Overwrite(key, desired_level);
+  }
+
+  // Look for the highest required instrumentation level.
+  InstrumentationLevel requested_level = InstrumentationLevel::kInstrumentNothing;
+  for (const auto& v : requested_instrumentation_levels_) {
+    requested_level = std::max(requested_level, v.second);
+  }
+
+  interpret_only_ = (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) ||
+                    forced_interpret_only_;
+
+  InstrumentationLevel current_level = GetCurrentInstrumentationLevel();
+  if (requested_level == current_level) {
     // We're already set.
     return;
   }
@@ -561,12 +561,14 @@
   Runtime* runtime = Runtime::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
-  if (desired_level > 0) {
-    if (require_interpreter) {
+  if (requested_level > InstrumentationLevel::kInstrumentNothing) {
+    if (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) {
       interpreter_stubs_installed_ = true;
-    } else {
-      CHECK(require_entry_exit_stubs);
       entry_exit_stubs_installed_ = true;
+    } else {
+      CHECK_EQ(requested_level, InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+      entry_exit_stubs_installed_ = true;
+      interpreter_stubs_installed_ = false;
     }
     runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
     instrumentation_stubs_installed_ = true;
@@ -590,8 +592,7 @@
   }
 }
 
-static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
-  UNUSED(arg);
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
   thread->ResetQuickAllocEntryPointsForThread();
 }
 
@@ -804,11 +805,11 @@
   deoptimization_enabled_ = true;
 }
 
-void Instrumentation::DisableDeoptimization() {
+void Instrumentation::DisableDeoptimization(const char* key) {
   CHECK_EQ(deoptimization_enabled_, true);
   // If we deoptimized everything, undo it.
   if (interpreter_stubs_installed_) {
-    UndeoptimizeEverything();
+    UndeoptimizeEverything(key);
   }
   // Undeoptimized selected methods.
   while (true) {
@@ -828,25 +829,35 @@
 
 // Indicates if instrumentation should notify method enter/exit events to the listeners.
 bool Instrumentation::ShouldNotifyMethodEnterExitEvents() const {
+  if (!HasMethodEntryListeners() && !HasMethodExitListeners()) {
+    return false;
+  }
   return !deoptimization_enabled_ && !interpreter_stubs_installed_;
 }
 
-void Instrumentation::DeoptimizeEverything() {
-  CHECK(!interpreter_stubs_installed_);
-  ConfigureStubs(false, true);
+void Instrumentation::DeoptimizeEverything(const char* key) {
+  CHECK(deoptimization_enabled_);
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentWithInterpreter);
 }
 
-void Instrumentation::UndeoptimizeEverything() {
+void Instrumentation::UndeoptimizeEverything(const char* key) {
   CHECK(interpreter_stubs_installed_);
-  ConfigureStubs(false, false);
+  CHECK(deoptimization_enabled_);
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
-void Instrumentation::EnableMethodTracing(bool require_interpreter) {
-  ConfigureStubs(!require_interpreter, require_interpreter);
+void Instrumentation::EnableMethodTracing(const char* key, bool needs_interpreter) {
+  InstrumentationLevel level;
+  if (needs_interpreter) {
+    level = InstrumentationLevel::kInstrumentWithInterpreter;
+  } else {
+    level = InstrumentationLevel::kInstrumentWithInstrumentationStubs;
+  }
+  ConfigureStubs(key, level);
 }
 
-void Instrumentation::DisableMethodTracing() {
-  ConfigureStubs(false, false);
+void Instrumentation::DisableMethodTracing(const char* key) {
+  ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing);
 }
 
 const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method, size_t pointer_size) const {
@@ -896,7 +907,7 @@
 void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
                                         mirror::ArtMethod* method,
                                         uint32_t dex_pc) const {
-  if (have_method_unwind_listeners_) {
+  if (HasMethodUnwindListeners()) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
       listener->MethodUnwind(thread, this_object, method, dex_pc);
     }
@@ -906,11 +917,9 @@
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                                           mirror::ArtMethod* method,
                                           uint32_t dex_pc) const {
-  if (HasDexPcListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->DexPcMoved(thread, this_object, method, dex_pc);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->DexPcMoved(thread, this_object, method, dex_pc);
   }
 }
 
@@ -924,22 +933,18 @@
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
-  if (HasFieldReadListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->FieldRead(thread, this_object, method, dex_pc, field);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->FieldRead(thread, this_object, method, dex_pc, field);
   }
 }
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field, const JValue& field_value) const {
-  if (HasFieldWriteListeners()) {
-    std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
-    for (InstrumentationListener* listener : *original.get()) {
-      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
-    }
+  std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
+  for (InstrumentationListener* listener : *original.get()) {
+    listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
   }
 }
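The *EventImpl methods above follow the copy-on-write listener pattern used throughout instrumentation.cc: RemoveListener publishes a modified copy of the std::list, and dispatch takes a shared_ptr snapshot before iterating, so the loop runs over a stable copy of the registration list. Below is a minimal standalone sketch of that pattern; the Listener and Dispatcher names are illustrative, not ART types, and the locking ART relies on (exclusive vs. shared mutator_lock_) is only noted in comments.

#include <list>
#include <memory>

// Illustrative types, not ART classes.
struct Listener {
  virtual ~Listener() {}
  virtual void OnEvent() = 0;
};

class Dispatcher {
 public:
  // Writer side (ART holds the mutator lock exclusively here): build a
  // modified copy of the list, then publish it by swapping the shared_ptr.
  void Remove(Listener* listener) {
    auto modified = std::make_shared<std::list<Listener*>>(*listeners_);
    modified->remove(listener);
    if (modified->empty()) {
      listeners_.reset();
    } else {
      listeners_ = modified;
    }
  }

  // Reader side (shared lock in ART): snapshot the current list so the
  // iteration keeps using the same list even if a writer publishes a new one.
  void Dispatch() const {
    std::shared_ptr<std::list<Listener*>> original(listeners_);
    if (original == nullptr) {
      return;
    }
    for (Listener* listener : *original) {
      listener->OnEvent();
    }
  }

 private:
  std::shared_ptr<std::list<Listener*>> listeners_ =
      std::make_shared<std::list<Listener*>>();
};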
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 8b7fcca..7d70d21 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -22,11 +22,10 @@
 #include <map>
 
 #include "arch/instruction_set.h"
-#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
-#include "object_callbacks.h"
+#include "safe_map.h"
 
 namespace art {
 namespace mirror {
@@ -67,8 +66,6 @@
                              uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is exited.
-  // TODO: its likely passing the return value would be useful, however, we may need to get and
-  //       parse the shorty to determine what kind of register holds the result.
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
                             mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
@@ -119,6 +116,12 @@
     kBackwardBranch = 0x80,
   };
 
+  enum class InstrumentationLevel {
+    kInstrumentNothing,                   // Execute without instrumentation.
+    kInstrumentWithInstrumentationStubs,  // Execute with instrumentation entry/exit stubs.
+    kInstrumentWithInterpreter            // Execute with interpreter.
+  };
+
   Instrumentation();
 
   // Add a listener to be notified of the masked together sent of instrumentation events. This
@@ -138,7 +141,7 @@
   void EnableDeoptimization()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
-  void DisableDeoptimization()
+  void DisableDeoptimization(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
   bool AreAllMethodsDeoptimized() const {
@@ -147,12 +150,12 @@
   bool ShouldNotifyMethodEnterExitEvents() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Executes everything with interpreter.
-  void DeoptimizeEverything()
+  void DeoptimizeEverything(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
   // Executes everything with compiled code (or interpreter if there is no code).
-  void UndeoptimizeEverything()
+  void UndeoptimizeEverything(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
@@ -170,18 +173,19 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_, deoptimized_methods_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Indicates whether the method has been deoptimized so it is executed with the interpreter.
   bool IsDeoptimized(mirror::ArtMethod* method)
       LOCKS_EXCLUDED(deoptimized_methods_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Enable method tracing by installing instrumentation entry/exit stubs.
-  void EnableMethodTracing(
-      bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
+  // Enable method tracing by installing instrumentation entry/exit stubs or interpreter.
+  void EnableMethodTracing(const char* key,
+                           bool needs_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
-  // Disable method tracing by uninstalling instrumentation entry/exit stubs.
-  void DisableMethodTracing()
+  // Disable method tracing by uninstalling instrumentation entry/exit stubs or interpreter.
+  void DisableMethodTracing(const char* key)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
@@ -236,6 +240,10 @@
     return have_method_exit_listeners_;
   }
 
+  bool HasMethodUnwindListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return have_method_unwind_listeners_;
+  }
+
   bool HasDexPcListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return have_dex_pc_listeners_;
   }
@@ -355,8 +363,14 @@
       LOCKS_EXCLUDED(deoptimized_methods_lock_);
 
  private:
+  InstrumentationLevel GetCurrentInstrumentationLevel() const;
+
   // Does the job of installing or removing instrumentation code within methods.
-  void ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter)
+  // In order to support multiple clients using instrumentation at the same time,
+  // the caller must pass a unique key (a string) identifying it so we can remember
+  // which instrumentation level it requires. The effective instrumentation level is
+  // therefore the highest level requested by any client.
+  void ConfigureStubs(const char* key, InstrumentationLevel desired_instrumentation_level)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_,
                      deoptimized_methods_lock_);
@@ -452,6 +466,11 @@
   // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
+  // Contains the instrumentation level required by each client of the instrumentation.
+  // Each client is identified by a string key.
+  typedef SafeMap<const char*, InstrumentationLevel> InstrumentationLevelTable;
+  InstrumentationLevelTable requested_instrumentation_levels_ GUARDED_BY(Locks::mutator_lock_);
+
   // The event listeners, written to with the mutator_lock_ exclusively held.
   std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
@@ -481,9 +500,12 @@
   size_t quick_alloc_entry_points_instrumentation_counter_
       GUARDED_BY(Locks::instrument_entrypoints_lock_);
 
+  friend class InstrumentationTest;  // For GetCurrentInstrumentationLevel and ConfigureStubs.
+
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
 std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationEvent& rhs);
+std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationLevel& rhs);
 
 // An element in the instrumentation side stack maintained in art::Thread.
 struct InstrumentationStackFrame {
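The key-based mechanism declared above can be reduced to a small sketch: each client stores its requested level under its own key, and the effective level applied by ConfigureStubs is the maximum over all stored requests. The Level and LevelTable names below are illustrative stand-ins, not the ART types; the real table is a SafeMap keyed by const char* and guarded by the mutator lock.

#include <algorithm>
#include <map>
#include <string>

// Stand-ins for Instrumentation::InstrumentationLevel and
// requested_instrumentation_levels_ (simplified to std::string keys).
enum class Level { kNothing, kStubs, kInterpreter };

class LevelTable {
 public:
  // Mirrors the bookkeeping in ConfigureStubs: a kNothing request removes
  // the client's entry, anything else overwrites it.
  void Configure(const std::string& key, Level desired) {
    if (desired == Level::kNothing) {
      requested_.erase(key);
    } else {
      requested_[key] = desired;
    }
  }

  // The effective level is the highest level any remaining client asked for.
  Level Effective() const {
    Level level = Level::kNothing;
    for (const auto& entry : requested_) {
      level = std::max(level, entry.second);
    }
    return level;
  }

 private:
  std::map<std::string, Level> requested_;
};

With this shape, a client that downgrades or unregisters only lowers the effective level once no other client still requires the higher one, which is exactly what the MultiConfigureStubs tests below exercise.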
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
new file mode 100644
index 0000000..5afacb8
--- /dev/null
+++ b/runtime/instrumentation_test.cc
@@ -0,0 +1,791 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instrumentation.h"
+
+#include "common_runtime_test.h"
+#include "common_throws.h"
+#include "class_linker-inl.h"
+#include "dex_file.h"
+#include "handle_scope-inl.h"
+#include "jvalue.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "thread_list.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace instrumentation {
+
+class TestInstrumentationListener FINAL : public instrumentation::InstrumentationListener {
+ public:
+  TestInstrumentationListener()
+    : received_method_enter_event(false), received_method_exit_event(false),
+      received_method_unwind_event(false), received_dex_pc_moved_event(false),
+      received_field_read_event(false), received_field_written_event(false),
+      received_exception_caught_event(false), received_backward_branch_event(false) {}
+
+  virtual ~TestInstrumentationListener() {}
+
+  void MethodEntered(Thread* thread ATTRIBUTE_UNUSED,
+                     mirror::Object* this_object ATTRIBUTE_UNUSED,
+                     mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_enter_event = true;
+  }
+
+  void MethodExited(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    const JValue& return_value ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_exit_event = true;
+  }
+
+  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_method_unwind_event = true;
+  }
+
+  void DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
+                  mirror::Object* this_object ATTRIBUTE_UNUSED,
+                  mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                  uint32_t new_dex_pc ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_dex_pc_moved_event = true;
+  }
+
+  void FieldRead(Thread* thread ATTRIBUTE_UNUSED,
+                 mirror::Object* this_object ATTRIBUTE_UNUSED,
+                 mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                 uint32_t dex_pc ATTRIBUTE_UNUSED,
+                 ArtField* field ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_field_read_event = true;
+  }
+
+  void FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
+                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    ArtField* field ATTRIBUTE_UNUSED,
+                    const JValue& field_value ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_field_written_event = true;
+  }
+
+  void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
+                       mirror::Throwable* exception_object ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_exception_caught_event = true;
+  }
+
+  void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED,
+                      mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                      int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    received_backward_branch_event = true;
+  }
+
+  void Reset() {
+    received_method_enter_event = false;
+    received_method_exit_event = false;
+    received_method_unwind_event = false;
+    received_dex_pc_moved_event = false;
+    received_field_read_event = false;
+    received_field_written_event = false;
+    received_exception_caught_event = false;
+    received_backward_branch_event = false;
+  }
+
+  bool received_method_enter_event;
+  bool received_method_exit_event;
+  bool received_method_unwind_event;
+  bool received_dex_pc_moved_event;
+  bool received_field_read_event;
+  bool received_field_written_event;
+  bool received_exception_caught_event;
+  bool received_backward_branch_event;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TestInstrumentationListener);
+};
+
+class InstrumentationTest : public CommonRuntimeTest {
+ public:
+  // Unique keys used to test Instrumentation::ConfigureStubs.
+  static constexpr const char* kClientOneKey = "TestClient1";
+  static constexpr const char* kClientTwoKey = "TestClient2";
+
+  void CheckConfigureStubs(const char* key, Instrumentation::InstrumentationLevel level) {
+    ScopedObjectAccess soa(Thread::Current());
+    instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+    {
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Instrumentation::ConfigureStubs");
+      instr->ConfigureStubs(key, level);
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+  }
+
+  Instrumentation::InstrumentationLevel GetCurrentInstrumentationLevel() {
+    return Runtime::Current()->GetInstrumentation()->GetCurrentInstrumentationLevel();
+  }
+
+  size_t GetInstrumentationUserCount() {
+    ScopedObjectAccess soa(Thread::Current());
+    return Runtime::Current()->GetInstrumentation()->requested_instrumentation_levels_.size();
+  }
+
+  void TestEvent(uint32_t instrumentation_event) {
+    ScopedObjectAccess soa(Thread::Current());
+    instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+    TestInstrumentationListener listener;
+    {
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Add instrumentation listener");
+      instr->AddListener(&listener, instrumentation_event);
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+
+    mirror::ArtMethod* const event_method = nullptr;
+    mirror::Object* const event_obj = nullptr;
+    const uint32_t event_dex_pc = 0;
+
+    // Check the listener is registered and is notified of the event.
+    EXPECT_TRUE(HasEventListener(instr, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
+    EXPECT_TRUE(DidListenerReceiveEvent(listener, instrumentation_event));
+
+    listener.Reset();
+    {
+      soa.Self()->TransitionFromRunnableToSuspended(kSuspended);
+      Runtime* runtime = Runtime::Current();
+      runtime->GetThreadList()->SuspendAll("Remove instrumentation listener");
+      instr->RemoveListener(&listener, instrumentation_event);
+      runtime->GetThreadList()->ResumeAll();
+      soa.Self()->TransitionFromSuspendedToRunnable();
+    }
+
+    // Check the listener is not registered and is not notified of the event.
+    EXPECT_FALSE(HasEventListener(instr, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+  }
+
+  void DeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method,
+                        bool enable_deoptimization)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Single method deoptimization");
+    if (enable_deoptimization) {
+      instrumentation->EnableDeoptimization();
+    }
+    instrumentation->Deoptimize(method.Get());
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void UndeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method,
+                          const char* key, bool disable_deoptimization)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Single method undeoptimization");
+    instrumentation->Undeoptimize(method.Get());
+    if (disable_deoptimization) {
+      instrumentation->DisableDeoptimization(key);
+    }
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void DeoptimizeEverything(Thread* self, const char* key, bool enable_deoptimization)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Full deoptimization");
+    if (enable_deoptimization) {
+      instrumentation->EnableDeoptimization();
+    }
+    instrumentation->DeoptimizeEverything(key);
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void UndeoptimizeEverything(Thread* self, const char* key, bool disable_deoptimization)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("Full undeoptimization");
+    instrumentation->UndeoptimizeEverything(key);
+    if (disable_deoptimization) {
+      instrumentation->DisableDeoptimization(key);
+    }
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void EnableMethodTracing(Thread* self, const char* key, bool needs_interpreter)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("EnableMethodTracing");
+    instrumentation->EnableMethodTracing(key, needs_interpreter);
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+  void DisableMethodTracing(Thread* self, const char* key)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    runtime->GetThreadList()->SuspendAll("EnableMethodTracing");
+    instrumentation->DisableMethodTracing(key);
+    runtime->GetThreadList()->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  }
+
+ private:
+  static bool HasEventListener(const instrumentation::Instrumentation* instr, uint32_t event_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        return instr->HasMethodEntryListeners();
+      case instrumentation::Instrumentation::kMethodExited:
+        return instr->HasMethodExitListeners();
+      case instrumentation::Instrumentation::kMethodUnwind:
+        return instr->HasMethodUnwindListeners();
+      case instrumentation::Instrumentation::kDexPcMoved:
+        return instr->HasDexPcListeners();
+      case instrumentation::Instrumentation::kFieldRead:
+        return instr->HasFieldReadListeners();
+      case instrumentation::Instrumentation::kFieldWritten:
+        return instr->HasFieldWriteListeners();
+      case instrumentation::Instrumentation::kExceptionCaught:
+        return instr->HasExceptionCaughtListeners();
+      case instrumentation::Instrumentation::kBackwardBranch:
+        return instr->HasBackwardBranchListeners();
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+
+  static void ReportEvent(const instrumentation::Instrumentation* instr, uint32_t event_type,
+                          Thread* self, mirror::ArtMethod* method, mirror::Object* obj,
+                          uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        instr->MethodEnterEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kMethodExited: {
+        JValue value;
+        instr->MethodExitEvent(self, obj, method, dex_pc, value);
+        break;
+      }
+      case instrumentation::Instrumentation::kMethodUnwind:
+        instr->MethodUnwindEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kDexPcMoved:
+        instr->DexPcMovedEvent(self, obj, method, dex_pc);
+        break;
+      case instrumentation::Instrumentation::kFieldRead:
+        instr->FieldReadEvent(self, obj, method, dex_pc, nullptr);
+        break;
+      case instrumentation::Instrumentation::kFieldWritten: {
+        JValue value;
+        instr->FieldWriteEvent(self, obj, method, dex_pc, nullptr, value);
+        break;
+      }
+      case instrumentation::Instrumentation::kExceptionCaught: {
+        ThrowArithmeticExceptionDivideByZero();
+        mirror::Throwable* event_exception = self->GetException();
+        instr->ExceptionCaughtEvent(self, event_exception);
+        self->ClearException();
+        break;
+      }
+      case instrumentation::Instrumentation::kBackwardBranch:
+        instr->BackwardBranch(self, method, dex_pc);
+        break;
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+
+  static bool DidListenerReceiveEvent(const TestInstrumentationListener& listener,
+                                      uint32_t event_type) {
+    switch (event_type) {
+      case instrumentation::Instrumentation::kMethodEntered:
+        return listener.received_method_enter_event;
+      case instrumentation::Instrumentation::kMethodExited:
+        return listener.received_method_exit_event;
+      case instrumentation::Instrumentation::kMethodUnwind:
+        return listener.received_method_unwind_event;
+      case instrumentation::Instrumentation::kDexPcMoved:
+        return listener.received_dex_pc_moved_event;
+      case instrumentation::Instrumentation::kFieldRead:
+        return listener.received_field_read_event;
+      case instrumentation::Instrumentation::kFieldWritten:
+        return listener.received_field_written_event;
+      case instrumentation::Instrumentation::kExceptionCaught:
+        return listener.received_exception_caught_event;
+      case instrumentation::Instrumentation::kBackwardBranch:
+        return listener.received_backward_branch_event;
+      default:
+        LOG(FATAL) << "Unknown instrumentation event " << event_type;
+        UNREACHABLE();
+    }
+  }
+};
+
+TEST_F(InstrumentationTest, NoInstrumentation) {
+  ScopedObjectAccess soa(Thread::Current());
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
+  ASSERT_NE(instr, nullptr);
+
+  EXPECT_FALSE(instr->AreExitStubsInstalled());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsActive());
+  EXPECT_FALSE(instr->ShouldNotifyMethodEnterExitEvents());
+
+  // Test interpreter table is the default one.
+  EXPECT_EQ(instrumentation::kMainHandlerTable, instr->GetInterpreterHandlerTable());
+
+  // Check there is no registered listener.
+  EXPECT_FALSE(instr->HasDexPcListeners());
+  EXPECT_FALSE(instr->HasExceptionCaughtListeners());
+  EXPECT_FALSE(instr->HasFieldReadListeners());
+  EXPECT_FALSE(instr->HasFieldWriteListeners());
+  EXPECT_FALSE(instr->HasMethodEntryListeners());
+  EXPECT_FALSE(instr->HasMethodExitListeners());
+  EXPECT_FALSE(instr->IsActive());
+}
+
+// Test instrumentation listeners for each event.
+TEST_F(InstrumentationTest, MethodEntryEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodEntered);
+}
+
+TEST_F(InstrumentationTest, MethodExitEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodExited);
+}
+
+TEST_F(InstrumentationTest, MethodUnwindEvent) {
+  TestEvent(instrumentation::Instrumentation::kMethodUnwind);
+}
+
+TEST_F(InstrumentationTest, DexPcMovedEvent) {
+  TestEvent(instrumentation::Instrumentation::kDexPcMoved);
+}
+
+TEST_F(InstrumentationTest, FieldReadEvent) {
+  TestEvent(instrumentation::Instrumentation::kFieldRead);
+}
+
+TEST_F(InstrumentationTest, FieldWriteEvent) {
+  TestEvent(instrumentation::Instrumentation::kFieldWritten);
+}
+
+TEST_F(InstrumentationTest, ExceptionCaughtEvent) {
+  TestEvent(instrumentation::Instrumentation::kExceptionCaught);
+}
+
+TEST_F(InstrumentationTest, BackwardBranchEvent) {
+  TestEvent(instrumentation::Instrumentation::kBackwardBranch);
+}
+
+TEST_F(InstrumentationTest, DeoptimizeDirectMethod) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  Handle<mirror::ArtMethod> method_to_deoptimize(
+      hs.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V")));
+  ASSERT_TRUE(method_to_deoptimize.Get() != nullptr);
+
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  DeoptimizeMethod(soa.Self(), method_to_deoptimize, true);
+
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+  EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  constexpr const char* instrumentation_key = "DeoptimizeDirectMethod";
+  UndeoptimizeMethod(soa.Self(), method_to_deoptimize, instrumentation_key, true);
+
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+}
+
+TEST_F(InstrumentationTest, FullDeoptimization) {
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+
+  constexpr const char* instrumentation_key = "FullDeoptimization";
+  DeoptimizeEverything(soa.Self(), instrumentation_key, true);
+
+  EXPECT_TRUE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+
+  UndeoptimizeEverything(soa.Self(), instrumentation_key, true);
+
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+}
+
+TEST_F(InstrumentationTest, MixedDeoptimization) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  Handle<mirror::ArtMethod> method_to_deoptimize(
+      hs.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V")));
+  ASSERT_TRUE(method_to_deoptimize.Get() != nullptr);
+
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  DeoptimizeMethod(soa.Self(), method_to_deoptimize, true);
+  // Deoptimizing a single method does not change the instrumentation level.
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+  EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  constexpr const char* instrumentation_key = "MixedDeoptimization";
+  DeoptimizeEverything(soa.Self(), instrumentation_key, false);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter,
+            GetCurrentInstrumentationLevel());
+  EXPECT_TRUE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+  EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  UndeoptimizeEverything(soa.Self(), instrumentation_key, false);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+  EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+
+  UndeoptimizeMethod(soa.Self(), method_to_deoptimize, instrumentation_key, true);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get()));
+}
+
+TEST_F(InstrumentationTest, MethodTracing_Interpreter) {
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+
+  constexpr const char* instrumentation_key = "MethodTracing";
+  EnableMethodTracing(soa.Self(), instrumentation_key, true);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter,
+            GetCurrentInstrumentationLevel());
+  EXPECT_TRUE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+
+  DisableMethodTracing(soa.Self(), instrumentation_key);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+}
+
+TEST_F(InstrumentationTest, MethodTracing_InstrumentationEntryExitStubs) {
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  instrumentation::Instrumentation* instr = runtime->GetInstrumentation();
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+
+  constexpr const char* instrumentation_key = "MethodTracing";
+  EnableMethodTracing(soa.Self(), instrumentation_key, false);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+  EXPECT_TRUE(instr->AreExitStubsInstalled());
+
+  DisableMethodTracing(soa.Self(), instrumentation_key);
+  EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing,
+            GetCurrentInstrumentationLevel());
+  EXPECT_FALSE(instr->AreAllMethodsDeoptimized());
+}
+
+// We use a macro to print the line number where the test is failing.
+#define CHECK_INSTRUMENTATION(_level, _user_count)                                      \
+  do {                                                                                  \
+    Instrumentation* const instr = Runtime::Current()->GetInstrumentation();            \
+    bool interpreter =                                                                  \
+      (_level == Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);    \
+    EXPECT_EQ(_level, GetCurrentInstrumentationLevel());                                \
+    EXPECT_EQ(_user_count, GetInstrumentationUserCount());                              \
+    if (instr->IsForcedInterpretOnly()) {                                               \
+      EXPECT_TRUE(instr->InterpretOnly());                                              \
+    } else if (interpreter) {                                                           \
+      EXPECT_TRUE(instr->InterpretOnly());                                              \
+    } else {                                                                            \
+      EXPECT_FALSE(instr->InterpretOnly());                                             \
+    }                                                                                   \
+    if (interpreter) {                                                                  \
+      EXPECT_TRUE(instr->AreAllMethodsDeoptimized());                                   \
+    } else {                                                                            \
+      EXPECT_FALSE(instr->AreAllMethodsDeoptimized());                                  \
+    }                                                                                   \
+  } while (false)
+
+TEST_F(InstrumentationTest, ConfigureStubs_Nothing) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Check no-op.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubs) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Check we can switch to instrumentation stubs
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Check we can disable instrumentation.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_Interpreter) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Check we can switch to interpreter
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Check we can disable instrumentation.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubsToInterpreter) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Configure stubs with interpreter.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Check we can disable instrumentation.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, ConfigureStubs_InterpreterToInstrumentationStubs) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with interpreter.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Configure stubs with instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Check we can disable instrumentation.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest,
+       ConfigureStubs_InstrumentationStubsToInterpreterToInstrumentationStubs) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Configure stubs with interpreter.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Configure stubs with instrumentation stubs again.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Check we can disable instrumentation.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_Nothing) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Check kInstrumentNothing with two clients.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubs) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with instrumentation stubs for 1st client.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Configure stubs with instrumentation stubs for 2nd client.
+  CheckConfigureStubs(kClientTwoKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        2U);
+
+  // 1st client requests instrumentation deactivation but 2nd client still needs
+  // instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // 2nd client requests instrumentation deactivation
+  CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_Interpreter) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with interpreter for 1st client.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Configure stubs with interpreter for 2nd client.
+  CheckConfigureStubs(kClientTwoKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U);
+
+  // 1st client requests instrumentation deactivation but 2nd client still needs interpreter.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client requests instrumentation deactivation
+  CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubsThenInterpreter) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with instrumentation stubs for 1st client.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // Configure stubs with interpreter for 2nd client.
+  CheckConfigureStubs(kClientTwoKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U);
+
+  // 1st client requests instrumentation deactivation but 2nd client still needs interpreter.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // 2nd client requests instrumentation deactivation
+  CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+TEST_F(InstrumentationTest, MultiConfigureStubs_InterpreterThenInstrumentationStubs) {
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+
+  // Configure stubs with interpreter for 1st client.
+  CheckConfigureStubs(kClientOneKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U);
+
+  // Configure stubs with instrumentation stubs for 2nd client.
+  CheckConfigureStubs(kClientTwoKey,
+                      Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U);
+
+  // 1st client requests instrumentation deactivation but 2nd client still needs
+  // instrumentation stubs.
+  CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs,
+                        1U);
+
+  // 2nd client requests instrumentation deactivation
+  CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing);
+  CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U);
+}
+
+}  // namespace instrumentation
+}  // namespace art
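As a usage illustration, a client of the keyed tracing API brackets its requests with a stable key. The wrapper functions and the key value below are hypothetical; only the EnableMethodTracing and DisableMethodTracing signatures come from this patch, and the suspend-all plus exclusive mutator_lock_ requirements (shown in the test helpers above) are omitted for brevity.

// Hypothetical client code; the key just has to be stable across calls.
static constexpr const char* kMyTracerKey = "MyTracer";

void StartTracing(art::instrumentation::Instrumentation* instr) {
  // needs_interpreter = false requests the entry/exit stub level.
  instr->EnableMethodTracing(kMyTracerKey, /* needs_interpreter */ false);
}

void StopTracing(art::instrumentation::Instrumentation* instr) {
  instr->DisableMethodTracing(kMyTracerKey);
}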
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 878efba..dd1f55e 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -156,7 +156,6 @@
   const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
   uint16_t inst_data;
   const void* const* currentHandlersTable;
-  bool notified_method_entry_event = false;
   UPDATE_HANDLER_TABLE();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
     if (kIsDebugBuild) {
@@ -166,7 +165,6 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
-      notified_method_entry_event = true;
     }
   }
 
@@ -264,9 +262,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -281,9 +276,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -299,9 +291,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -316,9 +305,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -352,9 +338,6 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -2510,26 +2493,16 @@
 // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The
 // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to
 // a constant condition that would remove the "if" statement so the test is free.
-#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                            \
-  alt_op_##code: {                                                                                \
-    if (Instruction::code != Instruction::RETURN_VOID &&                                          \
-        Instruction::code != Instruction::RETURN_VOID_NO_BARRIER &&                               \
-        Instruction::code != Instruction::RETURN &&                                               \
-        Instruction::code != Instruction::RETURN_WIDE &&                                          \
-        Instruction::code != Instruction::RETURN_OBJECT) {                                        \
-      if (LIKELY(!notified_method_entry_event)) {                                                 \
-        Runtime* runtime = Runtime::Current();                                                    \
-        const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
-        if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
-          Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
-          instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
-        }                                                                                         \
-      } else {                                                                                    \
-        notified_method_entry_event = false;                                                      \
-      }                                                                                           \
-    }                                                                                             \
-    UPDATE_HANDLER_TABLE();                                                                       \
-    goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];                   \
+#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                        \
+  alt_op_##code: {                                                                            \
+    Runtime* const runtime = Runtime::Current();                                              \
+    const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();  \
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                     \
+      Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_);                 \
+      instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc);  \
+    }                                                                                         \
+    UPDATE_HANDLER_TABLE();                                                                   \
+    goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code];               \
   }
 #include "dex_instruction_list.h"
       DEX_INSTRUCTION_LIST(INSTRUMENTATION_INSTRUCTION_HANDLER)
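The rewritten alternative handlers above drop the special-casing of return opcodes and the notified_method_entry_event bookkeeping: every instruction dispatched through the alternative table now reports a DexPcMoved event whenever DexPc listeners are installed. A minimal, self-contained sketch of that dispatch pattern follows (toy types, not ART's Instrumentation API):

// Sketch only: check for DexPc listeners before dispatching every
// instruction, including returns, mirroring the simplified macro above.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Listener {
  virtual ~Listener() = default;
  virtual void DexPcMoved(uint32_t dex_pc) = 0;
};

struct PrintingListener : Listener {
  void DexPcMoved(uint32_t dex_pc) override { std::printf("dex pc -> %u\n", dex_pc); }
};

// Hypothetical stand-in for Instrumentation::HasDexPcListeners()/DexPcMovedEvent().
struct ToyInstrumentation {
  std::vector<Listener*> dex_pc_listeners;
  bool HasDexPcListeners() const { return !dex_pc_listeners.empty(); }
  void DexPcMovedEvent(uint32_t dex_pc) const {
    for (Listener* l : dex_pc_listeners) l->DexPcMoved(dex_pc);
  }
};

int main() {
  ToyInstrumentation instrumentation;
  PrintingListener listener;
  instrumentation.dex_pc_listeners.push_back(&listener);
  for (uint32_t dex_pc = 0; dex_pc < 4; ++dex_pc) {      // pretend dex pcs
    if (instrumentation.HasDexPcListeners()) {            // mirrors the UNLIKELY() check
      instrumentation.DexPcMovedEvent(dex_pc);
    }
    // ... dispatch the real opcode handler here ...
  }
  return 0;
}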
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index a5e5299..0e3420f 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -47,10 +47,7 @@
 // Code to run before each dex instruction.
 #define PREAMBLE()                                                                              \
   do {                                                                                          \
-    DCHECK(!inst->IsReturn());                                                                  \
-    if (UNLIKELY(notified_method_entry_event)) {                                                \
-      notified_method_entry_event = false;                                                      \
-    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                \
+    if (UNLIKELY(instrumentation->HasDexPcListeners())) {                                       \
       instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),  \
                                        shadow_frame.GetMethod(), dex_pc);                       \
     }                                                                                           \
@@ -67,7 +64,6 @@
   self->VerifyStack();
 
   uint32_t dex_pc = shadow_frame.GetDexPC();
-  bool notified_method_entry_event = false;
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
   if (LIKELY(dex_pc == 0)) {  // We are entering the method as opposed to deoptimizing.
     if (kIsDebugBuild) {
@@ -76,7 +72,6 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         shadow_frame.GetMethod(), 0);
-      notified_method_entry_event = true;
     }
   }
   const uint16_t* const insns = code_item->insns_;
@@ -171,19 +166,18 @@
         break;
       }
       case Instruction::RETURN_VOID_NO_BARRIER: {
+        PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_VOID: {
+        PREAMBLE();
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
@@ -191,13 +185,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN: {
+        PREAMBLE();
         JValue result;
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
@@ -206,13 +198,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_WIDE: {
+        PREAMBLE();
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
@@ -220,13 +210,11 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
       case Instruction::RETURN_OBJECT: {
+        PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
@@ -254,9 +242,6 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
-        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
-          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
-                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
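The switch interpreter gets the same simplification: PREAMBLE() no longer consults notified_method_entry_event or excludes returns, and each RETURN_* case now runs PREAMBLE() itself, so a return instruction can report both a DexPcMoved and a MethodExit event. A toy illustration of that ordering (names and flags are illustrative, not ART's):

#include <cstdint>
#include <cstdio>

static bool has_dex_pc_listeners = true;       // assumed configuration
static bool has_method_exit_listeners = true;  // assumed configuration

#define PREAMBLE(dex_pc)                            \
  do {                                              \
    if (has_dex_pc_listeners) {                     \
      std::printf("DexPcMoved at %u\n", (dex_pc));  \
    }                                               \
  } while (false)

static int HandleReturn(uint32_t dex_pc, int value) {
  PREAMBLE(dex_pc);  // previously skipped for return opcodes
  if (has_method_exit_listeners) {
    std::printf("MethodExit at %u\n", dex_pc);  // no longer mutually exclusive with DexPcMoved
  }
  return value;
}

int main() {
  return HandleReturn(7, 0);
}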
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 1ec800f..ab3f2e4 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -141,6 +141,8 @@
     }
 }
 
+// Returns the instrumentation event the DebugInstrumentationListener must
+// listen to in order to properly report the given JDWP event to the debugger.
 static uint32_t GetInstrumentationEventFor(JdwpEventKind eventKind) {
   switch (eventKind) {
     case EK_BREAKPOINT:
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index f5ad8b8..c698cfc 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -19,8 +19,6 @@
 
 #include <unordered_map>
 
-#include "instrumentation.h"
-
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index e2f9cec..3232674 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -77,7 +77,7 @@
   ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() ||
+  if (method->IsClassInitializer() || method->IsNative() ||
       Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
     return;
   }
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 959bb75..cf4233c 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -153,7 +153,7 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
   *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
                             "any existing map. See process maps in the log.", begin, end);
   return false;
@@ -256,7 +256,7 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
     flags |= MAP_FIXED;
   }
 
@@ -411,7 +411,7 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -617,13 +617,68 @@
   return true;
 }
 
-void MemMap::DumpMaps(std::ostream& os) {
+void MemMap::DumpMaps(std::ostream& os, bool terse) {
   MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-  DumpMapsLocked(os);
+  DumpMapsLocked(os, terse);
 }
 
-void MemMap::DumpMapsLocked(std::ostream& os) {
-  os << *maps_;
+void MemMap::DumpMapsLocked(std::ostream& os, bool terse) {
+  const auto& mem_maps = *maps_;
+  if (!terse) {
+    os << mem_maps;
+    return;
+  }
+
+  // Terse output example:
+  //   [MemMap: 0x409be000+0x20P~0x11dP+0x20P~0x61cP+0x20P prot=0x3 LinearAlloc]
+  //   [MemMap: 0x451d6000+0x6bP(3) prot=0x3 large object space allocation]
+  // The details:
+  //   "+0x20P" means 0x20 pages taken by a single mapping,
+  //   "~0x11dP" means a gap of 0x11d pages,
+  //   "+0x6bP(3)" means 3 mappings one after another, together taking 0x6b pages.
+  os << "MemMap:" << std::endl;
+  for (auto it = mem_maps.begin(), maps_end = mem_maps.end(); it != maps_end;) {
+    MemMap* map = it->second;
+    void* base = it->first;
+    CHECK_EQ(base, map->BaseBegin());
+    os << "[MemMap: " << base;
+    ++it;
+    // Merge consecutive maps with the same protection flags and name.
+    constexpr size_t kMaxGaps = 9;
+    size_t num_gaps = 0;
+    size_t num = 1u;
+    size_t size = map->BaseSize();
+    CHECK(IsAligned<kPageSize>(size));
+    void* end = map->BaseEnd();
+    while (it != maps_end &&
+        it->second->GetProtect() == map->GetProtect() &&
+        it->second->GetName() == map->GetName() &&
+        (it->second->BaseBegin() == end || num_gaps < kMaxGaps)) {
+      if (it->second->BaseBegin() != end) {
+        ++num_gaps;
+        os << "+0x" << std::hex << (size / kPageSize) << "P";
+        if (num != 1u) {
+          os << "(" << std::dec << num << ")";
+        }
+        size_t gap =
+            reinterpret_cast<uintptr_t>(it->second->BaseBegin()) - reinterpret_cast<uintptr_t>(end);
+        CHECK(IsAligned<kPageSize>(gap));
+        os << "~0x" << std::hex << (gap / kPageSize) << "P";
+        num = 0u;
+        size = 0u;
+      }
+      CHECK(IsAligned<kPageSize>(it->second->BaseSize()));
+      ++num;
+      size += it->second->BaseSize();
+      end = it->second->BaseEnd();
+      ++it;
+    }
+    os << "+0x" << std::hex << (size / kPageSize) << "P";
+    if (num != 1u) {
+      os << "(" << std::dec << num << ")";
+    }
+    os << " prot=0x" << std::hex << map->GetProtect() << " " << map->GetName() << "]" << std::endl;
+  }
 }
 
 bool MemMap::HasMemMap(MemMap* map) {
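A stand-alone sketch of the terse encoding documented in DumpMapsLocked above: page-aligned regions are merged into "+0xNP" runs, with "(count)" appended when several mappings contributed, separated by "~0xMP" gaps. This simplified version drops the protection-flag/name grouping and is not ART code:

#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uintptr_t kPageSize = 4096;

struct Region { uintptr_t begin; uintptr_t size; };  // page-aligned, sorted by begin

static void DumpTerse(const std::vector<Region>& regions) {
  if (regions.empty()) return;
  std::printf("[0x%" PRIxPTR, regions[0].begin);
  uintptr_t end = regions[0].begin + regions[0].size;
  uintptr_t run_pages = regions[0].size / kPageSize;
  size_t num = 1;
  for (size_t i = 1; i < regions.size(); ++i) {
    if (regions[i].begin != end) {
      // Flush the current run, then print the gap in pages.
      std::printf("+0x%" PRIxPTR "P", run_pages);
      if (num != 1) std::printf("(%zu)", num);
      std::printf("~0x%" PRIxPTR "P", (regions[i].begin - end) / kPageSize);
      run_pages = 0;
      num = 0;
    }
    run_pages += regions[i].size / kPageSize;
    end = regions[i].begin + regions[i].size;
    ++num;
  }
  std::printf("+0x%" PRIxPTR "P", run_pages);
  if (num != 1) std::printf("(%zu)", num);
  std::printf("]\n");
}

int main() {
  // Prints "[0x40000000+0x20P~0x11dP+0x20P]".
  DumpTerse({{0x40000000, 0x20 * kPageSize},
             {0x40020000 + 0x11d * kPageSize, 0x20 * kPageSize}});
  return 0;
}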
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index dc6d935..6023a70 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -137,7 +137,7 @@
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
       LOCKS_EXCLUDED(Locks::mem_maps_lock_);
-  static void DumpMaps(std::ostream& os)
+  static void DumpMaps(std::ostream& os, bool terse = false)
       LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
   typedef AllocationTrackingMultiMap<void*, MemMap*, kAllocatorTagMaps> Maps;
@@ -149,7 +149,7 @@
   MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin, size_t base_size,
          int prot, bool reuse) LOCKS_EXCLUDED(Locks::mem_maps_lock_);
 
-  static void DumpMapsLocked(std::ostream& os)
+  static void DumpMapsLocked(std::ostream& os, bool terse)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
   static bool HasMemMap(MemMap* map)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_);
diff --git a/runtime/oat.h b/runtime/oat.h
index a31e09a..aaf442a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '1', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index a25ee31..3b2e2c4 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -405,9 +405,9 @@
   bool cached_oat_file_name_found_;
   std::string cached_oat_file_name_;
 
-  // Cached value of the loaded odex file.
+  // Cached value of the loaded oat file.
   // Use the GetOatFile method rather than accessing this directly, unless you
-  // know the odex file isn't out of date.
+  // know the oat file isn't out of date.
   bool oat_file_load_attempted_ = false;
   std::unique_ptr<OatFile> cached_oat_file_;
 
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 9cf4b16..1c404ff 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -134,7 +134,10 @@
 bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) {
   const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.dex_method_index);
   const char* method_name = ref.dex_file->GetMethodName(method_id);
-  return strncmp(method_name, "access$", strlen("access$")) == 0;
+  // javac names synthetic accessors "access$nnn",
+  // jack names them "-getN", "-putN", "-wrapN".
+  return strncmp(method_name, "access$", strlen("access$")) == 0 ||
+      strncmp(method_name, "-", strlen("-")) == 0;
 }
 
 bool InlineMethodAnalyser::AnalyseReturnMethod(const DexFile::CodeItem* code_item,
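The IsSyntheticAccessor change above is a plain prefix test covering both compilers' naming schemes. A minimal self-contained sketch (hypothetical helper name, not the InlineMethodAnalyser API):

#include <cassert>
#include <cstring>

static bool LooksLikeSyntheticAccessor(const char* method_name) {
  return std::strncmp(method_name, "access$", std::strlen("access$")) == 0 ||  // javac
         std::strncmp(method_name, "-", std::strlen("-")) == 0;                // jack
}

int main() {
  assert(LooksLikeSyntheticAccessor("access$000"));  // javac style
  assert(LooksLikeSyntheticAccessor("-get0"));       // jack style
  assert(!LooksLikeSyntheticAccessor("toString"));
  return 0;
}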
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index a80eed6..9e79bd2 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -223,7 +223,10 @@
           break;
         case kReferenceVReg: {
           uint32_t value = 0;
-          if (GetVReg(h_method.Get(), reg, kind, &value)) {
+          // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier.
+          // We don't want to copy a stale reference into the shadow frame as a reference.
+          // b/20736048
+          if (GetVReg(h_method.Get(), reg, kind, &value) && IsReferenceVReg(h_method.Get(), reg)) {
             new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value));
           } else {
             new_frame->SetVReg(reg, kDeadValue);
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 329ceb5..49e1b8e 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -520,23 +520,6 @@
   return result;
 }
 
-void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           JValue* result) {
-  // We want to make sure that the stack is not within a small distance from the
-  // protected region in case we are calling into a leaf function whose stack
-  // check has been elided.
-  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
-    ThrowStackOverflowError(self);
-    return;
-  }
-  uint32_t shorty_len;
-  const char* shorty = shadow_frame->GetMethod()->GetShorty(&shorty_len);
-  ArgArray arg_array(shorty, shorty_len);
-  arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-  shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
-                                    shorty);
-}
-
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, size_t num_frames) {
   // We want to make sure that the stack is not within a small distance from the
diff --git a/runtime/reflection.h b/runtime/reflection.h
index 6305d68..37f8a6a 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -61,10 +61,6 @@
                                            jobject obj, jmethodID mid, va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           JValue* result)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
 // num_frames is number of frames we look up for access check.
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
                      jobject args, size_t num_frames = 1)
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 99750a1..60ed55a 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -133,6 +133,7 @@
   T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
+    DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal());
     return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj);
   }
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index e49bc1d..a566886 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -19,6 +19,7 @@
 #include "arch/context.h"
 #include "base/hex_dump.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "gc_map.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -151,6 +152,33 @@
   return GetMethod()->NativeQuickPcOffset(cur_quick_frame_pc_);
 }
 
+bool StackVisitor::IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg) {
+  // Process register map (which native, runtime and proxy methods don't have).
+  if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
+    return false;
+  }
+  if (m->IsOptimized(sizeof(void*))) {
+    return true;  // TODO: Implement.
+  }
+  const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
+  CHECK(native_gc_map != nullptr) << PrettyMethod(m);
+  const DexFile::CodeItem* code_item = m->GetCodeItem();
+  // Can't be null or how would we compile its instructions?
+  DCHECK(code_item != nullptr) << PrettyMethod(m);
+  NativePcOffsetToReferenceMap map(native_gc_map);
+  size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_));
+  const uint8_t* reg_bitmap = nullptr;
+  if (num_regs > 0) {
+    Runtime* runtime = Runtime::Current();
+    const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*));
+    uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
+    reg_bitmap = map.FindBitMap(native_pc_offset);
+    DCHECK(reg_bitmap != nullptr);
+  }
+  // Does this register hold a reference?
+  return vreg < num_regs && TestBitmap(vreg, reg_bitmap);
+}
+
 bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind,
                            uint32_t* val) const {
   if (cur_quick_frame_ != nullptr) {
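IsReferenceVReg decides whether a vreg holds a reference by probing the per-pc register bitmap from the native GC map. A self-contained sketch of the bit probe it relies on, mirroring the TestBitmap helper this change moves into utils.h:

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t kBitsPerByte = 8;

inline bool TestBitmap(size_t idx, const uint8_t* bitmap) {
  return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0;
}

int main() {
  // Bits 0 and 9 set: vregs 0 and 9 hold references at this native pc.
  const uint8_t reg_bitmap[] = {0x01, 0x02};
  assert(TestBitmap(0, reg_bitmap));
  assert(!TestBitmap(1, reg_bitmap));
  assert(TestBitmap(9, reg_bitmap));
  assert(!TestBitmap(15, reg_bitmap));
  return 0;
}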
diff --git a/runtime/stack.h b/runtime/stack.h
index 3f1bff8..ab8641b 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -478,6 +478,9 @@
   bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c8aad1b..605a1b5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2311,10 +2311,6 @@
     }
   }
 
-  static bool TestBitmap(size_t reg, const uint8_t* reg_vector) {
-    return ((reg_vector[reg / kBitsPerByte] >> (reg % kBitsPerByte)) & 0x01) != 0;
-  }
-
   // Visitor for when we visit a root.
   RootVisitor& visitor_;
 };
diff --git a/runtime/thread.h b/runtime/thread.h
index e766daa..9346813 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -752,6 +752,18 @@
     tls32_.ready_for_debug_invoke = ready;
   }
 
+  bool IsDebugMethodEntry() const {
+    return tls32_.debug_method_entry_;
+  }
+
+  void SetDebugMethodEntry() {
+    tls32_.debug_method_entry_ = true;
+  }
+
+  void ClearDebugMethodEntry() {
+    tls32_.debug_method_entry_ = false;
+  }
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -1028,7 +1040,7 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false), suspended_at_suspend_check(false),
-      ready_for_debug_invoke(false) {
+      ready_for_debug_invoke(false), debug_method_entry_(false) {
     }
 
     union StateAndFlags state_and_flags;
@@ -1077,6 +1089,10 @@
     // used to invoke method from the debugger which is only allowed when
     // the thread is suspended by an event.
     bool32_t ready_for_debug_invoke;
+
+    // True if the thread has just entered a method. This is used to detect a
+    // method entry event for the debugger.
+    bool32_t debug_method_entry_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
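The new debug_method_entry_ flag is a simple per-thread set/clear/query latch; presumably the interpreter sets it on method entry and the debugger-side listener clears it once the entry has been reported. A toy illustration of that lifecycle (names and usage are assumptions, not ART's):

#include <cassert>

struct ToyThread {
  bool debug_method_entry_ = false;
  void SetDebugMethodEntry() { debug_method_entry_ = true; }
  void ClearDebugMethodEntry() { debug_method_entry_ = false; }
  bool IsDebugMethodEntry() const { return debug_method_entry_; }
};

int main() {
  ToyThread self;
  self.SetDebugMethodEntry();         // set when a method is entered
  assert(self.IsDebugMethodEntry());  // the debugger side observes the entry...
  self.ClearDebugMethodEntry();       // ...and clears the latch before resuming
  assert(!self.IsDebugMethodEntry());
  return 0;
}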
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 9eca517..3b8feda 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -126,6 +126,9 @@
 pthread_t Trace::sampling_pthread_ = 0U;
 std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
+// The key used to identify the tracer when updating instrumentation.
+static constexpr const char* kTracerInstrumentationKey = "Tracer";
+
 static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) {
   return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask);
 }
@@ -393,7 +396,7 @@
                                                    instrumentation::Instrumentation::kMethodExited |
                                                    instrumentation::Instrumentation::kMethodUnwind);
         // TODO: In full-PIC mode, we don't need to fully deopt.
-        runtime->GetInstrumentation()->EnableMethodTracing();
+        runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
       }
     }
   }
@@ -440,7 +443,7 @@
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
-      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
       runtime->GetInstrumentation()->RemoveListener(
           the_trace, instrumentation::Instrumentation::kMethodEntered |
           instrumentation::Instrumentation::kMethodExited |
@@ -522,7 +525,7 @@
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
-      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
       runtime->GetInstrumentation()->RemoveListener(the_trace,
                                                     instrumentation::Instrumentation::kMethodEntered |
                                                     instrumentation::Instrumentation::kMethodExited |
@@ -566,7 +569,7 @@
                                                instrumentation::Instrumentation::kMethodExited |
                                                instrumentation::Instrumentation::kMethodUnwind);
     // TODO: In full-PIC mode, we don't need to fully deopt.
-    runtime->GetInstrumentation()->EnableMethodTracing();
+    runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
   }
 
   runtime->GetThreadList()->ResumeAll();
diff --git a/runtime/utf.h b/runtime/utf.h
index dd38afa..7f05248 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -87,9 +87,9 @@
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
  * single byte, 2-byte and 3-byte UTF-8 sequences result in a single UTF-16
- * character whereas 4-byte UTF-8 sequences result in a surrogate pair. Use
- * GetLeadingUtf16Char and GetTrailingUtf16Char to process the return value
- * of this function.
+ * character (possibly one half of a surrogate) whereas 4-byte UTF-8 sequences
+ * result in a surrogate pair. Use GetLeadingUtf16Char and GetTrailingUtf16Char
+ * to process the return value of this function.
  *
  * Advances "*utf8_data_in" to the start of the next character.
  *
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 650214f..7986cdc 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -827,14 +827,21 @@
    */
 
   const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr);
-
   const uint16_t leading = GetLeadingUtf16Char(pair);
-  const uint32_t trailing = GetTrailingUtf16Char(pair);
 
-  if (trailing == 0) {
-    // Perform follow-up tests based on the high 8 bits of the
-    // lower surrogate.
-    switch (leading >> 8) {
+  // We have a surrogate pair resulting from a valid 4-byte UTF-8 sequence.
+  // No further checks are necessary because 4-byte sequences span code
+  // points [U+10000, U+1FFFFF], which are valid codepoints in a dex
+  // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of
+  // the surrogate halves is valid and well formed in this instance.
+  if (GetTrailingUtf16Char(pair) != 0) {
+    return true;
+  }
+
+
+  // We've encountered a one, two or three byte UTF-8 sequence. The
+  // three byte UTF-8 sequence could be one half of a surrogate pair.
+  switch (leading >> 8) {
     case 0x00:
       // It's only valid if it's above the ISO-8859-1 high space (0xa0).
       return (leading > 0x00a0);
@@ -842,9 +849,14 @@
     case 0xd9:
     case 0xda:
     case 0xdb:
-      // It looks like a leading surrogate but we didn't find a trailing
-      // surrogate if we're here.
-      return false;
+      {
+        // We found a three byte sequence encoding one half of a surrogate.
+        // Look for the other half.
+        const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr);
+        const uint16_t trailing = GetLeadingUtf16Char(pair2);
+
+        return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff);
+      }
     case 0xdc:
     case 0xdd:
     case 0xde:
@@ -855,21 +867,19 @@
     case 0xff:
       // It's in the range that has spaces, controls, and specials.
       switch (leading & 0xfff8) {
-      case 0x2000:
-      case 0x2008:
-      case 0x2028:
-      case 0xfff0:
-      case 0xfff8:
-        return false;
+        case 0x2000:
+        case 0x2008:
+        case 0x2028:
+        case 0xfff0:
+        case 0xfff8:
+          return false;
       }
-      break;
-    }
-
-    return true;
+      return true;
+    default:
+      return true;
   }
 
-  // We have a surrogate pair. Check that trailing surrogate is well formed.
-  return (trailing >= 0xdc00 && trailing <= 0xdfff);
+  UNREACHABLE();
 }
 
 /* Return whether the pointed-at modified-UTF-8 encoded character is
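The reworked descriptor check above accepts a 3-byte leading surrogate only when the next decoded UTF-16 unit is a trailing surrogate. A small stand-alone sketch of that range check (not ART code), using the U+20B9F pair exercised by the new tests:

#include <cassert>
#include <cstdint>

inline bool IsLeadingSurrogate(uint16_t u) { return u >= 0xd800 && u <= 0xdbff; }
inline bool IsTrailingSurrogate(uint16_t u) { return u >= 0xdc00 && u <= 0xdfff; }

// A leading surrogate is only acceptable when immediately followed by a
// trailing surrogate; anything else is an unpaired (invalid) surrogate.
inline bool IsValidSurrogatePair(uint16_t leading, uint16_t next) {
  return IsLeadingSurrogate(leading) && IsTrailingSurrogate(next);
}

int main() {
  assert(IsValidSurrogatePair(0xd842, 0xdf9f));   // U+20B9F, as in the new tests
  assert(!IsValidSurrogatePair(0xd842, 0x0041));  // unpaired leading surrogate
  assert(!IsValidSurrogatePair(0xdc00, 0xdc00));  // trailing without a leading
  return 0;
}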
diff --git a/runtime/utils.h b/runtime/utils.h
index eaafcf0..71ccf85 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -604,6 +604,11 @@
   return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
 }
 
+inline bool TestBitmap(size_t idx, const uint8_t* bitmap) {
+  return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0;
+}
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 195de0c..869d305 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -521,4 +521,27 @@
   EXPECT_GT(NanoTime() - start, MsToNs(1000));
 }
 
+TEST_F(UtilsTest, IsValidDescriptor) {
+  std::vector<uint8_t> descriptor(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, ';', 0x00 });
+  EXPECT_TRUE(IsValidDescriptor(reinterpret_cast<char*>(&descriptor[0])));
+
+  std::vector<uint8_t> unpaired_surrogate(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, ';', 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate[0])));
+
+  std::vector<uint8_t> unpaired_surrogate_at_end(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_at_end[0])));
+
+  std::vector<uint8_t> invalid_surrogate(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, ';', 0x00 });
+  EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&invalid_surrogate[0])));
+
+  std::vector<uint8_t> unpaired_surrogate_with_multibyte_sequence(
+      { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, 0xf0, 0x9f, 0x8f, 0xa0, ';', 0x00 });
+  EXPECT_FALSE(
+      IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_with_multibyte_sequence[0])));
+}
+
 }  // namespace art
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 0e90c4d..abd6cb8 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -236,15 +236,6 @@
     String str10 = "abcdefghij";
     String str40 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabc";
 
-    int supplementaryChar = 0x20b9f;
-    String surrogatePair = "\ud842\udf9f";
-    String stringWithSurrogates = "hello " + surrogatePair + " world";
-
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length());
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length());
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6);
-    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1);
-
     Assert.assertEquals(str0.indexOf('a'), -1);
     Assert.assertEquals(str3.indexOf('a'), 0);
     Assert.assertEquals(str3.indexOf('b'), 1);
@@ -269,24 +260,49 @@
     Assert.assertEquals(str40.indexOf('a',10), 10);
     Assert.assertEquals(str40.indexOf('b',40), -1);
 
+    testIndexOfNull();
+
+    testSurrogateIndexOf();
+  }
+
+  private static void testSurrogateIndexOf() {
+    int supplementaryChar = 0x20b9f;
+    String surrogatePair = "\ud842\udf9f";
+    String stringWithSurrogates = "hello " + surrogatePair + " world";
+
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length());
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length());
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6);
+    Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1);
+  }
+
+  private static void testIndexOfNull() {
     String strNull = null;
     try {
-      strNull.indexOf('a');
+      testNullIndex(strNull, 'a');
       Assert.fail();
     } catch (NullPointerException expected) {
     }
     try {
-      strNull.indexOf('a', 0);
+      testNullIndex(strNull, 'a', 0);
       Assert.fail();
     } catch (NullPointerException expected) {
     }
     try {
-      strNull.indexOf('a', -1);
+      testNullIndex(strNull, 'a', -1);
       Assert.fail();
     } catch (NullPointerException expected) {
     }
   }
 
+  private static int testNullIndex(String strNull, int c) {
+    return strNull.indexOf(c);
+  }
+
+  private static int testNullIndex(String strNull, int c, int startIndex) {
+    return strNull.indexOf(c, startIndex);
+  }
+
   public static void test_String_compareTo() {
     String test = "0123456789";
     String test1 = new String("0123456789");    // different object
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 515b8af..40f5f00 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -385,7 +385,6 @@
 
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 472-unreachable-if-regression # b/19988134
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
diff --git a/test/Instrumentation/Instrumentation.java b/test/Instrumentation/Instrumentation.java
new file mode 100644
index 0000000..09d4342
--- /dev/null
+++ b/test/Instrumentation/Instrumentation.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Instrumentation {
+  // Direct method
+  private void instanceMethod() {
+    System.out.println("instanceMethod");
+  }
+}
diff --git a/tools/art b/tools/art
index 85e6e2f..f167a73 100644
--- a/tools/art
+++ b/tools/art
@@ -95,6 +95,7 @@
   PATH=$ANDROID_ROOT/bin:$PATH \
   $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
     -XXlib:$LIBART \
+    -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core.art \
     -Xcompiler-option --include-debug-symbols \
     "$@"