diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 78b7cc0..8af5473 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1620,9 +1620,10 @@
   if (method->IsProxyMethod()) {
     return GetPortableProxyInvokeHandler();
   }
-  const void* result = GetOatMethodFor(method).GetPortableCode();
+  const OatFile::OatMethod oat_method = GetOatMethodFor(method);
+  const void* result = oat_method.GetPortableCode();
   if (result == nullptr) {
-    if (GetOatMethodFor(method).GetQuickCode() == nullptr) {
+    if (oat_method.GetQuickCode() == nullptr) {
       // No code? You must mean to go into the interpreter.
       result = GetPortableToInterpreterBridge();
     } else {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 514ad4c..1f59617 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -211,6 +211,7 @@
 Mutex* Dbg::deoptimization_lock_ = nullptr;
 std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_;
 size_t Dbg::full_deoptimization_event_count_ = 0;
+size_t Dbg::delayed_full_undeoptimization_count_ = 0;
 
 // Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
@@ -625,6 +626,7 @@
     MutexLock mu(Thread::Current(), *deoptimization_lock_);
     CHECK_EQ(deoptimization_requests_.size(), 0U);
     CHECK_EQ(full_deoptimization_event_count_, 0U);
+    CHECK_EQ(delayed_full_undeoptimization_count_, 0U);
   }
 
   Runtime* runtime = Runtime::Current();
@@ -667,6 +669,7 @@
       MutexLock mu(Thread::Current(), *deoptimization_lock_);
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
+      delayed_full_undeoptimization_count_ = 0U;
     }
     runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
                                                   instrumentation::Instrumentation::kMethodEntered |
@@ -2580,20 +2583,24 @@
       LOG(WARNING) << "Ignoring empty deoptimization request.";
       break;
     case DeoptimizationRequest::kFullDeoptimization:
-      VLOG(jdwp) << "Deoptimize the world";
+      VLOG(jdwp) << "Deoptimize the world ...";
       instrumentation->DeoptimizeEverything();
+      VLOG(jdwp) << "Deoptimize the world DONE";
       break;
     case DeoptimizationRequest::kFullUndeoptimization:
-      VLOG(jdwp) << "Undeoptimize the world";
+      VLOG(jdwp) << "Undeoptimize the world ...";
       instrumentation->UndeoptimizeEverything();
+      VLOG(jdwp) << "Undeoptimize the world DONE";
       break;
     case DeoptimizationRequest::kSelectiveDeoptimization:
-      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method);
+      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " ...";
       instrumentation->Deoptimize(request.method);
+      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.method) << " DONE";
       break;
     case DeoptimizationRequest::kSelectiveUndeoptimization:
-      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method);
+      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " ...";
       instrumentation->Undeoptimize(request.method);
+      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.method) << " DONE";
       break;
     default:
       LOG(FATAL) << "Unsupported deoptimization request kind " << request.kind;
@@ -2601,17 +2608,43 @@
   }
 }
 
+void Dbg::DelayFullUndeoptimization() {
+  MutexLock mu(Thread::Current(), *deoptimization_lock_);
+  ++delayed_full_undeoptimization_count_;
+  DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
+}
+
+void Dbg::ProcessDelayedFullUndeoptimizations() {
+  // TODO: avoid taking the lock twice (once here and once in ManageDeoptimization).
+  {
+    MutexLock mu(Thread::Current(), *deoptimization_lock_);
+    while (delayed_full_undeoptimization_count_ > 0) {
+      DeoptimizationRequest req;
+      req.kind = DeoptimizationRequest::kFullUndeoptimization;
+      req.method = nullptr;
+      RequestDeoptimizationLocked(req);
+      --delayed_full_undeoptimization_count_;
+    }
+  }
+  ManageDeoptimization();
+}
+
 void Dbg::RequestDeoptimization(const DeoptimizationRequest& req) {
   if (req.kind == DeoptimizationRequest::kNothing) {
     // Nothing to do.
     return;
   }
   MutexLock mu(Thread::Current(), *deoptimization_lock_);
+  RequestDeoptimizationLocked(req);
+}
+
+void Dbg::RequestDeoptimizationLocked(const DeoptimizationRequest& req) {
   switch (req.kind) {
     case DeoptimizationRequest::kFullDeoptimization: {
       DCHECK(req.method == nullptr);
       if (full_deoptimization_event_count_ == 0) {
-        VLOG(jdwp) << "Request full deoptimization";
+        VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                   << " for full deoptimization";
         deoptimization_requests_.push_back(req);
       }
       ++full_deoptimization_event_count_;
@@ -2622,20 +2655,23 @@
       DCHECK_GT(full_deoptimization_event_count_, 0U);
       --full_deoptimization_event_count_;
       if (full_deoptimization_event_count_ == 0) {
-        VLOG(jdwp) << "Request full undeoptimization";
+        VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                   << " for full undeoptimization";
         deoptimization_requests_.push_back(req);
       }
       break;
     }
     case DeoptimizationRequest::kSelectiveDeoptimization: {
       DCHECK(req.method != nullptr);
-      VLOG(jdwp) << "Request deoptimization of " << PrettyMethod(req.method);
+      VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                 << " for deoptimization of " << PrettyMethod(req.method);
       deoptimization_requests_.push_back(req);
       break;
     }
     case DeoptimizationRequest::kSelectiveUndeoptimization: {
       DCHECK(req.method != nullptr);
-      VLOG(jdwp) << "Request undeoptimization of " << PrettyMethod(req.method);
+      VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
+                 << " for undeoptimization of " << PrettyMethod(req.method);
       deoptimization_requests_.push_back(req);
       break;
     }
@@ -2663,7 +2699,9 @@
   const ThreadState old_state = self->SetStateUnsafe(kRunnable);
   {
     MutexLock mu(self, *deoptimization_lock_);
+    size_t req_index = 0;
     for (const DeoptimizationRequest& request : deoptimization_requests_) {
+      VLOG(jdwp) << "Process deoptimization request #" << req_index++;
       ProcessDeoptimizationRequest(request);
     }
     deoptimization_requests_.clear();
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 23c9c6a..f9478c6 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -432,6 +432,13 @@
       LOCKS_EXCLUDED(deoptimization_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Support delayed full undeoptimization requests. This is currently only used for single-step
+  // events.
+  static void DelayFullUndeoptimization() LOCKS_EXCLUDED(deoptimization_lock_);
+  static void ProcessDelayedFullUndeoptimizations()
+      LOCKS_EXCLUDED(deoptimization_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Manage deoptimization after updating JDWP events list. Suspends all threads, processes each
   // request and finally resumes all threads.
   static void ManageDeoptimization()
@@ -541,6 +548,10 @@
   static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static void RequestDeoptimizationLocked(const DeoptimizationRequest& req)
+      EXCLUSIVE_LOCKS_REQUIRED(deoptimization_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static Mutex* alloc_tracker_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(alloc_tracker_lock_);
@@ -562,6 +573,10 @@
   // undeoptimize when the last event is unregistered (when the counter is set to 0).
   static size_t full_deoptimization_event_count_ GUARDED_BY(deoptimization_lock_);
 
+  // Count the number of full undeoptimization requests delayed to next resume or end of debug
+  // session.
+  static size_t delayed_full_undeoptimization_count_ GUARDED_BY(deoptimization_lock_);
+
   DISALLOW_COPY_AND_ASSIGN(Dbg);
 };
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 525e2b3..5233462 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -255,7 +255,7 @@
   visitor.WalkStack(true);
   CHECK_EQ(visitor.dex_pcs_.size(), thread->GetInstrumentationStack()->size());
 
-  if (!instrumentation->ShouldNotifyMethodEnterExitEvents()) {
+  if (instrumentation->ShouldNotifyMethodEnterExitEvents()) {
     // Create method enter events for all methods currently on the thread's stack. We only do this
     // if no debugger is attached to prevent from posting events twice.
     typedef std::deque<InstrumentationStackFrame>::const_reverse_iterator It;
@@ -302,8 +302,9 @@
       }
       bool removed_stub = false;
       // TODO: make this search more efficient?
-      for (InstrumentationStackFrame instrumentation_frame : *instrumentation_stack_) {
-        if (instrumentation_frame.frame_id_ == GetFrameId()) {
+      const size_t frameId = GetFrameId();
+      for (const InstrumentationStackFrame& instrumentation_frame : *instrumentation_stack_) {
+        if (instrumentation_frame.frame_id_ == frameId) {
           if (kVerboseInstrumentation) {
             LOG(INFO) << "  Removing exit stub in " << DescribeLocation();
           }
@@ -313,7 +314,7 @@
             CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
           }
           SetReturnPc(instrumentation_frame.return_pc_);
-          if (!instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
+          if (instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
             // Create the method exit events. As the methods didn't really exit the result is 0.
             // We only do this if no debugger is attached to prevent from posting events twice.
             instrumentation_->MethodExitEvent(thread_, instrumentation_frame.this_object_, m,
@@ -439,7 +440,7 @@
     // We're already set.
     return;
   }
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
   Runtime* runtime = Runtime::Current();
   Locks::thread_list_lock_->AssertNotHeld(self);
   if (desired_level > 0) {
@@ -451,7 +452,7 @@
     }
     runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
     instrumentation_stubs_installed_ = true;
-    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this);
   } else {
     interpreter_stubs_installed_ = false;
@@ -657,7 +658,7 @@
 
 // Indicates if instrumentation should notify method enter/exit events to the listeners.
 bool Instrumentation::ShouldNotifyMethodEnterExitEvents() const {
-  return deoptimization_enabled_ || interpreter_stubs_installed_;
+  return !deoptimization_enabled_ && !interpreter_stubs_installed_;
 }
 
 void Instrumentation::DeoptimizeEverything() {
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 9b3ea2e..12370a4 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -248,7 +248,16 @@
       Dbg::UnconfigureStep(pMod->step.threadId);
     }
   }
-  if (NeedsFullDeoptimization(pEvent->eventKind)) {
+  if (pEvent->eventKind == EK_SINGLE_STEP) {
+    // Special case for single-steps where we want to avoid the slow pattern deoptimize/undeoptimize
+    // loop between each single-step. In a IDE, this would happens each time the user click on the
+    // "single-step" button. Here we delay the full undeoptimization to the next resume
+    // (VM.Resume or ThreadReference.Resume) or the end of the debugging session (VM.Dispose or
+    // runtime shutdown).
+    // Therefore, in a singles-stepping sequence, only the first single-step will trigger a full
+    // deoptimization and only the last single-step will trigger a full undeoptimization.
+    Dbg::DelayFullUndeoptimization();
+  } else if (NeedsFullDeoptimization(pEvent->eventKind)) {
     CHECK_EQ(req.kind, DeoptimizationRequest::kNothing);
     CHECK(req.method == nullptr);
     req.kind = DeoptimizationRequest::kFullUndeoptimization;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c2a2b54..3e8b697 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -291,6 +291,7 @@
  */
 static JdwpError VM_Resume(JdwpState*, Request&, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Dbg::ProcessDelayedFullUndeoptimizations();
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -980,6 +981,8 @@
     return ERR_NONE;
   }
 
+  Dbg::ProcessDelayedFullUndeoptimizations();
+
   Dbg::ResumeThread(thread_id);
   return ERR_NONE;
 }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 5fc0228..aeb4016 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -332,6 +332,8 @@
     CHECK(event_list_ == NULL);
   }
 
+  Dbg::ProcessDelayedFullUndeoptimizations();
+
   /*
    * Should not have one of these in progress.  If the debugger went away
    * mid-request, though, we could see this.
