Merge "Update locations of registers after slow paths spilling."
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 15edcc5..8275162 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -449,14 +449,14 @@
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+        cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
       } else if (Arm64UseRelativeCall(cu, target_method)) {
         // Defer to linker patch.
       } else {
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
       if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+        cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
       }
@@ -479,7 +479,7 @@
       // Set up direct code if known.
       if (direct_code != 0) {
         if (direct_code != static_cast<uintptr_t>(-1)) {
-          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
         } else if (Arm64UseRelativeCall(cu, target_method)) {
           // Defer to linker patch.
         } else {
diff --git a/compiler/dex/quick/mips64/call_mips64.cc b/compiler/dex/quick/mips64/call_mips64.cc
index 63cef7e..31be1c2 100644
--- a/compiler/dex/quick/mips64/call_mips64.cc
+++ b/compiler/dex/quick/mips64/call_mips64.cc
@@ -356,12 +356,12 @@
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+        cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
       } else {
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
       if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+        cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
       }
@@ -382,7 +382,7 @@
       // Set up direct code if known.
       if (direct_code != 0) {
         if (direct_code != static_cast<uintptr_t>(-1)) {
-          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
         } else {
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
           cg->LoadCodeAddress(target_method, type, kInvokeTgt);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index c3db3a6..11c1465 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -332,7 +332,12 @@
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+        auto target_reg = cg->TargetReg(kArg0, kRef);
+        if (target_reg.Is64Bit()) {
+          cg->LoadConstantWide(target_reg, direct_method);
+        } else {
+          cg->LoadConstant(target_reg, direct_method);
+        }
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
       }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index be6c41a..3d3d5cb 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -932,7 +932,7 @@
     Runtime* current = Runtime::Current();
 
     // Suspend all threads.
-    current->GetThreadList()->SuspendAll();
+    current->GetThreadList()->SuspendAll(__FUNCTION__);
 
     std::string error_msg;
     std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(),
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c7f81ea..d238b2c 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -137,6 +137,7 @@
   if (oat_file_ == nullptr) {
     PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
         << ": " << error_msg;
+    oat_file->Erase();
     return false;
   }
   CHECK_EQ(class_linker->RegisterOatFile(oat_file_), oat_file_);
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 344d2b5..c27b3d4 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1539,7 +1539,7 @@
       {
         self->TransitionFromRunnableToSuspended(kNative);
         ThreadList* thread_list = Runtime::Current()->GetThreadList();
-        thread_list->SuspendAll();
+        thread_list->SuspendAll(__FUNCTION__);
         heap->RevokeAllThreadLocalAllocationStacks(self);
         thread_list->ResumeAll();
         self->TransitionFromSuspendedToRunnable();
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 2ec2b0c..13dcb8c 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -46,7 +46,6 @@
 Mutex* Locks::jni_libraries_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
 Mutex* Locks::mem_maps_lock_ = nullptr;
-Mutex* Locks::method_verifiers_lock_ = nullptr;
 Mutex* Locks::modify_ldt_lock_ = nullptr;
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
@@ -1002,10 +1001,6 @@
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
                                                       current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kMethodVerifiersLock);
-    DCHECK(method_verifiers_lock_ == nullptr);
-    method_verifiers_lock_ = new Mutex("Method verifiers lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kMonitorPoolLock);
     DCHECK(allocated_monitor_ids_lock_ == nullptr);
     allocated_monitor_ids_lock_ =  new Mutex("allocated monitor ids lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index f9e1e62..6e7b04f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -588,11 +588,9 @@
   // Guards lists of classes within the class linker.
   static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(breakpoint_lock_);
 
-  static Mutex* method_verifiers_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
-
   // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to hold a higher level Mutex.
-  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::method_verifiers_lock_)
+  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
 
   static Mutex* allocated_monitor_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 9f2a09b..6296cf5 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -644,7 +644,7 @@
   }
 
   Runtime* runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
@@ -668,7 +668,7 @@
   // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
@@ -819,7 +819,7 @@
   Thread* self = Thread::Current();
   CHECK_EQ(self->GetState(), kRunnable);
   self->TransitionFromRunnableToSuspended(kSuspended);
-  Runtime::Current()->GetThreadList()->SuspendAll();
+  Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
 
   MonitorInfo monitor_info(o);
 
@@ -3135,7 +3135,7 @@
   self->TransitionFromRunnableToSuspended(kWaitingForDeoptimization);
   // We need to suspend mutator threads first.
   Runtime* const runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   const ThreadState old_state = self->SetStateUnsafe(kRunnable);
   {
     MutexLock mu(self, *Locks::deoptimization_lock_);
@@ -4436,7 +4436,7 @@
         // RosAlloc's internal logic doesn't know to release and reacquire the heap bitmap lock.
         self->TransitionFromRunnableToSuspended(kSuspended);
         ThreadList* tl = Runtime::Current()->GetThreadList();
-        tl->SuspendAll();
+        tl->SuspendAll(__FUNCTION__);
         {
           ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
           space->AsRosAllocSpace()->Walk(HeapChunkContext::HeapChunkJavaCallback, &context);
@@ -4452,7 +4452,7 @@
         heap->IncrementDisableMovingGC(self);
         self->TransitionFromRunnableToSuspended(kSuspended);
         ThreadList* tl = Runtime::Current()->GetThreadList();
-        tl->SuspendAll();
+        tl->SuspendAll(__FUNCTION__);
         ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
         context.SetChunkOverhead(0);
         space->AsRegionSpace()->Walk(BumpPointerSpaceCallback, &context);
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0fdfcb3..88209a3 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -129,7 +129,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, nested_signal_state,
                         sizeof(void*) * kLockLevelCount);
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*));
-    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.flip_function, Thread, wait_mutex_, sizeof(void*),
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
+    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.method_verifier, Thread, wait_mutex_, sizeof(void*),
                        thread_tlsptr_end);
   }
 
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index eafcc45..47d6ada 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -157,7 +157,7 @@
 
 GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector)
     : start_time_(NanoTime()), collector_(collector) {
-  Runtime::Current()->GetThreadList()->SuspendAll();
+  Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
 }
 
 GarbageCollector::ScopedPause::~ScopedPause() {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 9421db5..0cad11f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -576,7 +576,7 @@
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kSuspended);
-  tl->SuspendAll();
+  tl->SuspendAll(__FUNCTION__);
   // Something may have caused the transition to fail.
   if (!IsMovingGc(collector_type_) && non_moving_space_ != main_space_) {
     CHECK(main_space_ != nullptr);
@@ -758,7 +758,7 @@
     IncrementDisableMovingGC(self);
     self->TransitionFromRunnableToSuspended(kWaitingForVisitObjects);
     ThreadList* tl = Runtime::Current()->GetThreadList();
-    tl->SuspendAll();
+    tl->SuspendAll(__FUNCTION__);
     VisitObjectsInternalRegionSpace(callback, arg);
     VisitObjectsInternal(callback, arg);
     tl->ResumeAll();
@@ -1058,7 +1058,7 @@
     // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care
     // about pauses.
     Runtime* runtime = Runtime::Current();
-    runtime->GetThreadList()->SuspendAll();
+    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
     uint64_t start_time = NanoTime();
     size_t count = runtime->GetMonitorList()->DeflateMonitors();
     VLOG(heap) << "Deflating " << count << " monitors took "
@@ -1697,7 +1697,7 @@
     return HomogeneousSpaceCompactResult::kErrorVMShuttingDown;
   }
   // Suspend all threads.
-  tl->SuspendAll();
+  tl->SuspendAll(__FUNCTION__);
   uint64_t start_time = NanoTime();
   // Launch compaction.
   space::MallocSpace* to_space = main_space_backup_.release();
@@ -1779,7 +1779,7 @@
     return;
   }
   collector::GarbageCollector* collector = nullptr;
-  tl->SuspendAll();
+  tl->SuspendAll(__FUNCTION__);
   switch (collector_type) {
     case kCollectorTypeSS: {
       if (!IsMovingGc(collector_type_)) {
@@ -1993,15 +1993,16 @@
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    size_t object_size = RoundUp(obj->SizeOf(), kObjectAlignment);
+    size_t obj_size = obj->SizeOf();
+    size_t alloc_size = RoundUp(obj_size, kObjectAlignment);
     mirror::Object* forward_address;
     // Find the smallest bin which we can move obj in.
-    auto it = bins_.lower_bound(object_size);
+    auto it = bins_.lower_bound(alloc_size);
     if (it == bins_.end()) {
       // No available space in the bins, place it in the target space instead (grows the zygote
       // space).
       size_t bytes_allocated, dummy;
-      forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr, &dummy);
+      forward_address = to_space_->Alloc(self_, alloc_size, &bytes_allocated, nullptr, &dummy);
       if (to_space_live_bitmap_ != nullptr) {
         to_space_live_bitmap_->Set(forward_address);
       } else {
@@ -2016,11 +2017,12 @@
       // Set the live and mark bits so that sweeping system weaks works properly.
       bin_live_bitmap_->Set(forward_address);
       bin_mark_bitmap_->Set(forward_address);
-      DCHECK_GE(size, object_size);
-      AddBin(size - object_size, pos + object_size);  // Add a new bin with the remaining space.
+      DCHECK_GE(size, alloc_size);
+      // Add a new bin with the remaining space.
+      AddBin(size - alloc_size, pos + alloc_size);
     }
-    // Copy the object over to its new location.
-    memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+    // Copy only obj_size bytes to the new location; using alloc_size causes a valgrind error.
+    memcpy(reinterpret_cast<void*>(forward_address), obj, obj_size);
     if (kUseBakerOrBrooksReadBarrier) {
       obj->AssertReadBarrierPointer();
       if (kUseBrooksReadBarrier) {
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index f140021..eb1d5f4 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -303,7 +303,7 @@
   // TODO: NO_THREAD_SAFETY_ANALYSIS.
   Thread* self = Thread::Current();
   ThreadList* tl = Runtime::Current()->GetThreadList();
-  tl->SuspendAll();
+  tl->SuspendAll(__FUNCTION__);
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     MutexLock mu2(self, *Locks::thread_list_lock_);
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 5a7b7e1..b822613 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1180,7 +1180,7 @@
     // comment in Heap::VisitObjects().
     heap->IncrementDisableMovingGC(self);
   }
-  Runtime::Current()->GetThreadList()->SuspendAll();
+  Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
   Hprof hprof(filename, fd, direct_to_ddms);
   hprof.Dump();
   Runtime::Current()->GetThreadList()->ResumeAll();
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 085062c..b53b8cd 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -607,7 +607,7 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   Locks::instrument_entrypoints_lock_->AssertHeld(self);
   if (runtime->IsStarted()) {
-    tl->SuspendAll();
+    tl->SuspendAll(__FUNCTION__);
   }
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 897287b..729791f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -32,9 +32,6 @@
 namespace jit {
 
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
-  if (kRuntimeISA == kArm64) {
-    return nullptr;
-  }
   if (!options.GetOrDefault(RuntimeArgumentMap::UseJIT)) {
     return nullptr;
   }
@@ -167,7 +164,7 @@
 void Jit::CreateInstrumentationCache(size_t compile_threshold) {
   CHECK_GT(compile_threshold, 0U);
   Runtime* const runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
   // something.
   instrumentation_cache_.reset(new jit::JitInstrumentationCache(compile_threshold));
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 495f753..97052f1 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -439,7 +439,7 @@
 }
 
 inline ObjectArray<ArtField>* Class::GetSFields() {
-  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(IsLoaded() || IsErroneous()) << GetStatus();
   return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a2f1481..189559e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1331,12 +1331,6 @@
     callee_save_methods_[i].VisitRootIfNonNull(callback, arg, RootInfo(kRootVMInternal));
   }
   verifier::MethodVerifier::VisitStaticRoots(callback, arg);
-  {
-    MutexLock mu(Thread::Current(), *Locks::method_verifiers_lock_);
-    for (verifier::MethodVerifier* verifier : method_verifiers_) {
-      verifier->VisitRoots(callback, arg);
-    }
-  }
   VisitTransactionRoots(callback, arg);
   instrumentation_.VisitRoots(callback, arg);
 }
@@ -1508,26 +1502,6 @@
   compile_time_class_paths_.Put(class_loader, class_path);
 }
 
-void Runtime::AddMethodVerifier(verifier::MethodVerifier* verifier) {
-  DCHECK(verifier != nullptr);
-  if (gAborting) {
-    return;
-  }
-  MutexLock mu(Thread::Current(), *Locks::method_verifiers_lock_);
-  method_verifiers_.insert(verifier);
-}
-
-void Runtime::RemoveMethodVerifier(verifier::MethodVerifier* verifier) {
-  DCHECK(verifier != nullptr);
-  if (gAborting) {
-    return;
-  }
-  MutexLock mu(Thread::Current(), *Locks::method_verifiers_lock_);
-  auto it = method_verifiers_.find(verifier);
-  CHECK(it != method_verifiers_.end());
-  method_verifiers_.erase(it);
-}
-
 void Runtime::StartProfiler(const char* profile_output_filename) {
   profile_output_filename_ = profile_output_filename;
   profiler_started_ =
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d54972c..3cf22bf 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -456,11 +456,6 @@
     return use_compile_time_class_path_;
   }
 
-  void AddMethodVerifier(verifier::MethodVerifier* verifier)
-      LOCKS_EXCLUDED(Locks::method_verifiers_lock_);
-  void RemoveMethodVerifier(verifier::MethodVerifier* verifier)
-      LOCKS_EXCLUDED(Locks::method_verifiers_lock_);
-
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
 
   // The caller is responsible for ensuring the class_path DexFiles remain
@@ -642,9 +637,6 @@
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
 
-  // Method verifier set, used so that we can update their GC roots.
-  std::set<verifier::MethodVerifier*> method_verifiers_ GUARDED_BY(Locks::method_verifiers_lock_);
-
   // A non-zero value indicates that a thread has been created but not yet initialized. Guarded by
   // the shutdown lock so that threads aren't born while we're shutting down.
   size_t threads_being_born_ GUARDED_BY(Locks::runtime_shutdown_lock_);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8e98d53..affb6cd 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -70,6 +70,7 @@
 #include "thread-inl.h"
 #include "utils.h"
 #include "verifier/dex_gc_map.h"
+#include "verifier/method_verifier.h"
 #include "verify_object-inl.h"
 #include "vmap_table.h"
 #include "well_known_classes.h"
@@ -2296,6 +2297,9 @@
       mapper.VisitShadowFrame(shadow_frame);
     }
   }
+  if (tlsPtr_.method_verifier != nullptr) {
+    tlsPtr_.method_verifier->VisitRoots(visitor, arg, RootInfo(kRootNativeStack, thread_id));
+  }
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
   RootCallbackVisitor visitor_to_callback(visitor, arg, thread_id);
@@ -2417,4 +2421,14 @@
   tlsPtr_.debug_invoke_req = nullptr;
 }
 
+void Thread::SetVerifier(verifier::MethodVerifier* verifier) {
+  CHECK(tlsPtr_.method_verifier == nullptr);
+  tlsPtr_.method_verifier = verifier;
+}
+
+void Thread::ClearVerifier(verifier::MethodVerifier* verifier) {
+  CHECK_EQ(tlsPtr_.method_verifier, verifier);
+  tlsPtr_.method_verifier = nullptr;
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 2e9ae3c..da7af83 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -62,6 +62,11 @@
   class StackTraceElement;
   class Throwable;
 }  // namespace mirror
+
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
 class BaseMutex;
 class ClassLinker;
 class Closure;
@@ -875,6 +880,9 @@
     return tls32_.suspended_at_suspend_check;
   }
 
+  void SetVerifier(verifier::MethodVerifier* verifier);
+  void ClearVerifier(verifier::MethodVerifier* verifier);
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -1055,10 +1063,8 @@
       pthread_self(0), last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
       thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
-      nested_signal_state(nullptr), flip_function(nullptr) {
-        for (size_t i = 0; i < kLockLevelCount; ++i) {
-          held_mutexes[i] = nullptr;
-        }
+      nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr) {
+      std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
 
     // The biased card table, see CardTable for details.
@@ -1172,6 +1178,9 @@
 
     // The function used for thread flip.
     Closure* flip_function;
+
+    // Current method verifier, used for root marking.
+    verifier::MethodVerifier* method_verifier;
   } tlsPtr_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index ddfbebd..1ab0093 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -448,13 +448,13 @@
   return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
 }
 
-void ThreadList::SuspendAll() {
+void ThreadList::SuspendAll(const char* cause) {
   Thread* self = Thread::Current();
 
   if (self != nullptr) {
-    VLOG(threads) << *self << " SuspendAll starting...";
+    VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
   } else {
-    VLOG(threads) << "Thread[null] SuspendAll starting...";
+    VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
   }
   ATRACE_BEGIN("Suspending mutator threads");
   const uint64_t start_time = NanoTime();
@@ -503,7 +503,7 @@
   }
 
   ATRACE_END();
-  ATRACE_BEGIN("Mutator threads suspended");
+  ATRACE_BEGIN((std::string("Mutator threads suspended for ") + cause).c_str());
 
   if (self != nullptr) {
     VLOG(threads) << *self << " SuspendAll complete";
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index de0dd79..c18e285 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -61,7 +61,7 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_);
 
   // Suspends all threads and gets exclusive access to the mutator_lock_.
-  void SuspendAll()
+  void SuspendAll(const char* cause)
       EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 8833a85..88be23f 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -313,7 +313,7 @@
       }
     }
 
-    runtime->GetThreadList()->SuspendAll();
+    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
     {
       MutexLock mu(self, *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(GetSample, the_trace);
@@ -367,7 +367,7 @@
   // Enable count of allocs if specified in the flags.
   bool enable_stats = false;
 
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
 
   // Create Trace object.
   {
@@ -421,7 +421,7 @@
     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, NULL), "sampling thread shutdown");
     sampling_pthread_ = 0U;
   }
-  runtime->GetThreadList()->SuspendAll();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   if (the_trace != nullptr) {
     stop_alloc_counting = (the_trace->flags_ & kTraceCountAllocs) != 0;
     the_trace->FinishTracing();
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index b3f686d..9ceb6f4 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -395,12 +395,12 @@
       has_virtual_or_interface_invokes_(false),
       verify_to_dump_(verify_to_dump),
       allow_thread_suspension_(allow_thread_suspension) {
-  Runtime::Current()->AddMethodVerifier(this);
+  self->SetVerifier(this);
   DCHECK(class_def != nullptr);
 }
 
 MethodVerifier::~MethodVerifier() {
-  Runtime::Current()->RemoveMethodVerifier(this);
+  Thread::Current()->ClearVerifier(this);
   STLDeleteElements(&failure_messages_);
 }
 
@@ -4334,8 +4334,8 @@
   RegTypeCache::VisitStaticRoots(callback, arg);
 }
 
-void MethodVerifier::VisitRoots(RootCallback* callback, void* arg) {
-  reg_types_.VisitRoots(callback, arg);
+void MethodVerifier::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
+  reg_types_.VisitRoots(callback, arg, root_info);
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index d7c2071..6b813ef 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -227,7 +227,8 @@
 
   static void VisitStaticRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& roots)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Accessors used by the compiler via CompilerCallback
   const DexFile::CodeItem* CodeItem() const;
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 3510665..201169f 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -778,8 +778,8 @@
   }
 }
 
-void RegType::VisitRoots(RootCallback* callback, void* arg) const {
-  klass_.VisitRootIfNonNull(callback, arg, RootInfo(kRootUnknown));
+void RegType::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const {
+  klass_.VisitRootIfNonNull(callback, arg, root_info);
 }
 
 void UninitializedThisReferenceType::CheckInvariants() const {
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 05958b5..73e131e 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -250,7 +250,7 @@
 
   virtual ~RegType() {}
 
-  void VisitRoots(RootCallback* callback, void* arg) const
+  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 22696c7..6e57857 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -238,9 +238,7 @@
   }
 }
 
-RegTypeCache::RegTypeCache(bool can_load_classes)
-    : entries_lock_("entries lock"),
-      can_load_classes_(can_load_classes) {
+RegTypeCache::RegTypeCache(bool can_load_classes) : can_load_classes_(can_load_classes) {
   if (kIsDebugBuild) {
     Thread::Current()->AssertThreadSuspensionIsAllowable(gAborting == 0);
   }
@@ -563,35 +561,33 @@
   // Visit the primitive types, this is required since if there are no active verifiers they wont
   // be in the entries array, and therefore not visited as roots.
   if (primitive_initialized_) {
-    UndefinedType::GetInstance()->VisitRoots(callback, arg);
-    ConflictType::GetInstance()->VisitRoots(callback, arg);
-    BooleanType::GetInstance()->VisitRoots(callback, arg);
-    ByteType::GetInstance()->VisitRoots(callback, arg);
-    ShortType::GetInstance()->VisitRoots(callback, arg);
-    CharType::GetInstance()->VisitRoots(callback, arg);
-    IntegerType::GetInstance()->VisitRoots(callback, arg);
-    LongLoType::GetInstance()->VisitRoots(callback, arg);
-    LongHiType::GetInstance()->VisitRoots(callback, arg);
-    FloatType::GetInstance()->VisitRoots(callback, arg);
-    DoubleLoType::GetInstance()->VisitRoots(callback, arg);
-    DoubleHiType::GetInstance()->VisitRoots(callback, arg);
+    RootInfo ri(kRootUnknown);
+    UndefinedType::GetInstance()->VisitRoots(callback, arg, ri);
+    ConflictType::GetInstance()->VisitRoots(callback, arg, ri);
+    BooleanType::GetInstance()->VisitRoots(callback, arg, ri);
+    ByteType::GetInstance()->VisitRoots(callback, arg, ri);
+    ShortType::GetInstance()->VisitRoots(callback, arg, ri);
+    CharType::GetInstance()->VisitRoots(callback, arg, ri);
+    IntegerType::GetInstance()->VisitRoots(callback, arg, ri);
+    LongLoType::GetInstance()->VisitRoots(callback, arg, ri);
+    LongHiType::GetInstance()->VisitRoots(callback, arg, ri);
+    FloatType::GetInstance()->VisitRoots(callback, arg, ri);
+    DoubleLoType::GetInstance()->VisitRoots(callback, arg, ri);
+    DoubleHiType::GetInstance()->VisitRoots(callback, arg, ri);
     for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
-      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(callback, arg);
+      small_precise_constants_[value - kMinSmallConstant]->VisitRoots(callback, arg, ri);
     }
   }
 }
 
-void RegTypeCache::VisitRoots(RootCallback* callback, void* arg) {
-  MutexLock mu(Thread::Current(), entries_lock_);
+void RegTypeCache::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
   // Exclude the static roots that are visited by VisitStaticRoots().
   for (size_t i = primitive_count_; i < entries_.size(); ++i) {
-    entries_[i]->VisitRoots(callback, arg);
+    entries_[i]->VisitRoots(callback, arg, root_info);
   }
 }
 
 void RegTypeCache::AddEntry(RegType* new_entry) {
-  // TODO: There is probably a faster way to do this by using thread local roots.
-  MutexLock mu(Thread::Current(), entries_lock_);
   entries_.push_back(new_entry);
 }
 
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 4b56fd6..01032a0 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -137,7 +137,8 @@
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
-  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void VisitStaticRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -171,9 +172,6 @@
   // Number of well known primitives that will be copied into a RegTypeCache upon construction.
   static uint16_t primitive_count_;
 
-  // Guards adding and visitng roots to prevent race conditions.
-  Mutex entries_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
   // The actual storage for the RegTypes.
   std::vector<const RegType*> entries_;