Move some fast invoke checks to CanUseMterp

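The per-invoke checks in UseInterpreterToInterpreterFastPath for
"runtime started", method entry listeners and JIT-at-first-use are
removed. CanUseMterp now checks IsStarted(), !IsAotCompiler(),
!Instrumentation::IsActive() and !JitAtFirstUse() instead, and the
fast path is additionally gated on self->UseMterp(). The remaining
IsStarted() and IsActiveTransaction() conditions become DCHECKs.

This speeds up arm64 golem interpreter benchmarks by 1.5%.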

Test: test.py -b -r --interpreter --host
Change-Id: Ia9d7c885cd488de56c6b726373072070b509bdf1
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index b37a278..5784b9b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -248,6 +248,14 @@
     bool from_deoptimize = false) REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
+
+  // Check that we are using the right interpreter.
+  if (kIsDebugBuild && self->UseMterp() != CanUseMterp()) {
+    // The flag might be currently being updated on all threads. Retry with lock.
+    MutexLock tll_mu(self, *Locks::thread_list_lock_);
+    DCHECK_EQ(self->UseMterp(), CanUseMterp());
+  }
+
   if (LIKELY(!from_deoptimize)) {  // Entering the method, but not via deoptimization.
     if (kIsDebugBuild) {
       CHECK_EQ(shadow_frame.GetDexPC(), 0u);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 96588c8..9924aa5 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -128,10 +128,8 @@
 static ALWAYS_INLINE bool UseInterpreterToInterpreterFastPath(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   Runtime* runtime = Runtime::Current();
-  if (!runtime->IsStarted()) {
-    return false;
-  }
   const void* quick_code = method->GetEntryPointFromQuickCompiledCode();
+  DCHECK(runtime->IsStarted());
   if (!runtime->GetClassLinker()->IsQuickToInterpreterBridge(quick_code)) {
     return false;
   }
@@ -144,16 +142,11 @@
   if (type == kStatic && !method->GetDeclaringClass()->IsInitialized()) {
     return false;
   }
-  if (runtime->IsActiveTransaction() || runtime->GetInstrumentation()->HasMethodEntryListeners()) {
-    return false;
-  }
+  DCHECK(!runtime->IsActiveTransaction());
   ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
   if ((profiling_info != nullptr) && (profiling_info->GetSavedEntryPoint() != nullptr)) {
     return false;
   }
-  if (runtime->GetJit() != nullptr && runtime->GetJit()->JitAtFirstUse()) {
-    return false;
-  }
   return true;
 }
 
@@ -171,7 +164,9 @@
                                    JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Make sure to check for async exceptions before anything else.
-  if (UNLIKELY(self->ObserveAsyncException())) {
+  if (is_mterp && self->UseMterp()) {
+    DCHECK(!self->ObserveAsyncException());
+  } else if (UNLIKELY(self->ObserveAsyncException())) {
     return false;
   }
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -229,7 +224,7 @@
     }
   }
 
-  if (is_mterp && UseInterpreterToInterpreterFastPath<type>(called_method)) {
+  if (is_mterp && self->UseMterp() && UseInterpreterToInterpreterFastPath<type>(called_method)) {
     const uint16_t number_of_inputs =
         (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
     CodeItemDataAccessor accessor(called_method->DexInstructionData());
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 4b6f430..ba109bc 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -146,15 +146,18 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Runtime* const runtime = Runtime::Current();
   return
+      runtime->IsStarted() &&
+      !runtime->IsAotCompiler() &&
       !Dbg::IsDebuggerActive() &&
-      !runtime->GetInstrumentation()->NonJitProfilingActive() &&
+      !runtime->GetInstrumentation()->IsActive() &&
       // mterp only knows how to deal with the normal exits. It cannot handle any of the
       // non-standard force-returns.
       !runtime->AreNonStandardExitsEnabled() &&
       // An async exception has been thrown. We need to go to the switch interpreter. MTerp doesn't
       // know how to deal with these so we could end up never dealing with it if we are in an
       // infinite loop.
-      !runtime->AreAsyncExceptionsThrown();
+      !runtime->AreAsyncExceptionsThrown() &&
+      (runtime->GetJit() == nullptr || !runtime->GetJit()->JitAtFirstUse());
 }
 
 
@@ -560,6 +563,7 @@
     MutexLock tll_mu(self, *Locks::thread_list_lock_);
     DCHECK_EQ(self->UseMterp(), CanUseMterp());
   }
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   uint16_t inst_data = inst->Fetch16(0);
   if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {