Revert "Revert "ART: Improve JitProfile perf in arm/arm64 mterp""

Ready for review.

This reverts commit 6aef867f4d1a98a12bcdd65e9bf2ff894f0f2d7e.

Change-Id: I5d53ed2bedc7e429ce7d3cdf80b6696a9628740e
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 0488dbf..f9941d2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,6 +18,7 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -72,7 +73,6 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                         \
   do {                                                                                         \
-    ArtMethod* method = shadow_frame.GetMethod();                                              \
     instrumentation->Branch(self, method, dex_pc, offset);                                     \
     JValue result;                                                                             \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
@@ -80,6 +80,13 @@
     }                                                                                          \
   } while (false)
 
+#define HOTNESS_UPDATE()                                                                       \
+  do {                                                                                         \
+    if (jit_instrumentation_cache != nullptr) {                                                \
+      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
+    }                                                                                          \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -101,6 +108,12 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+  ArtMethod* method = shadow_frame.GetMethod();
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
+  if (jit != nullptr) {
+    jit_instrumentation_cache = jit->GetInstrumentationCache();
+  }
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
@@ -564,6 +577,7 @@
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -574,6 +588,7 @@
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -584,6 +599,7 @@
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -594,6 +610,7 @@
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -604,6 +621,7 @@
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
+          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -708,6 +726,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -724,6 +743,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -740,6 +760,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -756,6 +777,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -772,6 +794,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -788,6 +811,7 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +827,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -818,6 +843,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -833,6 +859,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -848,6 +875,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -863,6 +891,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -878,6 +907,7 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
+            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);