Revert "Revert "ART: Improve JitProfile perf in arm/arm64 mterp""

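Ready for review.

Re-lands the JIT hotness-sampling changes. In runtime/jit/jit_instrumentation.{cc,h}
this change:
  - narrows the hot/warm/OSR method thresholds from size_t to uint16_t and adds
    accessors for the warm and OSR thresholds;
  - makes JitInstrumentationCache::AddSamples() take a sample count and add it to
    the method's hotness counter in one call, crossing at most one threshold per
    call and saturating at the uint16_t maximum instead of wrapping;
  - returns early from AddSamples() when the thread pool has already been deleted
    during shutdown;
  - drops kBranch from JitInstrumentationListener::kJitEvents;
  - adds the kJitCheckForOSR and kJitHotnessDisabled constants.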

This reverts commit 6aef867f4d1a98a12bcdd65e9bf2ff894f0f2d7e.

Change-Id: I5d53ed2bedc7e429ce7d3cdf80b6696a9628740e
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d751e5a..b18d6a2 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -80,9 +80,9 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
-                                                 size_t warm_method_threshold,
-                                                 size_t osr_method_threshold)
+JitInstrumentationCache::JitInstrumentationCache(uint16_t hot_method_threshold,
+                                                 uint16_t warm_method_threshold,
+                                                 uint16_t osr_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
       warm_method_threshold_(warm_method_threshold),
       osr_method_threshold_(osr_method_threshold),
@@ -130,44 +130,62 @@
   }
 }
 
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, uint16_t count) {
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.
-  if (method->IsClassInitializer() || method->IsNative()) {
+  // than we want, resulting in samples even after the method is compiled. Also, if the
+  // JIT is no longer interested in hotness samples because we're shutting down, just return.
+  if (method->IsClassInitializer() || method->IsNative() || (thread_pool_ == nullptr)) {
+    if (thread_pool_ == nullptr) {
+      // Should only see this when shutting down.
+      DCHECK(Runtime::Current()->IsShuttingDown(self));
+    }
     return;
   }
   DCHECK(thread_pool_ != nullptr);
+  DCHECK_GT(warm_method_threshold_, 0);
+  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
+  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
 
-  uint16_t sample_count = method->IncrementCounter();
-  if (sample_count == warm_method_threshold_) {
-    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
-    if (success) {
-      VLOG(jit) << "Start profiling " << PrettyMethod(method);
+  int32_t starting_count = method->GetCounter();
+  int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
+  if (starting_count < warm_method_threshold_) {
+    if (new_count >= warm_method_threshold_) {
+      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+      if (success) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+      }
+
+      if (thread_pool_ == nullptr) {
+        // Calling ProfilingInfo::Create might put us in a suspended state, which could
+        // lead to the thread pool being deleted when we are shutting down.
+        DCHECK(Runtime::Current()->IsShuttingDown(self));
+        return;
+      }
+
+      if (!success) {
+        // We failed allocating. Instead of doing the collection on the Java thread, we push
+        // an allocation to a compiler thread, that will do the collection.
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      }
     }
-
-    if (thread_pool_ == nullptr) {
-      // Calling ProfilingInfo::Create might put us in a suspended state, which could
-      // lead to the thread pool being deleted when we are shutting down.
-      DCHECK(Runtime::Current()->IsShuttingDown(self));
-      return;
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, hot_method_threshold_ - 1);
+  } else if (starting_count < hot_method_threshold_) {
+    if (new_count >= hot_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
     }
-
-    if (!success) {
-      // We failed allocating. Instead of doing the collection on the Java thread, we push
-      // an allocation to a compiler thread, that will do the collection.
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, osr_method_threshold_ - 1);
+  } else if (starting_count < osr_method_threshold_) {
+    if (new_count >= osr_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
     }
   }
-
-  if (sample_count == hot_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-  }
-
-  if (sample_count == osr_method_threshold_) {
-    DCHECK(thread_pool_ != nullptr);
-    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
-  }
+  // Update hotness counter, but avoid wrap around.
+  method->SetCounter(
+      std::min(new_count, static_cast<int32_t>(std::numeric_limits<uint16_t>::max())));
 }
 
 JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index d1c5c44..7ffd4eb 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -40,6 +40,8 @@
 class Thread;
 
 namespace jit {
+static constexpr int16_t kJitCheckForOSR = -1;
+static constexpr int16_t kJitHotnessDisabled = -2;
 
 class JitInstrumentationCache;
 
@@ -84,7 +86,6 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
@@ -96,25 +97,33 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  JitInstrumentationCache(size_t hot_method_threshold,
-                          size_t warm_method_threshold,
-                          size_t osr_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
+  JitInstrumentationCache(uint16_t hot_method_threshold,
+                          uint16_t warm_method_threshold,
+                          uint16_t osr_method_threshold);
+  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool(Thread* self);
 
+  size_t OSRMethodThreshold() const {
+    return osr_method_threshold_;
+  }
+
   size_t HotMethodThreshold() const {
     return hot_method_threshold_;
   }
 
+  size_t WarmMethodThreshold() const {
+    return warm_method_threshold_;
+  }
+
   // Wait until there is no more pending compilation tasks.
   void WaitForCompilationToFinish(Thread* self);
 
  private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  size_t osr_method_threshold_;
+  uint16_t hot_method_threshold_;
+  uint16_t warm_method_threshold_;
+  uint16_t osr_method_threshold_;
   JitInstrumentationListener listener_;
   std::unique_ptr<ThreadPool> thread_pool_;