Revert^2 "Reduce the number of calls to Jit::AddSamples."

Jit::AddSamples is called on every invoke, which is expensive.
Keep adding samples, but don't check the consequences on every call.

This reduces its cost from 3.5% to 1% (maps on device).

This reverts commit 0402f4b019c1d6c567b1c56589f1ea9170ab5dcc
and fixes 667-jit-jni-stub, which relied on being able to set
the sample count to exactly one below the compilation threshold
and then expected the compilation to happen on the next sample.

Test: ./art/test.py -b --host -r --ndebug
Test: ./art/test.py -b -r -t 570
Change-Id: I99c6d03f565f17fe6539ed89632d8f8bbda68107
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 7ce5f07..10f6094 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -47,6 +47,7 @@
 // At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
 // See android/os/Process.java.
 static constexpr int kJitPoolThreadPthreadDefaultPriority = 9;
+static constexpr uint32_t kJitSamplesBatchSize = 32;  // Must be power of 2.
 
 class JitOptions {
  public:
@@ -122,12 +123,16 @@
   }
 
  private:
+  // Samples are added in batches of size kJitSamplesBatchSize.
+  // This method rounds the threshold up so that it is a multiple of the batch size.
+  static uint32_t RoundUpThreshold(uint32_t threshold);
+
   bool use_jit_compilation_;
   size_t code_cache_initial_capacity_;
   size_t code_cache_max_capacity_;
-  uint16_t compile_threshold_;
-  uint16_t warmup_threshold_;
-  uint16_t osr_threshold_;
+  uint32_t compile_threshold_;
+  uint32_t warmup_threshold_;
+  uint32_t osr_threshold_;
   uint16_t priority_thread_weight_;
   uint16_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
@@ -154,7 +159,7 @@
   static constexpr size_t kDefaultPriorityThreadWeightRatio = 1000;
   static constexpr size_t kDefaultInvokeTransitionWeightRatio = 500;
   // How frequently should the interpreter check to see if OSR compilation is ready.
-  static constexpr int16_t kJitRecheckOSRThreshold = 100;
+  static constexpr int16_t kJitRecheckOSRThreshold = 101;  // Prime number to avoid patterns.
 
   virtual ~Jit();
 
@@ -217,7 +222,10 @@
   void MethodEntered(Thread* thread, ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
+  ALWAYS_INLINE void AddSamples(Thread* self,
+                                ArtMethod* method,
+                                uint16_t samples,
+                                bool with_backedges)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(ObjPtr<mirror::Object> this_object,
@@ -297,6 +305,15 @@
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
+  // Compile the method if the number of samples passes a threshold.
+  // Returns false if we cannot compile now: do not increment the counter, and retry later.
+  bool MaybeCompileMethod(Thread* self,
+                          ArtMethod* method,
+                          uint32_t old_count,
+                          uint32_t new_count,
+                          bool with_backedges)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static bool BindCompilerMethods(std::string* error_msg);
 
   // JIT compiler