Add a thread-local allocation stack.

With this change, Ritz MemAllocTest gets ~14% faster on N4.

Bug: 9986565
Change-Id: I2fb7d6f7c5daa63dd4fc73ba739e6ae4ed820617
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e416c0e..80a5a1a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -111,6 +111,9 @@
 // If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace
 static constexpr bool kUseRosAlloc = true;
 
+// If true, use a thread-local allocation stack.
+static constexpr bool kUseThreadLocalAllocationStack = true;
+
 // The process state passed in from the activity manager, used to determine when to do trimming
 // and compaction.
 enum ProcessState {
@@ -665,11 +668,17 @@
       SHARED_LOCKS_REQUIRED(GlobalSychronization::heap_bitmap_lock_);
 
   // Swap the allocation stack with the live stack.
-  void SwapStacks();
+  void SwapStacks(Thread* self);
+
+  // Revoke all the thread-local allocation stacks.
+  void RevokeAllThreadLocalAllocationStacks(Thread* self);
 
   // Clear cards and update the mod union table.
   void ProcessCards(TimingLogger& timings);
 
+  // Push an object onto the allocation stack.
+  void PushOnAllocationStack(Thread* self, mirror::Object* obj);
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;