Rosalloc thread local allocation path without a cas.

Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees  4101 -> 2929 (~26% reduction)

Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 089ede4..c496a42 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -47,10 +47,10 @@
 
   // Allocate num_bytes, returns nullptr if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
-                        size_t* usable_size) OVERRIDE;
+                        size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE;
   // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector.
   mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated,
-                                    size_t* usable_size)
+                                    size_t* usable_size, size_t* bytes_tl_bulk_allocated)
       OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* AllocNonvirtual(size_t num_bytes);
@@ -103,9 +103,9 @@
 
   void Dump(std::ostream& os) const;
 
-  void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
-  void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
-                                                    Locks::thread_list_lock_);
+  size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
+  size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                      Locks::thread_list_lock_);
   void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_);
   void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
                                                               Locks::thread_list_lock_);