RosAlloc thread-local allocation path without a CAS.

Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees  4101 -> 2929 (~29% reduction)

Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index ced25a4..f140021 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -154,7 +154,8 @@
 }
 
 mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes,
-                                               size_t* bytes_allocated, size_t* usable_size) {
+                                               size_t* bytes_allocated, size_t* usable_size,
+                                               size_t* bytes_tl_bulk_allocated) {
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
@@ -162,7 +163,8 @@
     size_t max_allowed = Capacity();
     rosalloc_->SetFootprintLimit(max_allowed);
     // Try the allocation.
-    result = AllocCommon(self, num_bytes, bytes_allocated, usable_size);
+    result = AllocCommon(self, num_bytes, bytes_allocated, usable_size,
+                         bytes_tl_bulk_allocated);
     // Shrink back down as small as possible.
     size_t footprint = rosalloc_->Footprint();
     rosalloc_->SetFootprintLimit(footprint);
@@ -209,7 +211,7 @@
       __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + kPrefetchLookAhead]));
     }
     if (kVerifyFreedBytes) {
-      verify_bytes += AllocationSizeNonvirtual(ptrs[i], nullptr);
+      verify_bytes += AllocationSizeNonvirtual<true>(ptrs[i], nullptr);
     }
   }
 
@@ -338,12 +340,12 @@
   }
 }
 
-void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
-  rosalloc_->RevokeThreadLocalRuns(thread);
+size_t RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  return rosalloc_->RevokeThreadLocalRuns(thread);
 }
 
-void RosAllocSpace::RevokeAllThreadLocalBuffers() {
-  rosalloc_->RevokeAllThreadLocalRuns();
+size_t RosAllocSpace::RevokeAllThreadLocalBuffers() {
+  return rosalloc_->RevokeAllThreadLocalRuns();
 }
 
 void RosAllocSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) {