Rosalloc thread local allocation path without a cas.

Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees  4101 -> 2929 (~26% reduction)

Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 8aac484..ee4e752 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -292,6 +292,7 @@
   Runtime::Current()->AllowNewSystemWeaks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    GetHeap()->RecordFreeRevoke();
     // Reclaim unmarked objects.
     Sweep(false);
     // Swap the live and mark bitmaps for each space which we modified space. This is an