Refactor and improve GC root handling

Changed GcRoot to use compressed references. Changed root visiting to
use virtual functions instead of function pointers. Changed root visting
interface to be an array of roots instead of a single root at a time.
Added buffered root marking helper to avoid dispatch overhead.

Root marking seems a bit faster on EvaluateAndApplyChanges due to batch
marking. Pause times unaffected.

Mips64 is untested but might work, maybe.

Before:
MarkConcurrentRoots: Sum: 67.678ms 99% C.I. 2us-664.999us Avg: 161.138us Max: 671us

After:
MarkConcurrentRoots: Sum: 54.806ms 99% C.I. 2us-499.986us Avg: 136.333us Max: 602us

Bug: 19264997

Change-Id: I0a71ebb5928f205b9b3f7945b25db6489d5657ca
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3f99e21..31cea17 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -185,11 +185,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void VerifyRootMarked(mirror::Object** root, void* arg, const RootInfo& root_info)
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                          const RootInfo& info) OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -197,9 +198,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void MarkRootParallelCallback(mirror::Object** root, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object.
   void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -250,9 +248,8 @@
   // whether or not we care about pauses.
   size_t GetThreadCount(bool paused) const;
 
-  static void VerifyRootCallback(mirror::Object** root, void* arg, const RootInfo& root_info);
-
-  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyRoot(const mirror::Object* root, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Push a single reference on a mark stack.
   void PushOnMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -326,18 +323,21 @@
   friend class CardScanTask;
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
+  friend class CheckpointMarkThreadRoots;
   friend class art::gc::Heap;
+  friend class FifoMarkStackChunk;
   friend class MarkObjectVisitor;
+  template<bool kUseFinger> friend class MarkStackTask;
+  friend class MarkSweepMarkObjectSlowPath;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
-  friend class ModUnionVisitor;
+  friend class ModUnionScanImageRootVisitor;
   friend class ModUnionTableBitmap;
   friend class ModUnionTableReferenceCache;
-  friend class ModUnionScanImageRootVisitor;
-  template<bool kUseFinger> friend class MarkStackTask;
-  friend class FifoMarkStackChunk;
-  friend class MarkSweepMarkObjectSlowPath;
+  friend class ModUnionVisitor;
+  friend class VerifyRootMarkedVisitor;
+  friend class VerifyRootVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(MarkSweep);
 };