Improvements for table-lookup read barriers.

- Implement fast paths for the GC root read barrier routines.
- Avoid unnecessary CAS operations (both are sketched below).
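
A minimal sketch of the two ideas, using hypothetical names
(RootReadBarrier, MarkSlowPath) rather than the actual ART
routines: take the cheap exit when the GC is not marking, and
attempt the CAS only when the root actually needs updating.

  #include <atomic>

  struct Object {};

  // Hypothetical slow-path helper: returns the to-space copy of
  // |ref|, marking it if necessary. Stubbed out here.
  static Object* MarkSlowPath(Object* ref) { return ref; }

  static Object* RootReadBarrier(std::atomic<Object*>* root,
                                 bool is_gc_marking) {
    Object* ref = root->load(std::memory_order_relaxed);
    // Fast path: when the GC is not marking (or the root is
    // null), return the reference as-is; no lookup needed.
    if (!is_gc_marking || ref == nullptr) {
      return ref;
    }
    Object* to_ref = MarkSlowPath(ref);
    // Skip the CAS entirely when the root already holds the
    // to-space reference; only CAS when an update is needed.
    if (to_ref != ref) {
      root->compare_exchange_strong(ref, to_ref,
                                    std::memory_order_relaxed);
    }
    return to_ref;
  }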

Bug: 12687968
Change-Id: Iceef44e253062af5bf2295a521a9c64403deafe1
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 468179c..0a7a69f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -620,7 +620,10 @@
     gc_mark_stack_->PushBack(to_ref);
   } else {
     CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
-             static_cast<uint32_t>(kMarkStackModeGcExclusive));
+             static_cast<uint32_t>(kMarkStackModeGcExclusive))
+        << "ref=" << to_ref
+        << " self->gc_marking=" << self->GetIsGcMarking()
+        << " cc->is_marking=" << is_marking_;
     CHECK(self == thread_running_gc_)
         << "Only GC-running thread should access the mark stack "
         << "in the GC exclusive mark stack mode";