Change one read barrier bit to mark bit

Optimization to help slow path performance. When the GC marks an
object through the read barrier slow path, it sets the mark bit in
the lock word of that reference. The assembly entrypoint checks this
bit, and in the common case it is already set: the read barrier then
knows the object is already marked and there is no work to do.
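
Conceptually, the slow path now looks roughly like the sketch below
(not verbatim runtime code; the helper names MarkFromReadBarrierSlowPath,
GetMarkBit and PushOntoMarkBitStack are assumptions modeled on this
change, while AtomicSetMarkBit appears in the diff):

  // Hypothetical sketch of the read barrier slow path with the mark bit.
  mirror::Object* MarkFromReadBarrierSlowPath(mirror::Object* ref) {
    if (ref->GetMarkBit() != 0) {
      return ref;  // Already marked: common case, nothing to do.
    }
    mirror::Object* marked = Mark(ref);
    // Set the mark bit and remember the object so the bit can be
    // cleared again at the end of the GC cycle (rb_mark_bit_stack_).
    if (marked->AtomicSetMarkBit(/*expected*/ 0, /*set*/ 1)) {
      PushOntoMarkBitStack(marked);
    }
    return marked;
  }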

To prevent dirty pages in the zygote and image spaces, the bit is
pre-set by the image writer and during zygote space creation.

EAAC score (lower is better):
N9: 777 -> 700 (average of 31 runs)
N6P (960000 mhz): 1737.48 -> 1442.31 (average of 25 runs)

Bug: 30162165
Bug: 12687968

Test: N9, N6P booting, test-art-host, test-art-target all with CC

Change-Id: Iae0cacfae221e33151d3c0ab65338d1c822ab63d
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d7221e4..071537d 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -42,9 +42,6 @@
 namespace collector {
 
 static constexpr size_t kDefaultGcMarkStackSize = 2 * MB;
-// If kGrayDirtyImmuneObjects is true then we gray dirty objects in the GC pause to prevent dirty
-// pages.
-static constexpr bool kGrayDirtyImmuneObjects = true;
 // If kFilterModUnionCards then we attempt to filter cards that don't need to be dirty in the mod
 // union table. Disabled since it does not seem to help the pause much.
 static constexpr bool kFilterModUnionCards = kIsDebugBuild;
@@ -52,6 +49,9 @@
 // ConcurrentCopying::Scan. May be used to diagnose possibly unnecessary read barriers.
 // Only enabled for kIsDebugBuild to avoid performance hit.
 static constexpr bool kDisallowReadBarrierDuringScan = kIsDebugBuild;
+// Slow path mark stack size; increase this if the stack is getting full and it is causing
+// performance problems.
+static constexpr size_t kReadBarrierMarkStackSize = 512 * KB;
 
 ConcurrentCopying::ConcurrentCopying(Heap* heap,
                                      const std::string& name_prefix,
@@ -63,6 +63,10 @@
       gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
                                                      kDefaultGcMarkStackSize,
                                                      kDefaultGcMarkStackSize)),
+      rb_mark_bit_stack_(accounting::ObjectStack::Create("rb copying gc mark stack",
+                                                         kReadBarrierMarkStackSize,
+                                                         kReadBarrierMarkStackSize)),
+      rb_mark_bit_stack_full_(false),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
@@ -187,6 +191,7 @@
     CHECK(false_gray_stack_.empty());
   }
 
+  rb_mark_bit_stack_full_ = false;
   mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
   if (measure_read_barrier_slow_path_) {
     rb_slow_path_ns_.StoreRelaxed(0);
@@ -914,9 +919,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      CHECK(ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
           << "Ref " << ref << " " << PrettyTypeOf(ref)
-          << " has non-white rb_ptr " << ref->GetReadBarrierPointer();
+          << " has non-white rb_ptr ";
     }
   }
 
@@ -982,7 +987,7 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr())
+      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
           << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
     }
   }
@@ -2243,6 +2248,15 @@
         }
       }
     }
+    if (kUseBakerReadBarrier) {
+      TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
+      DCHECK(rb_mark_bit_stack_.get() != nullptr);
+      const auto* limit = rb_mark_bit_stack_->End();
+      for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
+        CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
+      }
+      rb_mark_bit_stack_->Reset();
+    }
   }
   if (measure_read_barrier_slow_path_) {
     MutexLock mu(self, rb_slow_path_histogram_lock_);