Sticky mark bits "generational" GC

Sticky mark bits GC. Sticky mark bits implemented using allocation stack which enables us to use the previous GC live bitmap as the mark bitmap.

Removed heap_end_ since checking versus it caused more overhead than it saved.

Removed check for impossible allocations at the start of AllocObject since these allocations will just fall through and fail anyways.
These allocations do not happen often enough for it to be worth checking for.

A bunch of constant optimization performance improvements.

Pre locking regression benchmark improvements:
Deltablue: ~0.3 sec runtime.
CaffeineMark: ~300 total score due to improved string score.

Change-Id: I15016f1ae7fdf76fc3aadb5774b527bf802d9701
diff --git a/src/card_table.h b/src/card_table.h
index e1d0646..66357c9 100644
--- a/src/card_table.h
+++ b/src/card_table.h
@@ -73,8 +73,9 @@
   }
 
   // For every dirty card between begin and end invoke the visitor with the specified argument.
-  template <typename Visitor>
-  void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor) const
+  template <typename Visitor, typename FingerVisitor>
+  void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
+            const Visitor& visitor, const FingerVisitor& finger_visitor) const
       EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(bitmap->HasAddress(scan_begin));
@@ -82,21 +83,22 @@
     byte* card_cur = CardFromAddr(scan_begin);
     byte* card_end = CardFromAddr(scan_end);
     while (card_cur < card_end) {
-      while (card_cur < card_end && *card_cur == GC_CARD_CLEAN) {
-        card_cur++;
+      // Find the first dirty card.
+      card_cur = reinterpret_cast<byte*>(memchr(card_cur, GC_CARD_DIRTY, card_end - card_cur));
+      if (card_cur == NULL) {
+        break;
       }
-      byte* run_start = card_cur;
+      byte* run_start = card_cur++;
 
-      while (card_cur < card_end && *card_cur == GC_CARD_DIRTY) {
+      // Guaranteed to have at least one clean card at the end of the array.
+      while (*card_cur == GC_CARD_DIRTY) {
         card_cur++;
       }
       byte* run_end = card_cur;
 
-      if (run_start != run_end) {
-        uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(run_start));
-        uintptr_t end = reinterpret_cast<uintptr_t>(AddrFromCard(run_end));
-        bitmap->VisitMarkedRange(start, end, visitor);
-      }
+      uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(run_start));
+      uintptr_t end = reinterpret_cast<uintptr_t>(AddrFromCard(run_end));
+      bitmap->VisitMarkedRange(start, end, visitor, finger_visitor);
     }
   }
 
@@ -109,6 +111,12 @@
   // Resets all of the bytes in the card table which do not map to the image space.
   void ClearSpaceCards(Space* space);
 
+  // Clean all the cards which map to a space.
+  void PreClearCards(Space* space, std::vector<byte*>& out_cards);
+
+  // Returns all of the dirty cards which map to a space.
+  void GetDirtyCards(Space* space, std::vector<byte*>& out_cards) const;
+
   // Returns the first address in the heap which maps to this card.
   void* AddrFromCard(const byte *card_addr) const {
     DCHECK(IsValidCard(card_addr))