Merge "Small update to CFG printing using DOT"
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index e8d48e4..ba54e04 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -97,6 +97,8 @@
 
   ifeq ($$(art_target_or_host),target)
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    # HACK: force 32-bit until 64-bit dex2oat can handle 32-bit
+    LOCAL_32_BIT_ONLY := true
   endif
 
   ifeq ($$(art_target_or_host),target)
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 7890d81..64fa685 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -251,6 +251,15 @@
   /* Reassociate sreg names with original Dalvik vreg names. */
   cu.mir_graph->RemapRegLocations();
 
+  /* Free Arenas from the cu.arena_stack for reuse by the cu.arena in the codegen. */
+  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
+    if (cu.arena_stack.PeakBytesAllocated() > 256 * 1024) {
+      MemStats stack_stats(cu.arena_stack.GetPeakStats());
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
+    }
+  }
+  cu.arena_stack.Reset();
+
   CompiledMethod* result = NULL;
 
   cu.cg->Materialize();
@@ -266,12 +275,9 @@
   }
 
   if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
-    if (cu.arena.BytesAllocated() > (1 * 1024 *1024) ||
-        cu.arena_stack.PeakBytesAllocated() > 256 * 1024) {
+    if (cu.arena.BytesAllocated() > (1 * 1024 * 1024)) {
       MemStats mem_stats(cu.arena.GetMemStats());
-      MemStats peak_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats)
-          << Dumpable<MemStats>(peak_stats);
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index e3dc554..f3c5a34 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1163,40 +1163,35 @@
     // TODO - add Mips implementation
     return false;
   }
-  if (cu_->instruction_set == kThumb2) {
-    RegLocation rl_src = info->args[0];
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_dest = InlineTargetWide(info);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    int sign_reg = AllocTemp();
-    // abs(x) = y<=x>>31, (x+y)^y.
-    OpRegRegImm(kOpAsr, sign_reg, rl_src.reg.GetHighReg(), 31);
-    OpRegRegReg(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), sign_reg);
-    OpRegRegReg(kOpAdc, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetReg(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetHighReg(), sign_reg);
-    StoreValueWide(rl_dest, rl_result);
-    return true;
-  } else {
-    DCHECK_EQ(cu_->instruction_set, kX86);
-    // Reuse source registers to avoid running out of temps
-    RegLocation rl_src = info->args[0];
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    RegLocation rl_dest = InlineTargetWide(info);
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-    OpRegCopyWide(rl_result.reg.GetReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
-    FreeTemp(rl_src.reg.GetReg());
-    FreeTemp(rl_src.reg.GetHighReg());
-    int sign_reg = AllocTemp();
-    // abs(x) = y<=x>>31, (x+y)^y.
-    OpRegRegImm(kOpAsr, sign_reg, rl_result.reg.GetHighReg(), 31);
-    OpRegReg(kOpAdd, rl_result.reg.GetReg(), sign_reg);
-    OpRegReg(kOpAdc, rl_result.reg.GetHighReg(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetReg(), sign_reg);
-    OpRegReg(kOpXor, rl_result.reg.GetHighReg(), sign_reg);
-    StoreValueWide(rl_dest, rl_result);
-    return true;
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTargetWide(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  // If on x86 or if we would clobber a register needed later, just copy the source first.
+  if (cu_->instruction_set == kX86 || rl_result.reg.GetReg() == rl_src.reg.GetHighReg()) {
+    OpRegCopyWide(rl_result.reg.GetReg(), rl_result.reg.GetHighReg(),
+                  rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
+    if (rl_result.reg.GetReg() != rl_src.reg.GetReg() &&
+        rl_result.reg.GetReg() != rl_src.reg.GetHighReg() &&
+        rl_result.reg.GetHighReg() != rl_src.reg.GetReg() &&
+        rl_result.reg.GetHighReg() != rl_src.reg.GetHighReg()) {
+      // Reuse source registers to avoid running out of temps.
+      FreeTemp(rl_src.reg.GetReg());
+      FreeTemp(rl_src.reg.GetHighReg());
+    }
+    rl_src = rl_result;
   }
+
+  // abs(x) = (x + y) ^ y, where y is the sign mask (high word >> 31: 0 or -1).
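+  // E.g. x = -5: y = -1, x + y = -6, -6 ^ -1 = 5; for x >= 0, y = 0 and nothing changes.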
+  int sign_reg = AllocTemp();
+  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg.GetHighReg(), 31);
+  OpRegRegReg(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), sign_reg);
+  OpRegRegReg(kOpAdc, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), sign_reg);
+  OpRegReg(kOpXor, rl_result.reg.GetReg(), sign_reg);
+  OpRegReg(kOpXor, rl_result.reg.GetHighReg(), sign_reg);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
 }
 
 bool Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index dab98d9..5f89c21 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -621,8 +621,9 @@
   int map_size = sizeof(int) * cu_->num_dalvik_registers;
 
   /* Save SSA map snapshot */
+  ScopedArenaAllocator allocator(&cu_->arena_stack);
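+  // Memory from this scoped allocator is reclaimed when it goes out of scope, so the
+  // snapshot must not outlive this frame (hence no reassignment of vreg_to_ssa_map_ below).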
   int* saved_ssa_map =
-      static_cast<int*>(arena_->Alloc(map_size, kArenaAllocDalvikToSSAMap));
+      static_cast<int*>(allocator.Alloc(map_size, kArenaAllocDalvikToSSAMap));
   memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size);
 
   if (block->fall_through != NullBasicBlockId) {
@@ -648,7 +649,6 @@
       memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size);
     }
   }
-  vreg_to_ssa_map_ = saved_ssa_map;
   return;
 }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 5078182..6824183 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -579,37 +579,51 @@
   image_writer->FixupObject(obj, copy);
 }
 
+class FixupVisitor {
+ public:
+  FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) {
+  }
+
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset, false);
+    // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+    // image.
+    copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
+        offset, image_writer_->GetImageAddress(ref), false);
+  }
+
+  // java.lang.ref.Reference visitor.
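+  // The referent is not visited as an ordinary instance field, so fix it up via this overload.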
+  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
+        mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent()),
+        false);
+  }
+
+ private:
+  ImageWriter* const image_writer_;
+  mirror::Object* const copy_;
+};
+
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
-  DCHECK(orig != NULL);
-  DCHECK(copy != NULL);
-  copy->SetClass<kVerifyNone>(down_cast<Class*>(GetImageAddress(orig->GetClass())));
+  DCHECK(orig != nullptr);
+  DCHECK(copy != nullptr);
   if (kUseBrooksPointer) {
     orig->AssertSelfBrooksPointer();
     // Note the address 'copy' isn't the same as the image address of 'orig'.
     copy->SetBrooksPointer(GetImageAddress(orig));
-    DCHECK(copy->GetBrooksPointer() == GetImageAddress(orig));
+    DCHECK_EQ(copy->GetBrooksPointer(), GetImageAddress(orig));
   }
-  // TODO: special case init of pointers to malloc data (or removal of these pointers)
-  if (orig->IsClass<kVerifyNone>()) {
-    FixupClass(orig->AsClass<kVerifyNone>(), down_cast<Class*>(copy));
-  } else if (orig->IsObjectArray<kVerifyNone>()) {
-    FixupObjectArray(orig->AsObjectArray<Object, kVerifyNone>(),
-                     down_cast<ObjectArray<Object>*>(copy));
-  } else if (orig->IsArtMethod<kVerifyNone>()) {
+  FixupVisitor visitor(this, copy);
+  orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  if (orig->IsArtMethod<kVerifyNone>()) {
     FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
-  } else {
-    FixupInstanceFields(orig, copy);
   }
 }
 
-void ImageWriter::FixupClass(Class* orig, Class* copy) {
-  FixupInstanceFields(orig, copy);
-  FixupStaticFields(orig, copy);
-}
-
 void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
-  FixupInstanceFields(orig, copy);
-
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
 
@@ -680,79 +694,6 @@
   }
 }
 
-void ImageWriter::FixupObjectArray(ObjectArray<Object>* orig, ObjectArray<Object>* copy) {
-  for (int32_t i = 0; i < orig->GetLength(); ++i) {
-    Object* element = orig->Get(i);
-    copy->SetWithoutChecksAndWriteBarrier<false, true, kVerifyNone>(i, GetImageAddress(element));
-  }
-}
-
-void ImageWriter::FixupInstanceFields(Object* orig, Object* copy) {
-  DCHECK(orig != NULL);
-  DCHECK(copy != NULL);
-  Class* klass = orig->GetClass();
-  DCHECK(klass != NULL);
-  FixupFields(orig, copy, klass->GetReferenceInstanceOffsets(), false);
-}
-
-void ImageWriter::FixupStaticFields(Class* orig, Class* copy) {
-  DCHECK(orig != NULL);
-  DCHECK(copy != NULL);
-  FixupFields(orig, copy, orig->GetReferenceStaticOffsets(), true);
-}
-
-void ImageWriter::FixupFields(Object* orig,
-                              Object* copy,
-                              uint32_t ref_offsets,
-                              bool is_static) {
-  if (ref_offsets != CLASS_WALK_SUPER) {
-    // Found a reference offset bitmap.  Fixup the specified offsets.
-    while (ref_offsets != 0) {
-      size_t right_shift = CLZ(ref_offsets);
-      MemberOffset byte_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      Object* ref = orig->GetFieldObject<Object, kVerifyNone>(byte_offset, false);
-      // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
-      // image.
-      copy->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-          byte_offset, GetImageAddress(ref), false);
-      ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
-    }
-  } else {
-    // There is no reference offset bitmap.  In the non-static case,
-    // walk up the class inheritance hierarchy and find reference
-    // offsets the hard way. In the static case, just consider this
-    // class.
-    for (Class *klass = is_static ? orig->AsClass() : orig->GetClass();
-         klass != NULL;
-         klass = is_static ? NULL : klass->GetSuperClass()) {
-      size_t num_reference_fields = (is_static
-                                     ? klass->NumReferenceStaticFields()
-                                     : klass->NumReferenceInstanceFields());
-      for (size_t i = 0; i < num_reference_fields; ++i) {
-        ArtField* field = (is_static
-                           ? klass->GetStaticField(i)
-                           : klass->GetInstanceField(i));
-        MemberOffset field_offset = field->GetOffset();
-        Object* ref = orig->GetFieldObject<Object, kVerifyNone>(field_offset, false);
-        // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
-        // image.
-        copy->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-            field_offset, GetImageAddress(ref), false);
-      }
-    }
-  }
-  if (!is_static && orig->IsReferenceInstance()) {
-    // Fix-up referent, that isn't marked as an object field, for References.
-    ArtField* field = orig->GetClass()->FindInstanceField("referent", "Ljava/lang/Object;");
-    MemberOffset field_offset = field->GetOffset();
-    Object* ref = orig->GetFieldObject<Object>(field_offset, false);
-    // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
-    // image.
-    copy->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        field_offset, GetImageAddress(ref), false);
-  }
-}
-
 static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index dff33ba..92b24f6 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -141,22 +141,10 @@
   void CopyAndFixupObjects();
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupClass(mirror::Class* orig, mirror::Class* copy)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupObjectArray(mirror::ObjectArray<mirror::Object>* orig,
-                        mirror::ObjectArray<mirror::Object>* copy)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupInstanceFields(mirror::Object* orig, mirror::Object* copy)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupStaticFields(mirror::Class* orig, mirror::Class* copy)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupFields(mirror::Object* orig, mirror::Object* copy, uint32_t ref_offsets,
-                   bool is_static)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Patches references in OatFile to expect runtime addresses.
   void PatchOatCodeAndMethods()
@@ -164,7 +152,6 @@
   void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-
   const CompilerDriver& compiler_driver_;
 
   // oat file with code for this image
@@ -199,6 +186,9 @@
   uint32_t quick_imt_conflict_trampoline_offset_;
   uint32_t quick_resolution_trampoline_offset_;
   uint32_t quick_to_interpreter_bridge_offset_;
+
+  friend class FixupVisitor;
+  DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
 
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c5219a6..a07aebc 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -398,7 +398,7 @@
           (*cfi_info)[offset_to_update+1] = new_value >> 8;
           (*cfi_info)[offset_to_update+2] = new_value >> 16;
           (*cfi_info)[offset_to_update+3] = new_value >> 24;
-          method_info_.push_back(DebugInfo(PrettyMethod(class_def_method_index, dex_file, false),
+          method_info_.push_back(DebugInfo(PrettyMethod(method_idx, dex_file, false),
                                            new_value, new_value + code_size));
         }
       }
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index ee3b07e..a78d287 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -34,9 +34,19 @@
 }
 
 ArenaStack::~ArenaStack() {
+  DebugStackRefCounter::CheckNoRefs();
   stats_and_pool_.pool->FreeArenaChain(bottom_arena_);
 }
 
+void ArenaStack::Reset() {
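+  // Return all arenas to the pool and put the stack back in its freshly-constructed state.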
+  DebugStackRefCounter::CheckNoRefs();
+  stats_and_pool_.pool->FreeArenaChain(bottom_arena_);
+  bottom_arena_ = nullptr;
+  top_arena_ = nullptr;
+  top_ptr_ = nullptr;
+  top_end_ = nullptr;
+}
+
 MemStats ArenaStack::GetPeakStats() const {
   DebugStackRefCounter::CheckNoRefs();
   return MemStats("ArenaStack peak", static_cast<const TaggedStats<Peak>*>(&stats_and_pool_),
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index 24a8afe..28e86ec 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -37,6 +37,8 @@
   explicit ArenaStack(ArenaPool* arena_pool);
   ~ArenaStack();
 
+  void Reset();
+
   size_t PeakBytesAllocated() {
     return PeakStats()->BytesAllocated();
   }
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 44c3e60..263a764 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -22,11 +22,11 @@
 // TODO Thread offsets need to be checked when on Aarch64.
 
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 320
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 328
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
 // Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 336
+#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
 
 // Register holding Thread::Current().
 #define xSELF x18
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2d64e7f..9db07f8 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -453,7 +453,7 @@
     cmp w17, #'J'           // is this a long?
     bne .LisOther
 
-    cmp x8, # 7*12          // Skip this load if all registers full.
+    cmp x8, # 6*12          // Skip this load if all registers full.
     beq .LfillRegisters
 
     add x17, x12, x8        // Calculate subroutine to jump to.
@@ -461,7 +461,7 @@
 
 
 .LisOther:                  // Everything else takes one vReg.
-    cmp x8, # 7*12          // Skip this load if all registers full.
+    cmp x8, # 6*12          // Skip this load if all registers full.
     beq .LfillRegisters
     add x17, x11, x8        // Calculate subroutine to jump to.
     br x17
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 5a4e63e..03d9e24 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -20,11 +20,11 @@
 #include "asm_support.h"
 
 // Offset of field Runtime::callee_save_methods_[kSaveAll]
-#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 200
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 0
 // Offset of field Runtime::callee_save_methods_[kRefsOnly]
-#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 208
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 8
 // Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
-#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 216
+#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
 
 // Offset of field Thread::self_ verified in InitCpu
 #define THREAD_SELF_OFFSET 72
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index cef9954..08ea123 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -2314,26 +2314,26 @@
 }
 
 mirror::Class* ClassLinker::FindPrimitiveClass(char type) {
-  switch (Primitive::GetType(type)) {
-    case Primitive::kPrimByte:
+  switch (type) {
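+    // Map the descriptor character straight to its class root; anything unrecognized
+    // falls through to the error handling below.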
+    case 'B':
       return GetClassRoot(kPrimitiveByte);
-    case Primitive::kPrimChar:
+    case 'C':
       return GetClassRoot(kPrimitiveChar);
-    case Primitive::kPrimDouble:
+    case 'D':
       return GetClassRoot(kPrimitiveDouble);
-    case Primitive::kPrimFloat:
+    case 'F':
       return GetClassRoot(kPrimitiveFloat);
-    case Primitive::kPrimInt:
+    case 'I':
       return GetClassRoot(kPrimitiveInt);
-    case Primitive::kPrimLong:
+    case 'J':
       return GetClassRoot(kPrimitiveLong);
-    case Primitive::kPrimShort:
+    case 'S':
       return GetClassRoot(kPrimitiveShort);
-    case Primitive::kPrimBoolean:
+    case 'Z':
       return GetClassRoot(kPrimitiveBoolean);
-    case Primitive::kPrimVoid:
+    case 'V':
       return GetClassRoot(kPrimitiveVoid);
-    case Primitive::kPrimNot:
+    default:
       break;
   }
   std::string printable_type(PrintableChar(type));
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 7cfeb63..b23b12e 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -31,12 +31,9 @@
 
 class HeapBitmap {
  public:
-  typedef std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*> > SpaceBitmapVector;
-  typedef std::vector<ObjectSet*, GcAllocator<ObjectSet*> > ObjectSetVector;
-
   bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
-    if (LIKELY(bitmap != NULL)) {
+    if (LIKELY(bitmap != nullptr)) {
       return bitmap->Test(obj);
     } else {
       return GetDiscontinuousSpaceObjectSet(obj) != NULL;
@@ -71,7 +68,7 @@
         return bitmap;
       }
     }
-    return NULL;
+    return nullptr;
   }
 
   ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) {
@@ -80,7 +77,7 @@
         return space_set;
       }
     }
-    return NULL;
+    return nullptr;
   }
 
   void Walk(ObjectCallback* callback, void* arg)
@@ -110,10 +107,10 @@
   void RemoveDiscontinuousObjectSet(ObjectSet* set);
 
   // Bitmaps covering continuous spaces.
-  SpaceBitmapVector continuous_space_bitmaps_;
+  std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*>> continuous_space_bitmaps_;
 
   // Sets covering discontinuous spaces.
-  ObjectSetVector discontinuous_space_sets_;
+  std::vector<ObjectSet*, GcAllocator<ObjectSet*>> discontinuous_space_sets_;
 
   friend class art::gc::Heap;
 };
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index 19c6768..76719b6 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -33,11 +33,9 @@
       : ModUnionTableReferenceCache(name, heap, space) {}
 
   bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
-    const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces();
-    typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-    for (It it = spaces.begin(); it != spaces.end(); ++it) {
-      if ((*it)->Contains(ref)) {
-        return (*it)->IsMallocSpace();
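+    // References into any continuous space other than the image need to be tracked,
+    // not just references into malloc spaces.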
+    for (space::ContinuousSpace* space : GetHeap()->GetContinuousSpaces()) {
+      if (space->HasAddress(ref)) {
+        return !space->IsImageSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 8871921..314f3c5 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -70,37 +70,29 @@
 
 class ModUnionUpdateObjectReferencesVisitor {
  public:
-  ModUnionUpdateObjectReferencesVisitor(MarkObjectCallback* callback, void* arg)
+  ModUnionUpdateObjectReferencesVisitor(MarkHeapReferenceCallback* callback, void* arg)
     : callback_(callback),
       arg_(arg) {
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, Object* ref, const MemberOffset& offset,
-                  bool /* is_static */) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void operator()(Object* obj, MemberOffset offset, bool /* static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
-    if (ref != nullptr) {
-      Object* new_ref = callback_(ref, arg_);
-      if (new_ref != ref) {
-        // Use SetFieldObjectWithoutWriteBarrier to avoid card mark as an optimization which
-        // reduces dirtied pages and improves performance.
-        if (Runtime::Current()->IsActiveTransaction()) {
-          obj->SetFieldObjectWithoutWriteBarrier<true>(offset, new_ref, true);
-        } else {
-          obj->SetFieldObjectWithoutWriteBarrier<false>(offset, new_ref, true);
-        }
-      }
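+    // Hand the callback the address of the reference so it can mark through it and,
+    // if needed, update it in place.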
+    mirror::HeapReference<Object>* obj_ptr = obj->GetFieldObjectReferenceAddr(offset);
+    if (obj_ptr->AsMirrorPtr() != nullptr) {
+      callback_(obj_ptr, arg_);
     }
   }
 
  private:
-  MarkObjectCallback* const callback_;
+  MarkHeapReferenceCallback* const callback_;
   void* arg_;
 };
 
 class ModUnionScanImageRootVisitor {
  public:
-  ModUnionScanImageRootVisitor(MarkObjectCallback* callback, void* arg)
+  ModUnionScanImageRootVisitor(MarkHeapReferenceCallback* callback, void* arg)
       : callback_(callback), arg_(arg) {}
 
   void operator()(Object* root) const
@@ -108,11 +100,11 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != NULL);
     ModUnionUpdateObjectReferencesVisitor ref_visitor(callback_, arg_);
-    collector::MarkSweep::VisitObjectReferences(root, ref_visitor, true);
+    root->VisitReferences<kMovingClasses>(ref_visitor);
   }
 
  private:
-  MarkObjectCallback* const callback_;
+  MarkHeapReferenceCallback* const callback_;
   void* const arg_;
 };
 
@@ -131,12 +123,14 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, Object* ref, const MemberOffset& offset,
-                  bool /* is_static */) const {
+  void operator()(Object* obj, MemberOffset offset, bool /* static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::HeapReference<Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
+    mirror::Object* ref = ref_ptr->AsMirrorPtr();
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr && mod_union_table_->AddReference(obj, ref)) {
       // Push the address of the reference.
-      references_->push_back(obj->GetFieldObjectReferenceAddr(offset));
+      references_->push_back(ref_ptr);
     }
   }
 
@@ -155,11 +149,10 @@
 
   void operator()(Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    DCHECK(obj != NULL);
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
     AddToReferenceArrayVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    obj->VisitReferences<kMovingClasses>(visitor);
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
@@ -175,20 +168,22 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, Object* ref,
-                  const MemberOffset& /* offset */, bool /* is_static */) const
+  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    Heap* heap = mod_union_table_->GetHeap();
-    if (ref != NULL && mod_union_table_->AddReference(obj, ref) &&
+    mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
+    if (ref != nullptr && mod_union_table_->AddReference(obj, ref) &&
         references_.find(ref) == references_.end()) {
+      Heap* heap = mod_union_table_->GetHeap();
       space::ContinuousSpace* from_space = heap->FindContinuousSpaceFromObject(obj, false);
       space::ContinuousSpace* to_space = heap->FindContinuousSpaceFromObject(ref, false);
-      LOG(INFO) << "Object " << reinterpret_cast<const void*>(obj) << "(" << PrettyTypeOf(obj) << ")"
-                << "References " << reinterpret_cast<const void*>(ref)
-                << "(" << PrettyTypeOf(ref) << ") without being in mod-union table";
-      LOG(INFO) << "FromSpace " << from_space->GetName() << " type " << from_space->GetGcRetentionPolicy();
-      LOG(INFO) << "ToSpace " << to_space->GetName() << " type " << to_space->GetGcRetentionPolicy();
-      mod_union_table_->GetHeap()->DumpSpaces();
+      LOG(INFO) << "Object " << reinterpret_cast<const void*>(obj) << "(" << PrettyTypeOf(obj)
+          << ")" << "References " << reinterpret_cast<const void*>(ref) << "(" << PrettyTypeOf(ref)
+          << ") without being in mod-union table";
+      LOG(INFO) << "FromSpace " << from_space->GetName() << " type "
+          << from_space->GetGcRetentionPolicy();
+      LOG(INFO) << "ToSpace " << to_space->GetName() << " type "
+          << to_space->GetGcRetentionPolicy();
+      heap->DumpSpaces();
       LOG(FATAL) << "FATAL ERROR";
     }
   }
@@ -208,9 +203,8 @@
 
   void operator()(Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
-    DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    obj->VisitReferences<kMovingClasses>(visitor);
   }
 
  private:
@@ -264,7 +258,7 @@
   }
 }
 
-void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectCallback* callback,
+void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
                                                           void* arg) {
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
@@ -298,14 +292,7 @@
   size_t count = 0;
   for (const auto& ref : references_) {
     for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
-      Object* obj = obj_ptr->AsMirrorPtr();
-      if (obj != nullptr) {
-        Object* new_obj = callback(obj, arg);
-        // Avoid dirtying pages in the image unless necessary.
-        if (new_obj != obj) {
-          obj_ptr->Assign(new_obj);
-        }
-      }
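+      // The callback takes the reference address directly and handles the null check and
+      // any in-place update itself.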
+      callback(obj_ptr, arg);
     }
     count += ref.second.size();
   }
@@ -322,7 +309,8 @@
 }
 
 // Mark all references to the alloc space(s).
-void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectCallback* callback, void* arg) {
+void ModUnionTableCardCache::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
+                                                     void* arg) {
   CardTable* card_table = heap_->GetCardTable();
   ModUnionScanImageRootVisitor scan_visitor(callback, arg);
   SpaceBitmap* bitmap = space_->GetLiveBitmap();
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 2e22a11..c4b020b 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -69,7 +69,7 @@
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
-  virtual void UpdateAndMarkReferences(MarkObjectCallback* callback, void* arg) = 0;
+  virtual void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg) = 0;
 
   // Verification, sanity checks that we don't have clean cards which conflict with out cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
@@ -106,7 +106,7 @@
   void ClearCards();
 
   // Update table based on cleared cards and mark all references to the other spaces.
-  void UpdateAndMarkReferences(MarkObjectCallback* callback, void* arg)
+  void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -142,7 +142,7 @@
   void ClearCards();
 
   // Mark all references to the alloc space(s).
-  void UpdateAndMarkReferences(MarkObjectCallback* callback, void* arg)
+  void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index e6508dc..afa5054 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -60,28 +60,24 @@
 
 class RememberedSetReferenceVisitor {
  public:
-  RememberedSetReferenceVisitor(MarkObjectCallback* callback, space::ContinuousSpace* target_space,
+  RememberedSetReferenceVisitor(MarkHeapReferenceCallback* callback,
+                                space::ContinuousSpace* target_space,
                                 bool* const contains_reference_to_target_space, void* arg)
       : callback_(callback), target_space_(target_space), arg_(arg),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
-  void operator()(mirror::Object* obj, mirror::Object* ref,
-                  const MemberOffset& offset, bool /* is_static */) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (ref != nullptr) {
-      if (target_space_->HasAddress(ref)) {
-        *contains_reference_to_target_space_ = true;
-        mirror::Object* new_ref = callback_(ref, arg_);
-        DCHECK(!target_space_->HasAddress(new_ref));
-        if (new_ref != ref) {
-          obj->SetFieldObjectWithoutWriteBarrier<false>(offset, new_ref, false);
-        }
-      }
+    mirror::HeapReference<mirror::Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
+    if (target_space_->HasAddress(ref_ptr->AsMirrorPtr())) {
+      *contains_reference_to_target_space_ = true;
+      callback_(ref_ptr, arg_);
+      DCHECK(!target_space_->HasAddress(ref_ptr->AsMirrorPtr()));
     }
   }
 
  private:
-  MarkObjectCallback* const callback_;
+  MarkHeapReferenceCallback* const callback_;
   space::ContinuousSpace* const target_space_;
   void* const arg_;
   bool* const contains_reference_to_target_space_;
@@ -89,27 +85,27 @@
 
 class RememberedSetObjectVisitor {
  public:
-  RememberedSetObjectVisitor(MarkObjectCallback* callback, space::ContinuousSpace* target_space,
+  RememberedSetObjectVisitor(MarkHeapReferenceCallback* callback,
+                             space::ContinuousSpace* target_space,
                              bool* const contains_reference_to_target_space, void* arg)
       : callback_(callback), target_space_(target_space), arg_(arg),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
   void operator()(mirror::Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(obj != NULL);
     RememberedSetReferenceVisitor ref_visitor(callback_, target_space_,
                                               contains_reference_to_target_space_, arg_);
-    collector::MarkSweep::VisitObjectReferences(obj, ref_visitor, true);
+    obj->VisitReferences<kMovingClasses>(ref_visitor);
   }
 
  private:
-  MarkObjectCallback* const callback_;
+  MarkHeapReferenceCallback* const callback_;
   space::ContinuousSpace* const target_space_;
   void* const arg_;
   bool* const contains_reference_to_target_space_;
 };
 
-void RememberedSet::UpdateAndMarkReferences(MarkObjectCallback* callback,
+void RememberedSet::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
                                             space::ContinuousSpace* target_space, void* arg) {
   CardTable* card_table = heap_->GetCardTable();
   bool contains_reference_to_target_space = false;
@@ -155,7 +151,8 @@
   for (const byte* card_addr : dirty_cards_) {
     auto start = reinterpret_cast<byte*>(card_table->AddrFromCard(card_addr));
     auto end = start + CardTable::kCardSize;
-    DCHECK(space_->Begin() <= start && end <= space_->End());
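+    // A dirty card may extend beyond the space's current End(), but never beyond its
+    // mapped Limit().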
+    DCHECK_LE(space_->Begin(), start);
+    DCHECK_LE(end, space_->Limit());
   }
 }
 
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index 92feeb1..4ed20dd 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -52,7 +52,7 @@
   void ClearCards();
 
   // Mark through all references to the target space.
-  void UpdateAndMarkReferences(MarkObjectCallback* callback,
+  void UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
                                space::ContinuousSpace* target_space, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index ace9f9e..19fdc63 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1652,6 +1652,30 @@
   }
 }
 
+void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) {
+  if (kIsDebugBuild) {
+    Thread* self = Thread::Current();
+    // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
+    WriterMutexLock wmu(self, bulk_free_lock_);
+    for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
+      DCHECK(thread_local_run == nullptr);
+    }
+  }
+}
+
+void RosAlloc::AssertAllThreadLocalRunsAreRevoked() {
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_);
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (Thread* t : thread_list) {
+      AssertThreadLocalRunsAreRevoked(t);
+    }
+  }
+}
+
 void RosAlloc::Initialize() {
   // Check the consistency of the number of size brackets.
   DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 738d917..0b4b189 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -549,6 +549,10 @@
   void RevokeThreadLocalRuns(Thread* thread);
   // Releases the thread-local runs assigned to all the threads back to the common set of runs.
   void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  // Assert the thread local runs of a thread are revoked.
+  void AssertThreadLocalRunsAreRevoked(Thread* thread);
+  // Assert all the thread local runs are revoked.
+  void AssertAllThreadLocalRunsAreRevoked() LOCKS_EXCLUDED(Locks::thread_list_lock_);
   // Dumps the page map for debugging.
   std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 1e1e447..65b5471 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -47,9 +47,8 @@
   ResetCumulativeStatistics();
 }
 
-bool GarbageCollector::HandleDirtyObjectsPhase() {
-  DCHECK(IsConcurrent());
-  return true;
+void GarbageCollector::HandleDirtyObjectsPhase() {
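+  // Only concurrent collectors override this phase; the base implementation should never run.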
+  LOG(FATAL) << "Unreachable";
 }
 
 void GarbageCollector::RegisterPause(uint64_t nano_length) {
@@ -64,12 +63,6 @@
   total_freed_bytes_ = 0;
 }
 
-void GarbageCollector::RevokeAllThreadLocalBuffers() {
-  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
-  GetHeap()->RevokeAllThreadLocalBuffers();
-  timings_.EndSplit();
-}
-
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
@@ -85,50 +78,56 @@
   freed_objects_ = 0;
   freed_large_objects_ = 0;
 
-  InitializePhase();
-
-  if (!IsConcurrent()) {
-    // Pause is the entire length of the GC.
-    uint64_t pause_start = NanoTime();
-    ATRACE_BEGIN("Application threads suspended");
-    // Mutator lock may be already exclusively held when we do garbage collections for changing the
-    // current collector / allocator during process state updates.
-    if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-      // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
-      RevokeAllThreadLocalBuffers();
-      MarkingPhase();
-      ReclaimPhase();
-      // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
-    } else {
-      thread_list->SuspendAll();
-      GetHeap()->PreGcRosAllocVerification(&timings_);
-      RevokeAllThreadLocalBuffers();
-      MarkingPhase();
-      ReclaimPhase();
-      GetHeap()->PostGcRosAllocVerification(&timings_);
-      thread_list->ResumeAll();
+  CollectorType collector_type = GetCollectorType();
+  switch (collector_type) {
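+    // MS/SS/GSS run the entire collection inside a single pause, while CMS marks
+    // concurrently and pauses only for the dirty-object phase.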
+    case kCollectorTypeMS:      // Fall through.
+    case kCollectorTypeSS:      // Fall through.
+    case kCollectorTypeGSS: {
+      InitializePhase();
+      // Pause is the entire length of the GC.
+      uint64_t pause_start = NanoTime();
+      ATRACE_BEGIN("Application threads suspended");
+      // Mutator lock may already be exclusively held when we do garbage collections for
+      // changing the current collector / allocator during process state updates.
+      if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+        // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
+        RevokeAllThreadLocalBuffers();
+        MarkingPhase();
+        ReclaimPhase();
+        // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
+      } else {
+        ATRACE_BEGIN("Suspending mutator threads");
+        thread_list->SuspendAll();
+        ATRACE_END();
+        GetHeap()->PreGcRosAllocVerification(&timings_);
+        RevokeAllThreadLocalBuffers();
+        MarkingPhase();
+        ReclaimPhase();
+        GetHeap()->PostGcRosAllocVerification(&timings_);
+        ATRACE_BEGIN("Resuming mutator threads");
+        thread_list->ResumeAll();
+        ATRACE_END();
+      }
+      ATRACE_END();
+      RegisterPause(NanoTime() - pause_start);
+      FinishPhase();
+      break;
     }
-    ATRACE_END();
-    RegisterPause(NanoTime() - pause_start);
-  } else {
-    CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
-    Thread* self = Thread::Current();
-    {
-      ReaderMutexLock mu(self, *Locks::mutator_lock_);
-      MarkingPhase();
-    }
-    bool done = false;
-    while (!done) {
+    case kCollectorTypeCMS: {
+      InitializePhase();
+      CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
+      {
+        ReaderMutexLock mu(self, *Locks::mutator_lock_);
+        MarkingPhase();
+      }
       uint64_t pause_start = NanoTime();
       ATRACE_BEGIN("Suspending mutator threads");
       thread_list->SuspendAll();
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       GetHeap()->PreGcRosAllocVerification(&timings_);
-      done = HandleDirtyObjectsPhase();
-      if (done) {
-        RevokeAllThreadLocalBuffers();
-      }
+      HandleDirtyObjectsPhase();
+      RevokeAllThreadLocalBuffers();
       GetHeap()->PostGcRosAllocVerification(&timings_);
       ATRACE_END();
       uint64_t pause_end = NanoTime();
@@ -136,13 +135,19 @@
       thread_list->ResumeAll();
       ATRACE_END();
       RegisterPause(pause_end - pause_start);
+      {
+        ReaderMutexLock mu(self, *Locks::mutator_lock_);
+        ReclaimPhase();
+      }
+      FinishPhase();
+      break;
     }
-    {
-      ReaderMutexLock mu(self, *Locks::mutator_lock_);
-      ReclaimPhase();
+    default: {
+      LOG(FATAL) << "Unreachable collector type=" << static_cast<size_t>(collector_type);
+      break;
     }
   }
-  FinishPhase();
+
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
   total_time_ns_ += GetDurationNs();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 8259cf0..93fd2ab 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -20,6 +20,7 @@
 #include "base/histogram.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
+#include "gc/collector_type.h"
 #include "gc/gc_cause.h"
 #include "gc_type.h"
 #include <stdint.h>
@@ -34,9 +35,6 @@
 
 class GarbageCollector {
  public:
-  // Returns true iff the garbage collector is concurrent.
-  virtual bool IsConcurrent() const = 0;
-
   GarbageCollector(Heap* heap, const std::string& name);
   virtual ~GarbageCollector() { }
 
@@ -46,6 +44,8 @@
 
   virtual GcType GetGcType() const = 0;
 
+  virtual CollectorType GetCollectorType() const = 0;
+
   // Run the garbage collector.
   void Run(GcCause gc_cause, bool clear_soft_references);
 
@@ -118,8 +118,8 @@
   // Mark all reachable objects, done concurrently.
   virtual void MarkingPhase() = 0;
 
-  // Only called for concurrent GCs. Gets called repeatedly until it succeeds.
-  virtual bool HandleDirtyObjectsPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Only called for concurrent GCs.
+  virtual void HandleDirtyObjectsPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Called with mutators running.
   virtual void ReclaimPhase() = 0;
@@ -127,7 +127,8 @@
   // Called after the GC is finished. Done without mutators paused.
   virtual void FinishPhase() = 0;
 
-  void RevokeAllThreadLocalBuffers();
+  // Revoke all the thread-local buffers.
+  virtual void RevokeAllThreadLocalBuffers() = 0;
 
   static constexpr size_t kPauseBucketSize = 500;
   static constexpr size_t kPauseBucketCount = 32;
diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc
index 70a6213..3e1c944 100644
--- a/runtime/gc/collector/immune_region.cc
+++ b/runtime/gc/collector/immune_region.cc
@@ -28,8 +28,8 @@
 }
 
 void ImmuneRegion::Reset() {
-  begin_ = nullptr;
-  end_ = nullptr;
+  SetBegin(nullptr);
+  SetEnd(nullptr);
 }
 
 bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) {
@@ -41,13 +41,13 @@
   mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin());
   mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit());
   if (IsEmpty()) {
-    begin_ = space_begin;
-    end_ = space_limit;
+    SetBegin(space_begin);
+    SetEnd(space_limit);
   } else {
     if (space_limit <= begin_) {  // Space is before the immune region.
-      begin_ = space_begin;
+      SetBegin(space_begin);
     } else if (space_begin >= end_) {  // Space is after the immune region.
-      end_ = space_limit;
+      SetEnd(space_limit);
     } else {
       return false;
     }
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 21d0b43..0c0a89b 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -46,16 +46,29 @@
   bool ContainsSpace(const space::ContinuousSpace* space) const;
   // Returns true if an object is inside of the immune region (assumed to be marked).
   bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE {
-    return obj >= begin_ && obj < end_;
+    // Note: Relies on integer underflow behavior.
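+    // If obj < begin_, the unsigned subtraction wraps around to a huge value that fails
+    // the size check, so one compare covers both bounds.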
+    return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_;
+  }
+  void SetBegin(mirror::Object* begin) {
+    begin_ = begin;
+    UpdateSize();
+  }
+  void SetEnd(mirror::Object* end) {
+    end_ = end;
+    UpdateSize();
   }
 
  private:
   bool IsEmpty() const {
-    return begin_ == end_;
+    return size_ == 0;
+  }
+  void UpdateSize() {
+    size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
   }
 
   mirror::Object* begin_;
   mirror::Object* end_;
+  uintptr_t size_;
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 4915532..1cb2adb 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -22,129 +22,30 @@
 #include "gc/heap.h"
 #include "mirror/art_field.h"
 #include "mirror/class.h"
-#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/reference.h"
 
 namespace art {
 namespace gc {
 namespace collector {
 
-template <typename MarkVisitor>
-inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor) {
+template<typename MarkVisitor, typename ReferenceVisitor>
+inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
+                                       const ReferenceVisitor& ref_visitor) {
   if (kIsDebugBuild && !IsMarked(obj)) {
     heap_->DumpSpaces();
     LOG(FATAL) << "Scanning unmarked object " << obj;
   }
-  // The GetClass verifies the object, don't need to reverify after.
-  mirror::Class* klass = obj->GetClass();
-  // IsArrayClass verifies klass.
-  if (UNLIKELY(klass->IsArrayClass())) {
-    if (kCountScannedTypes) {
-      ++array_count_;
-    }
-    if (klass->IsObjectArrayClass<kVerifyNone>()) {
-      VisitObjectArrayReferences(obj->AsObjectArray<mirror::Object, kVerifyNone>(), visitor);
-    }
-  } else if (UNLIKELY(klass == mirror::Class::GetJavaLangClass())) {
-    if (kCountScannedTypes) {
+  obj->VisitReferences<false>(visitor, ref_visitor);
+  if (kCountScannedTypes) {
+    mirror::Class* klass = obj->GetClass<kVerifyNone>();
+    if (UNLIKELY(klass == mirror::Class::GetJavaLangClass())) {
       ++class_count_;
-    }
-    VisitClassReferences(klass, obj, visitor);
-  } else {
-    if (kCountScannedTypes) {
+    } else if (UNLIKELY(klass->IsArrayClass<kVerifyNone>())) {
+      ++array_count_;
+    } else {
       ++other_count_;
     }
-    VisitOtherReferences(klass, obj, visitor);
-    if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
-      DelayReferenceReferent(klass, obj);
-    }
-  }
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
-                                             bool visit_class)
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                          Locks::mutator_lock_) {
-  mirror::Class* klass = obj->GetClass();
-  if (klass->IsArrayClass()) {
-    if (visit_class) {
-      visitor(obj, klass, mirror::Object::ClassOffset(), false);
-    }
-    if (klass->IsObjectArrayClass<kVerifyNone>()) {
-      VisitObjectArrayReferences(obj->AsObjectArray<mirror::Object, kVerifyNone>(), visitor);
-    }
-  } else if (klass == mirror::Class::GetJavaLangClass()) {
-    DCHECK_EQ(klass->GetClass<kVerifyNone>(), mirror::Class::GetJavaLangClass());
-    VisitClassReferences(klass, obj, visitor);
-  } else {
-    VisitOtherReferences(klass, obj, visitor);
-  }
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
-                                                     const Visitor& visitor)
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitFieldsReferences(obj, klass->GetReferenceInstanceOffsets<kVerifyNone>(), false, visitor);
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
-                                            const Visitor& visitor)
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitInstanceFieldsReferences(klass, obj, visitor);
-  VisitStaticFieldsReferences(obj->AsClass<kVerifyNone>(), visitor);
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
-    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitFieldsReferences(klass, klass->GetReferenceStaticOffsets<kVerifyNone>(), true, visitor);
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets,
-                                             bool is_static, const Visitor& visitor) {
-  if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
-    // Found a reference offset bitmap.  Mark the specified offsets.
-    while (ref_offsets != 0) {
-      size_t right_shift = CLZ(ref_offsets);
-      MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyReads>(field_offset, false);
-      visitor(obj, ref, field_offset, is_static);
-      ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
-    }
-  } else {
-    // There is no reference offset bitmap.  In the non-static case,
-    // walk up the class inheritance hierarchy and find reference
-    // offsets the hard way. In the static case, just consider this
-    // class.
-    for (mirror::Class* klass = is_static ? obj->AsClass<kVerifyNone>() : obj->GetClass<kVerifyNone>();
-         klass != nullptr;
-         klass = is_static ? nullptr : klass->GetSuperClass()) {
-      size_t num_reference_fields = (is_static
-                                     ? klass->NumReferenceStaticFields()
-                                     : klass->NumReferenceInstanceFields());
-      for (size_t i = 0; i < num_reference_fields; ++i) {
-        mirror::ArtField* field = (is_static ? klass->GetStaticField(i)
-                                             : klass->GetInstanceField(i));
-        MemberOffset field_offset = field->GetOffset();
-        mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyReads>(field_offset, false);
-        visitor(obj, ref, field_offset, is_static);
-      }
-    }
-  }
-}
-
-template <typename Visitor>
-inline void MarkSweep::VisitObjectArrayReferences(mirror::ObjectArray<mirror::Object>* array,
-                                                  const Visitor& visitor) {
-  const size_t length = static_cast<size_t>(array->GetLength());
-  for (size_t i = 0; i < length; ++i) {
-    mirror::Object* element = array->GetWithoutChecks(static_cast<int32_t>(i));
-    const size_t width = sizeof(mirror::HeapReference<mirror::Object>);
-    MemberOffset offset(i * width + mirror::Array::DataOffset(width).Int32Value());
-    visitor(array, element, offset, false);
   }
 }
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 8372734..8abf5e2 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -79,16 +79,20 @@
 static constexpr bool kParallelProcessMarkStack = true;
 
 // Profiling and information flags.
-static constexpr bool kCountClassesMarked = false;
 static constexpr bool kProfileLargeObjects = false;
 static constexpr bool kMeasureOverhead = false;
 static constexpr bool kCountTasks = false;
 static constexpr bool kCountJavaLangRefs = false;
+static constexpr bool kCountMarkedObjects = false;
 
 // Turn off kCheckLocks when profiling the GC since it slows the GC down by up to 40%.
 static constexpr bool kCheckLocks = kDebugLocking;
 static constexpr bool kVerifyRoots = kIsDebugBuild;
 
+// If true, revoke the rosalloc thread-local buffers at the
+// checkpoint, as opposed to during the pause.
+static constexpr bool kRevokeRosAllocThreadLocalBuffersAtCheckpoint = true;
+
 void MarkSweep::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -105,9 +109,6 @@
     : GarbageCollector(heap,
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
-      current_mark_bitmap_(NULL),
-      mark_stack_(NULL),
-      live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
@@ -125,13 +126,20 @@
   other_count_ = 0;
   large_object_test_ = 0;
   large_object_mark_ = 0;
-  classes_marked_ = 0;
   overhead_time_ = 0;
   work_chunks_created_ = 0;
   work_chunks_deleted_ = 0;
   reference_count_ = 0;
-
-  FindDefaultMarkBitmap();
+  mark_null_count_ = 0;
+  mark_immune_count_ = 0;
+  mark_fastpath_count_ = 0;
+  mark_slowpath_count_ = 0;
+  FindDefaultSpaceBitmap();
+  {
+    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    mark_bitmap_ = heap_->GetMarkBitmap();
+  }
 
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
@@ -145,13 +153,17 @@
                                &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
 }
 
-void MarkSweep::PreProcessReferences(Thread* self) {
-  timings_.NewSplit("PreProcessReferences");
-  GetHeap()->ProcessSoftReferences(timings_, clear_soft_references_, &IsMarkedCallback,
-                                   &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
+void MarkSweep::PreProcessReferences() {
+  if (IsConcurrent()) {
+    // No reason to do this for non-concurrent GC, since pre-processing soft references
+    // only helps reduce pause times.
+    timings_.NewSplit("PreProcessReferences");
+    GetHeap()->ProcessSoftReferences(timings_, clear_soft_references_, &IsMarkedCallback,
+                                     &MarkObjectCallback, &ProcessMarkStackPausedCallback, this);
+  }
 }
 
-bool MarkSweep::HandleDirtyObjectsPhase() {
+void MarkSweep::HandleDirtyObjectsPhase() {
   TimingLogger::ScopedSplit split("(Paused)HandleDirtyObjectsPhase", &timings_);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
@@ -194,11 +206,6 @@
   // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
   // reference to a string that is about to be swept.
   Runtime::Current()->DisallowNewSystemWeaks();
-  return true;
-}
-
-bool MarkSweep::IsConcurrent() const {
-  return is_concurrent_;
 }
 
 void MarkSweep::PreCleanCards() {
@@ -244,7 +251,7 @@
   Thread* self = Thread::Current();
 
   BindBitmaps();
-  FindDefaultMarkBitmap();
+  FindDefaultSpaceBitmap();
 
   // Process dirty cards and add dirty cards to mod union tables.
   heap_->ProcessCards(timings_, false);
@@ -261,11 +268,7 @@
   MarkReachableObjects();
   // Pre-clean dirtied cards to reduce pauses.
   PreCleanCards();
-  if (IsConcurrent()) {
-    // No reason to do this for non-concurrent GC since pre processing soft references only helps
-    // pauses.
-    PreProcessReferences(self);
-  }
+  PreProcessReferences();
 }
 
 void MarkSweep::UpdateAndMarkModUnion() {
@@ -276,7 +279,7 @@
       TimingLogger::ScopedSplit split(name, &timings_);
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
-      mod_union_table->UpdateAndMarkReferences(MarkObjectCallback, this);
+      mod_union_table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
     }
   }
 }
@@ -357,14 +360,13 @@
   }
 }
 
-void MarkSweep::FindDefaultMarkBitmap() {
+void MarkSweep::FindDefaultSpaceBitmap() {
   TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
     if (bitmap != nullptr &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      current_mark_bitmap_ = bitmap;
-      CHECK(current_mark_bitmap_ != NULL);
+      current_space_bitmap_ = bitmap;
       return;
     }
   }
@@ -390,7 +392,7 @@
   }
 }
 
-inline void MarkSweep::MarkObjectNonNullParallel(const Object* obj) {
+inline void MarkSweep::MarkObjectNonNullParallel(Object* obj) {
   DCHECK(obj != NULL);
   if (MarkObjectParallel(obj)) {
     MutexLock mu(Thread::Current(), mark_stack_lock_);
@@ -398,7 +400,7 @@
       ExpandMarkStack();
     }
     // The object must be pushed on to the mark stack.
-    mark_stack_->PushBack(const_cast<Object*>(obj));
+    mark_stack_->PushBack(obj);
   }
 }
 
@@ -408,19 +410,21 @@
   return obj;
 }
 
+void MarkSweep::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* ref, void* arg) {
+  reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
+}
+
 inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
   DCHECK(!immune_region_.ContainsObject(obj));
-
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (LIKELY(new_bitmap != NULL)) {
       object_bitmap = new_bitmap;
     } else {
@@ -428,50 +432,54 @@
       return;
     }
   }
-
   DCHECK(object_bitmap->HasAddress(obj));
   object_bitmap->Clear(obj);
 }
 
-inline void MarkSweep::MarkObjectNonNull(const Object* obj) {
-  DCHECK(obj != NULL);
-
+inline void MarkSweep::MarkObjectNonNull(Object* obj) {
+  DCHECK(obj != nullptr);
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   if (immune_region_.ContainsObject(obj)) {
+    if (kCountMarkedObjects) {
+      ++mark_immune_count_;
+    }
     DCHECK(IsMarked(obj));
     return;
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
-    if (LIKELY(new_bitmap != NULL)) {
-      object_bitmap = new_bitmap;
-    } else {
+    object_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
+    if (kCountMarkedObjects) {
+      ++mark_slowpath_count_;
+    }
+    if (UNLIKELY(object_bitmap == nullptr)) {
       MarkLargeObject(obj, true);
       return;
     }
+  } else if (kCountMarkedObjects) {
+    ++mark_fastpath_count_;
   }
-
   // This object was not previously marked.
-  if (!object_bitmap->Test(obj)) {
-    object_bitmap->Set(obj);
-    if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
-      // Lock is not needed but is here anyways to please annotalysis.
-      MutexLock mu(Thread::Current(), mark_stack_lock_);
-      ExpandMarkStack();
-    }
-    // The object must be pushed on to the mark stack.
-    mark_stack_->PushBack(const_cast<Object*>(obj));
+  if (!object_bitmap->Set(obj)) {
+    PushOnMarkStack(obj);
   }
 }
 
+inline void MarkSweep::PushOnMarkStack(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    // Lock is not needed but is here anyway to please annotalysis.
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    ExpandMarkStack();
+  }
+  // The object must be pushed onto the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
 // Rare case, probably not worth inlining since it will increase instruction cache miss rate.
 bool MarkSweep::MarkLargeObject(const Object* obj, bool set) {
   // TODO: support >1 discontinuous space.
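The marking fast path above leans on Set() returning the previous value of the mark
bit, which collapses the old Test()-then-Set() pair into a single operation. A minimal
sketch of that contract, assuming a hypothetical word-level layout (the real
SpaceBitmap maps object addresses to bits differently):

    #include <cstddef>
    #include <cstdint>

    // Sketch: Set() reports the prior bit so callers push onto the mark
    // stack only on the 0 -> 1 transition.
    static bool BitmapSet(uintptr_t* words, size_t bit_index) {
      const size_t kBitsPerWord = sizeof(uintptr_t) * 8;
      uintptr_t& word = words[bit_index / kBitsPerWord];
      const uintptr_t mask = static_cast<uintptr_t>(1) << (bit_index % kBitsPerWord);
      const bool was_set = (word & mask) != 0;
      word |= mask;
      return was_set;  // true: already marked, nothing to push.
    }
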
@@ -501,23 +509,20 @@
 }
 
 inline bool MarkSweep::MarkObjectParallel(const Object* obj) {
-  DCHECK(obj != NULL);
-
+  DCHECK(obj != nullptr);
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   if (immune_region_.ContainsObject(obj)) {
     DCHECK(IsMarked(obj));
     return false;
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (new_bitmap != NULL) {
       object_bitmap = new_bitmap;
     } else {
@@ -527,23 +532,20 @@
       return MarkLargeObject(obj, true);
     }
   }
-
   // Return true if the object was not previously marked.
   return !object_bitmap->AtomicTestAndSet(obj);
 }
 
-// Used to mark objects when recursing.  Recursion is done by moving
-// the finger across the bitmaps in address order and marking child
-// objects.  Any newly-marked objects whose addresses are lower than
-// the finger won't be visited by the bitmap scan, so those objects
-// need to be added to the mark stack.
-inline void MarkSweep::MarkObject(const Object* obj) {
-  if (obj != NULL) {
+// Used to mark objects when processing the mark stack. If an object is null, it is not marked.
+inline void MarkSweep::MarkObject(Object* obj) {
+  if (obj != nullptr) {
     MarkObjectNonNull(obj);
+  } else if (kCountMarkedObjects) {
+    ++mark_null_count_;
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
+void MarkSweep::MarkRootParallelCallback(Object** root, void* arg, uint32_t /*thread_id*/,
                                          RootType /*root_type*/) {
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
 }
@@ -614,8 +616,8 @@
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
       : mark_sweep_(mark_sweep) {}
 
-  // TODO: Fixme when anotatalysis works with visitors.
-  void operator()(Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj) const ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
@@ -627,11 +629,26 @@
   MarkSweep* const mark_sweep_;
 };
 
+class DelayReferenceReferentVisitor {
+ public:
+  explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    collector_->DelayReferenceReferent(klass, ref);
+  }
+
+ private:
+  MarkSweep* const collector_;
+};
+
 template <bool kUseFinger = false>
 class MarkStackTask : public Task {
  public:
   MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size,
-                const Object** mark_stack)
+                Object** mark_stack)
       : mark_sweep_(mark_sweep),
         thread_pool_(thread_pool),
         mark_stack_pos_(mark_stack_size) {
@@ -649,27 +666,44 @@
   static const size_t kMaxSize = 1 * KB;
 
  protected:
+  class MarkObjectParallelVisitor {
+   public:
+    explicit MarkObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task,
+                                       MarkSweep* mark_sweep) ALWAYS_INLINE
+            : chunk_task_(chunk_task), mark_sweep_(mark_sweep) {}
+
+    void operator()(Object* obj, MemberOffset offset, bool /* static */) const ALWAYS_INLINE
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+      mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
+      if (ref != nullptr && mark_sweep_->MarkObjectParallel(ref)) {
+        if (kUseFinger) {
+          android_memory_barrier();
+          if (reinterpret_cast<uintptr_t>(ref) >=
+              static_cast<uintptr_t>(mark_sweep_->atomic_finger_)) {
+            return;
+          }
+        }
+        chunk_task_->MarkStackPush(ref);
+      }
+    }
+
+   private:
+    MarkStackTask<kUseFinger>* const chunk_task_;
+    MarkSweep* const mark_sweep_;
+  };
+
   class ScanObjectParallelVisitor {
    public:
     explicit ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task) ALWAYS_INLINE
         : chunk_task_(chunk_task) {}
 
-    void operator()(Object* obj) const {
-      MarkSweep* mark_sweep = chunk_task_->mark_sweep_;
-      mark_sweep->ScanObjectVisit(obj,
-          [mark_sweep, this](Object* /* obj */, Object* ref, const MemberOffset& /* offset */,
-              bool /* is_static */) ALWAYS_INLINE_LAMBDA {
-        if (ref != nullptr && mark_sweep->MarkObjectParallel(ref)) {
-          if (kUseFinger) {
-            android_memory_barrier();
-            if (reinterpret_cast<uintptr_t>(ref) >=
-                static_cast<uintptr_t>(mark_sweep->atomic_finger_)) {
-              return;
-            }
-          }
-          chunk_task_->MarkStackPush(ref);
-        }
-      });
+    // No thread safety analysis since multiple threads will use this visitor.
+    void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      MarkSweep* const mark_sweep = chunk_task_->mark_sweep_;
+      MarkObjectParallelVisitor mark_visitor(chunk_task_, mark_sweep);
+      DelayReferenceReferentVisitor ref_visitor(mark_sweep);
+      mark_sweep->ScanObjectVisit(obj, mark_visitor, ref_visitor);
     }
 
    private:
@@ -687,11 +721,11 @@
   MarkSweep* const mark_sweep_;
   ThreadPool* const thread_pool_;
   // Thread local mark stack for this task.
-  const Object* mark_stack_[kMaxSize];
+  Object* mark_stack_[kMaxSize];
   // Mark stack position.
   size_t mark_stack_pos_;
 
-  void MarkStackPush(const Object* obj) ALWAYS_INLINE {
+  void MarkStackPush(Object* obj) ALWAYS_INLINE {
     if (UNLIKELY(mark_stack_pos_ == kMaxSize)) {
       // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task.
       mark_stack_pos_ /= 2;
@@ -700,7 +734,7 @@
       thread_pool_->AddTask(Thread::Current(), task);
     }
     DCHECK(obj != nullptr);
-    DCHECK(mark_stack_pos_ < kMaxSize);
+    DCHECK_LT(mark_stack_pos_, kMaxSize);
     mark_stack_[mark_stack_pos_++] = obj;
   }
 
@@ -709,16 +743,17 @@
   }
 
   // Scans all of the objects
-  virtual void Run(Thread* self) {
+  virtual void Run(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     ScanObjectParallelVisitor visitor(this);
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<const Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      const Object* obj = nullptr;
+      Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (mark_stack_pos_ != 0 && prefetch_fifo.size() < kFifoSize) {
-          const Object* obj = mark_stack_[--mark_stack_pos_];
+          Object* obj = mark_stack_[--mark_stack_pos_];
           DCHECK(obj != nullptr);
           __builtin_prefetch(obj);
           prefetch_fifo.push_back(obj);
@@ -735,7 +770,7 @@
         obj = mark_stack_[--mark_stack_pos_];
       }
       DCHECK(obj != nullptr);
-      visitor(const_cast<mirror::Object*>(obj));
+      visitor(obj);
     }
   }
 };
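MarkStackTask::Run() above hides memory latency by keeping a small FIFO of prefetched
objects: __builtin_prefetch is issued on the next few stack entries while the current
object is being scanned. A condensed, self-contained sketch of the same pattern, with
std::deque standing in for BoundedFifoPowerOfTwo:

    #include <cstddef>
    #include <deque>

    // Sketch: drain a stack while a small FIFO of prefetched pointers lets
    // the visit of one object overlap the cache fill for the next.
    template <typename T, typename Visitor>
    void DrainWithPrefetch(T** stack, size_t* pos, const Visitor& visit) {
      static const size_t kFifoSize = 4;
      std::deque<T*> fifo;
      for (;;) {
        while (*pos != 0 && fifo.size() < kFifoSize) {
          T* obj = stack[--*pos];
          __builtin_prefetch(obj);  // Warm the cache ahead of the visit.
          fifo.push_back(obj);
        }
        if (fifo.empty()) {
          break;  // Stack drained and FIFO empty: done.
        }
        T* obj = fifo.front();
        fifo.pop_front();
        visit(obj);
      }
    }
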
@@ -744,7 +779,7 @@
  public:
   CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, accounting::SpaceBitmap* bitmap,
                byte* begin, byte* end, byte minimum_age, size_t mark_stack_size,
-               const Object** mark_stack_obj)
+               Object** mark_stack_obj)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
         bitmap_(bitmap),
         begin_(begin),
@@ -795,8 +830,8 @@
     // scanned at the same time.
     timings_.StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects");
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
-    const Object** mark_stack_begin = const_cast<const Object**>(mark_stack_->Begin());
-    const Object** mark_stack_end = const_cast<const Object**>(mark_stack_->End());
+    Object** mark_stack_begin = mark_stack_->Begin();
+    Object** mark_stack_end = mark_stack_->End();
     const size_t mark_stack_size = mark_stack_end - mark_stack_begin;
     // Estimated number of work tasks we will create.
     const size_t mark_stack_tasks = GetHeap()->GetContinuousSpaces().size() * thread_count;
@@ -829,7 +864,7 @@
         size_t mark_stack_increment = std::min(mark_stack_delta, mark_stack_remaining);
         mark_stack_end -= mark_stack_increment;
         mark_stack_->PopBackCount(static_cast<int32_t>(mark_stack_increment));
-        DCHECK_EQ(mark_stack_end, const_cast<const art::mirror::Object **>(mark_stack_->End()));
+        DCHECK_EQ(mark_stack_end, mark_stack_->End());
         // Add the new task to the thread pool.
         auto* task = new CardScanTask(thread_pool, this, space->GetMarkBitmap(), card_begin,
                                       card_begin + card_increment, minimum_age,
@@ -918,8 +953,8 @@
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
       if ((space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) ||
           (!partial && space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
-        current_mark_bitmap_ = space->GetMarkBitmap();
-        if (current_mark_bitmap_ == nullptr) {
+        current_space_bitmap_ = space->GetMarkBitmap();
+        if (current_space_bitmap_ == nullptr) {
           continue;
         }
         if (parallel) {
@@ -938,7 +973,7 @@
             delta = RoundUp(delta, KB);
             if (delta < 16 * KB) delta = end - begin;
             begin += delta;
-            auto* task = new RecursiveMarkTask(thread_pool, this, current_mark_bitmap_, start,
+            auto* task = new RecursiveMarkTask(thread_pool, this, current_space_bitmap_, start,
                                                begin);
             thread_pool->AddTask(self, task);
           }
@@ -950,7 +985,7 @@
           // This function does not handle heap end increasing, so we must use the space end.
           uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
           uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-          current_mark_bitmap_->VisitMarkedRange(begin, end, scan_visitor);
+          current_space_bitmap_->VisitMarkedRange(begin, end, scan_visitor);
         }
       }
     }
@@ -1033,6 +1068,9 @@
     if (kUseThreadLocalAllocationStack) {
       thread->RevokeThreadLocalAllocationStack();
     }
+    if (kRevokeRosAllocThreadLocalBuffersAtCheckpoint) {
+      mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
+    }
     mark_sweep_->GetBarrier().Pass(self);
   }
 
@@ -1196,27 +1234,29 @@
   GetHeap()->RecordFree(freed_objects, freed_bytes);
 }
 
-// Process the "referent" field in a java.lang.ref.Reference.  If the
-// referent has not yet been marked, put it on the appropriate list in
-// the heap for later processing.
-void MarkSweep::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref) {
   DCHECK(klass != nullptr);
-  heap_->DelayReferenceReferent(klass, obj->AsReference(), IsMarkedCallback, this);
+  if (kCountJavaLangRefs) {
+    ++reference_count_;
+  }
+  heap_->DelayReferenceReferent(klass, ref, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
  public:
-  explicit MarkObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {}
+  explicit MarkObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {
+  }
 
-  // TODO: Fixme when anotatalysis works with visitors.
-  void operator()(const Object* /* obj */, const Object* ref, const MemberOffset& /* offset */,
-                  bool /* is_static */) const ALWAYS_INLINE
-      NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+      ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
     }
-    mark_sweep_->MarkObject(ref);
+    mark_sweep_->MarkObject(obj->GetFieldObject<mirror::Object>(offset, false));
   }
 
  private:
@@ -1226,12 +1266,12 @@
 // Scans an object reference.  Determines the type of the reference
 // and dispatches to a specialized scanning routine.
 void MarkSweep::ScanObject(Object* obj) {
-  MarkObjectVisitor visitor(this);
-  ScanObjectVisit(obj, visitor);
+  MarkObjectVisitor mark_visitor(this);
+  DelayReferenceReferentVisitor ref_visitor(this);
+  ScanObjectVisit(obj, mark_visitor, ref_visitor);
 }
 
 void MarkSweep::ProcessMarkStackPausedCallback(void* arg) {
-  DCHECK(arg != nullptr);
   reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(true);
 }
 
@@ -1244,8 +1284,7 @@
   // Split the current mark stack up into work tasks.
   for (mirror::Object **it = mark_stack_->Begin(), **end = mark_stack_->End(); it < end; ) {
     const size_t delta = std::min(static_cast<size_t>(end - it), chunk_size);
-    thread_pool->AddTask(self, new MarkStackTask<false>(thread_pool, this, delta,
-                                                        const_cast<const mirror::Object**>(it)));
+    thread_pool->AddTask(self, new MarkStackTask<false>(thread_pool, this, delta, it));
     it += delta;
   }
   thread_pool->SetMaxActiveWorkers(thread_count - 1);
@@ -1299,11 +1338,10 @@
   if (immune_region_.ContainsObject(object)) {
     return true;
   }
-  DCHECK(current_mark_bitmap_ != NULL);
-  if (current_mark_bitmap_->HasAddress(object)) {
-    return current_mark_bitmap_->Test(object);
+  if (current_space_bitmap_->HasAddress(object)) {
+    return current_space_bitmap_->Test(object);
   }
-  return heap_->GetMarkBitmap()->Test(object);
+  return mark_bitmap_->Test(object);
 }
 
 void MarkSweep::FinishPhase() {
@@ -1312,44 +1350,35 @@
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
-
-  // Update the cumulative statistics
+  // Update the cumulative statistics.
   total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
-
   // Ensure that the mark stack is empty.
   CHECK(mark_stack_->IsEmpty());
-
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
              << " other=" << other_count_;
   }
-
   if (kCountTasks) {
     VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_;
   }
-
   if (kMeasureOverhead) {
     VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_);
   }
-
   if (kProfileLargeObjects) {
     VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
   }
-
-  if (kCountClassesMarked) {
-    VLOG(gc) << "Classes marked " << classes_marked_;
-  }
-
   if (kCountJavaLangRefs) {
     VLOG(gc) << "References scanned " << reference_count_;
   }
-
+  if (kCountMarkedObjects) {
+    VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" <<  mark_immune_count_
+        << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_;
+  }
   // Update the cumulative loggers.
   cumulative_timings_.Start();
   cumulative_timings_.AddLogger(timings_);
   cumulative_timings_.End();
-
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
@@ -1359,12 +1388,24 @@
     }
   }
   mark_stack_->Reset();
-
   // Reset the marked large objects.
   space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
   large_objects->GetMarkObjects()->Clear();
 }
 
+void MarkSweep::RevokeAllThreadLocalBuffers() {
+  if (kRevokeRosAllocThreadLocalBuffersAtCheckpoint && IsConcurrent()) {
+    // If concurrent, rosalloc thread-local buffers are revoked at the
+    // thread checkpoint. Bump pointer space thread-local buffers must
+    // not be in use.
+    GetHeap()->AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
+  } else {
+    timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+    GetHeap()->RevokeAllThreadLocalBuffers();
+    timings_.EndSplit();
+  }
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
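kRevokeRosAllocThreadLocalBuffersAtCheckpoint moves the rosalloc revocation out of the
stop-the-world pause: each mutator returns its own thread-local buffers when it hits
the checkpoint, so only non-concurrent collections still revoke inside the pause. A
hypothetical sketch of the checkpoint shape (Thread and the closure are stand-ins, not
the real ART classes):

    #include <vector>

    // Stand-in for a mutator thread.
    struct Thread {
      void RevokeThreadLocalBuffers() { /* Return rosalloc runs to the heap. */ }
    };

    // In ART each thread runs the checkpoint on itself at a suspend point;
    // this loop serializes that for illustration only.
    struct RevokeCheckpoint {
      void Run(const std::vector<Thread*>& threads) {
        for (Thread* thread : threads) {
          thread->RevokeThreadLocalBuffers();  // Done outside the GC pause.
        }
      }
    };
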
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index b117b20..84b775a 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -33,6 +33,7 @@
   class Class;
   class Object;
   template<class T> class ObjectArray;
+  class Reference;
 }  // namespace mirror
 
 class StackVisitor;
@@ -68,24 +69,30 @@
 
   virtual void InitializePhase() OVERRIDE;
   virtual void MarkingPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual bool HandleDirtyObjectsPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void HandleDirtyObjectsPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void ReclaimPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void FinishPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MarkReachableObjects()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  virtual bool IsConcurrent() const OVERRIDE;
+  bool IsConcurrent() const {
+    return is_concurrent_;
+  }
 
   virtual GcType GetGcType() const OVERRIDE {
     return kGcTypeFull;
   }
 
+  virtual CollectorType GetCollectorType() const OVERRIDE {
+    return is_concurrent_ ? kCollectorTypeCMS : kCollectorTypeMS;
+  }
+
   // Initializes internal structures.
   void Init();
 
   // Find the default mark bitmap.
-  void FindDefaultMarkBitmap();
+  void FindDefaultSpaceBitmap();
 
   // Marks all objects in the root set at the start of a garbage collection.
   void MarkRoots(Thread* self)
@@ -126,7 +133,7 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void PreProcessReferences(Thread* self)
+  void PreProcessReferences()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -156,10 +163,12 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // TODO: enable thread safety analysis when in use by multiple worker threads.
-  template <typename MarkVisitor>
-  void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
+  // Scans the object, passing each reference field to the mark visitor and each
+  // java.lang.ref.Reference referent to the reference visitor.
+  template<typename MarkVisitor, typename ReferenceVisitor>
+  void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
+                       const ReferenceVisitor& ref_visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
@@ -174,15 +183,14 @@
   void VerifyIsLive(const mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  template <typename Visitor>
-  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor, bool visit_class)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_);
-
   static mirror::Object* MarkObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* ref, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t thread_id,
                                RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -201,7 +209,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Marks an object.
-  void MarkObject(const mirror::Object* obj)
+  void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -209,6 +217,10 @@
     return *gc_barrier_;
   }
 
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
  protected:
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
@@ -217,10 +229,9 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void MarkObjectNonNull(const mirror::Object* obj)
+  void MarkObjectNonNull(mirror::Object* obj)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -236,12 +247,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Marks an object atomically, safe to use from multiple threads.
-  void MarkObjectNonNullParallel(const mirror::Object* obj);
+  void MarkObjectNonNullParallel(mirror::Object* obj);
 
   // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
   // mark, otherwise we unmark.
   bool MarkLargeObject(const mirror::Object* obj, bool set)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) LOCKS_EXCLUDED(large_object_lock_);
 
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -265,49 +276,14 @@
   void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  template <typename Visitor>
-  static void VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
-                                            const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit the header, static field references, and interface pointers of a class object.
-  template <typename Visitor>
-  static void VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets, bool is_static,
-                                    const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit all of the references in an object array.
-  template <typename Visitor>
-  static void VisitObjectArrayReferences(mirror::ObjectArray<mirror::Object>* array,
-                                         const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visits the header and field references of a data object.
-  template <typename Visitor>
-  static void VisitOtherReferences(mirror::Class* klass, mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    return VisitInstanceFieldsReferences(klass, obj, visitor);
-  }
+  // Pushes a single reference onto the mark stack.
+  void PushOnMarkStack(mirror::Object* obj);
 
   // Blackens objects grayed during a garbage collection.
   void ScanGrayObjects(bool paused, byte minimum_age)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* reference)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   // Recursively blackens objects on the mark stack.
   void ProcessMarkStack(bool paused)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -317,27 +293,21 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void EnqueueFinalizerReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  void PreserveSomeSoftReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  void ClearWhiteReferences(mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   // Used to get around thread safety annotations. The call is from MarkingPhase and is guarded by
   // IsExclusiveHeld.
   void RevokeAllThreadLocalAllocationStacks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
 
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
-  accounting::SpaceBitmap* current_mark_bitmap_;
+  accounting::SpaceBitmap* current_space_bitmap_;
+  // Cache the heap's mark bitmap so slow-path marking does a single load instead of two.
+  accounting::HeapBitmap* mark_bitmap_;
 
   accounting::ObjectStack* mark_stack_;
 
@@ -354,11 +324,14 @@
   AtomicInteger other_count_;
   AtomicInteger large_object_test_;
   AtomicInteger large_object_mark_;
-  AtomicInteger classes_marked_;
   AtomicInteger overhead_time_;
   AtomicInteger work_chunks_created_;
   AtomicInteger work_chunks_deleted_;
   AtomicInteger reference_count_;
+  AtomicInteger mark_null_count_;
+  AtomicInteger mark_immune_count_;
+  AtomicInteger mark_fastpath_count_;
+  AtomicInteger mark_slowpath_count_;
 
   // Verification.
   size_t live_stack_freeze_size_;
@@ -377,6 +350,7 @@
   friend class art::gc::Heap;
   friend class InternTableEntryIsUnmarked;
   friend class MarkIfReachesAllocspaceVisitor;
+  friend class MarkObjectVisitor;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
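Most of the declarations deleted from this header were per-shape reference walkers
(instance fields, static fields, object arrays). Their replacement is a single visit
with two callbacks: one per reference field and one for java.lang.ref.Reference
referents. A toy sketch of that dispatch, under simplified stand-in types rather than
the real mirror classes:

    #include <cstddef>
    #include <vector>

    struct Reference;
    struct Class {
      bool is_reference_class = false;
      std::vector<size_t> reference_offsets;  // Byte offsets of reference fields.
    };
    struct Object {
      Class* klass = nullptr;
      Class* GetClass() const { return klass; }
      Reference* AsReference();  // Meaningful only when is_reference_class.
    };
    struct Reference : Object {};
    inline Reference* Object::AsReference() { return static_cast<Reference*>(this); }

    // One callback per reference field, plus a separate callback that hands
    // Reference referents off for delayed processing.
    template <typename FieldVisitor, typename RefVisitor>
    void VisitReferences(Object* obj, const FieldVisitor& field_visitor,
                         const RefVisitor& ref_visitor) {
      Class* klass = obj->GetClass();
      for (size_t offset : klass->reference_offsets) {
        field_visitor(obj, offset, /*is_static=*/false);
      }
      if (klass->is_reference_class) {
        ref_visitor(klass, obj->AsReference());
      }
    }
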
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 5b9c397..5faa3a1 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -97,18 +97,13 @@
 SemiSpace::SemiSpace(Heap* heap, bool generational, const std::string& name_prefix)
     : GarbageCollector(heap,
                        name_prefix + (name_prefix.empty() ? "" : " ") + "marksweep + semispace"),
-      mark_stack_(nullptr),
-      is_large_object_space_immune_(false),
       to_space_(nullptr),
-      to_space_live_bitmap_(nullptr),
       from_space_(nullptr),
-      self_(nullptr),
       generational_(generational),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0),
-      saved_bytes_(0) {
+      whole_heap_collection_interval_counter_(0) {
 }
 
 void SemiSpace::InitializePhase() {
@@ -214,7 +209,7 @@
             space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                      "UpdateAndMarkImageModUnionTable",
                                      &timings_);
-        table->UpdateAndMarkReferences(MarkObjectCallback, this);
+        table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
       } else if (heap_->FindRememberedSetFromSpace(space) != nullptr) {
         DCHECK(kUseRememberedSet);
         // If a bump pointer space only collection, the non-moving
@@ -246,7 +241,8 @@
 class SemiSpaceScanObjectVisitor {
  public:
   explicit SemiSpaceScanObjectVisitor(SemiSpace* ss) : semi_space_(ss) {}
-  void operator()(Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     // TODO: fix NO_THREAD_SAFETY_ANALYSIS. ScanObject() requires an
     // exclusive lock on the mutator lock, but
     // SpaceBitmap::VisitMarkedRange() only requires the shared lock.
@@ -263,22 +259,21 @@
   explicit SemiSpaceVerifyNoFromSpaceReferencesVisitor(space::ContinuousMemMapAllocSpace* from_space) :
       from_space_(from_space) {}
 
-  void operator()(Object* obj, Object* ref, const MemberOffset& offset, bool /* is_static */)
-      const ALWAYS_INLINE {
+  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE {
+    mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
     if (from_space_->HasAddress(ref)) {
       Runtime::Current()->GetHeap()->DumpObject(LOG(INFO), obj);
     }
-    DCHECK(!from_space_->HasAddress(ref));
   }
  private:
   space::ContinuousMemMapAllocSpace* from_space_;
 };
 
 void SemiSpace::VerifyNoFromSpaceReferences(Object* obj) {
-  DCHECK(obj != NULL);
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceVerifyNoFromSpaceReferencesVisitor visitor(from_space_);
-  MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses);
+  obj->VisitReferences<kMovingClasses>(visitor);
 }
 
 class SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor {
@@ -313,7 +308,7 @@
       accounting::RememberedSet* rem_set = heap_->FindRememberedSetFromSpace(space);
       if (kUseRememberedSet) {
         DCHECK(rem_set != nullptr);
-        rem_set->UpdateAndMarkReferences(MarkObjectCallback, from_space_, this);
+        rem_set->UpdateAndMarkReferences(MarkHeapReferenceCallback, from_space_, this);
         if (kIsDebugBuild) {
           // Verify that there are no from-space references that
           // remain in the space, that is, the remembered set (and the
@@ -475,9 +470,9 @@
   memcpy(dest, src, page_remain);
   byte_src += page_remain;
   byte_dest += page_remain;
-  CHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_dest), kPageSize);
-  CHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_dest), sizeof(uintptr_t));
-  CHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_src), sizeof(uintptr_t));
+  DCHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_dest), kPageSize);
+  DCHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_dest), sizeof(uintptr_t));
+  DCHECK_ALIGNED(reinterpret_cast<uintptr_t>(byte_src), sizeof(uintptr_t));
   while (byte_src + kPageSize < limit) {
     bool all_zero = true;
     uintptr_t* word_dest = reinterpret_cast<uintptr_t*>(byte_dest);
@@ -582,17 +577,18 @@
 // Used to mark and copy objects. Any newly-marked objects who are in the from space get moved to
 // the to-space and have their forward address updated. Objects which have been newly marked are
 // pushed on the mark stack.
-Object* SemiSpace::MarkObject(Object* obj) {
+void SemiSpace::MarkObject(mirror::HeapReference<Object>* obj_ptr) {
+  Object* obj = obj_ptr->AsMirrorPtr();
+  if (obj == nullptr) {
+    return;
+  }
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct forward pointer installed.
-    if (obj != nullptr) {
-      obj->AssertSelfBrooksPointer();
-    }
+    obj->AssertSelfBrooksPointer();
   }
-  Object* forward_address = obj;
-  if (obj != nullptr && !immune_region_.ContainsObject(obj)) {
+  if (!immune_region_.ContainsObject(obj)) {
     if (from_space_->HasAddress(obj)) {
-      forward_address = GetForwardingAddressInFromSpace(obj);
+      mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
       // If the object has already been moved, return the new forward address.
       if (forward_address == nullptr) {
         forward_address = MarkNonForwardedObject(obj);
@@ -604,9 +600,10 @@
         // Push the object onto the mark stack for later processing.
         MarkStackPush(forward_address);
       }
-      // TODO: Do we need this if in the else statement?
+      obj_ptr->Assign(forward_address);
     } else {
-      accounting::SpaceBitmap* object_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+      accounting::SpaceBitmap* object_bitmap =
+          heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
       if (LIKELY(object_bitmap != nullptr)) {
         if (generational_) {
           // If a bump pointer space only collection, we should not
@@ -615,9 +612,8 @@
           // the non-moving space is added to the immune space.
           DCHECK(whole_heap_collection_);
         }
-        // This object was not previously marked.
-        if (!object_bitmap->Test(obj)) {
-          object_bitmap->Set(obj);
+        if (!object_bitmap->Set(obj)) {
+          // This object was not previously marked.
           MarkStackPush(obj);
         }
       } else {
@@ -628,25 +624,30 @@
       }
     }
   }
-  return forward_address;
 }
 
 void SemiSpace::ProcessMarkStackCallback(void* arg) {
-  DCHECK(arg != nullptr);
   reinterpret_cast<SemiSpace*>(arg)->ProcessMarkStack();
 }
 
 mirror::Object* SemiSpace::MarkObjectCallback(mirror::Object* root, void* arg) {
-  DCHECK(root != nullptr);
-  DCHECK(arg != nullptr);
-  return reinterpret_cast<SemiSpace*>(arg)->MarkObject(root);
+  auto ref = mirror::HeapReference<mirror::Object>::FromMirrorPtr(root);
+  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
+  return ref.AsMirrorPtr();
+}
+
+void SemiSpace::MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr,
+                                          void* arg) {
+  reinterpret_cast<SemiSpace*>(arg)->MarkObject(obj_ptr);
 }
 
 void SemiSpace::MarkRootCallback(Object** root, void* arg, uint32_t /*thread_id*/,
                                  RootType /*root_type*/) {
-  DCHECK(root != nullptr);
-  DCHECK(arg != nullptr);
-  *root = reinterpret_cast<SemiSpace*>(arg)->MarkObject(*root);
+  auto ref = mirror::HeapReference<mirror::Object>::FromMirrorPtr(*root);
+  reinterpret_cast<SemiSpace*>(arg)->MarkObject(&ref);
+  if (*root != ref.AsMirrorPtr()) {
+    *root = ref.AsMirrorPtr();
+  }
 }
 
 // Marks all objects in the root set.
@@ -708,42 +709,35 @@
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
-void SemiSpace::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
-  heap_->DelayReferenceReferent(klass, obj->AsReference(), MarkedForwardingAddressCallback, this);
+void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+  heap_->DelayReferenceReferent(klass, reference, MarkedForwardingAddressCallback, this);
 }
 
 class SemiSpaceMarkObjectVisitor {
  public:
-  explicit SemiSpaceMarkObjectVisitor(SemiSpace* semi_space) : semi_space_(semi_space) {
+  explicit SemiSpaceMarkObjectVisitor(SemiSpace* collector) : collector_(collector) {
   }
 
-  void operator()(Object* obj, Object* ref, const MemberOffset& offset, bool /* is_static */)
-      const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) */ {
-    mirror::Object* new_address = semi_space_->MarkObject(ref);
-    if (new_address != ref) {
-      DCHECK(new_address != nullptr);
-      // Don't need to mark the card since we updating the object address and not changing the
-      // actual objects its pointing to. Using SetFieldObjectWithoutWriteBarrier is better in this
-      // case since it does not dirty cards and use additional memory.
-      // Since we do not change the actual object, we can safely use non-transactional mode. Also
-      // disable check as we could run inside a transaction.
-      obj->SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(offset, new_address, false);
-    }
+  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const ALWAYS_INLINE
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    collector_->MarkObject(obj->GetFieldObjectReferenceAddr(offset));
   }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+    collector_->DelayReferenceReferent(klass, ref);
+  }
+
  private:
-  SemiSpace* const semi_space_;
+  SemiSpace* const collector_;
 };
 
 // Visit all of the references of an object and update.
 void SemiSpace::ScanObject(Object* obj) {
-  DCHECK(obj != NULL);
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceMarkObjectVisitor visitor(this);
-  MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses);
-  mirror::Class* klass = obj->GetClass<kVerifyNone>();
-  if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
-    DelayReferenceReferent(klass, obj);
-  }
+  obj->VisitReferences<kMovingClasses>(visitor, visitor);
 }
 
 // Scan anything that's on the mark stack.
@@ -858,6 +852,12 @@
   }
 }
 
+void SemiSpace::RevokeAllThreadLocalBuffers() {
+  timings_.StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  GetHeap()->RevokeAllThreadLocalBuffers();
+  timings_.EndSplit();
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
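SemiSpace::MarkObject() now receives the reference slot itself, so a moved object's
forwarding address is written straight back with Assign() instead of being returned
for every caller to store, and no write barrier is needed since the object graph is
unchanged. A reduced sketch, with hypothetical stand-ins for the forwarding machinery
(the real code keeps the forwarding address in the object and copies via the space):

    #include <unordered_map>

    struct Object {};
    struct HeapReference {
      Object* ptr;
      Object* AsMirrorPtr() const { return ptr; }
      void Assign(Object* new_ptr) { ptr = new_ptr; }
    };

    static std::unordered_map<Object*, Object*> gForwarding;      // Toy table.
    static bool InFromSpace(Object*) { return true; }             // Toy predicate.
    static Object* CopyToToSpace(Object*) { return new Object; }  // Toy move.

    static void MarkThroughSlot(HeapReference* slot) {
      Object* obj = slot->AsMirrorPtr();
      if (obj == nullptr || !InFromSpace(obj)) {
        return;  // Null or not movable: leave the slot alone.
      }
      Object*& forward = gForwarding[obj];
      if (forward == nullptr) {
        forward = CopyToToSpace(obj);  // First visit: move and record.
      }
      slot->Assign(forward);  // The caller's field now points into to-space.
    }
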
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 08bfbc4..523c2ab 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -66,23 +66,24 @@
   // If true, use remembered sets in the generational mode.
   static constexpr bool kUseRememberedSet = true;
 
-  explicit SemiSpace(Heap* heap, bool generational = false,
-                     const std::string& name_prefix = "");
+  explicit SemiSpace(Heap* heap, bool generational = false, const std::string& name_prefix = "");
 
   ~SemiSpace() {}
 
-  virtual void InitializePhase();
-  virtual bool IsConcurrent() const {
-    return false;
-  }
-  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MarkReachableObjects()
+  virtual void InitializePhase() OVERRIDE;
+  virtual void MarkingPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  virtual void ReclaimPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  virtual void FinishPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkReachableObjects()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-  virtual GcType GetGcType() const {
+  virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
+  virtual CollectorType GetCollectorType() const OVERRIDE {
+    return generational_ ? kCollectorTypeGSS : kCollectorTypeSS;
+  }
 
   // Sets which space we will be copying objects to.
   void SetToSpace(space::ContinuousMemMapAllocSpace* to_space);
@@ -97,11 +98,13 @@
   void FindDefaultMarkBitmap();
 
-  // Returns the new address of the object.
-  mirror::Object* MarkObject(mirror::Object* object)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+  // Marks the object and, if it moved, updates the reference in place.
+  void MarkObject(mirror::HeapReference<mirror::Object>* obj_ptr)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ScanObject(mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VerifyNoFromSpaceReferences(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -112,12 +115,13 @@
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
   // the image. Mark that portion of the heap as immune.
-  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   void UnBindBitmaps()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void ProcessReferences(Thread* self)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ProcessReferences(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
@@ -126,22 +130,9 @@
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  // Sweep only pointers within an array. WARNING: Trashes objects.
-  void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  // TODO: enable thread safety analysis when in use by multiple worker threads.
-  template <typename MarkVisitor>
-  void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
-
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template <typename Visitor>
-  static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t /*tid*/,
                                RootType /*root_type*/)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -149,24 +140,36 @@
   static mirror::Object* MarkObjectCallback(mirror::Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  static void MarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>* obj_ptr, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
   static void ProcessMarkStackCallback(void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
   // object for non movable things).
-  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const;
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
   // mark, otherwise we unmark.
   bool MarkLargeObject(const mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
   void ResizeMarkStack(size_t new_size);
@@ -174,70 +177,17 @@
   // Returns true if we should sweep the space.
   virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
-  static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg,
-                                 const StackVisitor *visitor);
-
-  void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
-
-  template <typename Visitor>
-  static void VisitInstanceFieldsReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                            const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit the header, static field references, and interface pointers of a class object.
-  template <typename Visitor>
-  static void VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitStaticFieldsReferences(const mirror::Class* klass, const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets, bool is_static,
-                                    const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit all of the references in an object array.
-  template <typename Visitor>
-  static void VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
-                                         const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visits the header and field references of a data object.
-  template <typename Visitor>
-  static void VisitOtherReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    return VisitInstanceFieldsReferences(klass, obj, visitor);
-  }
-
   // Push an object onto the mark stack.
-  inline void MarkStackPush(mirror::Object* obj);
+  void MarkStackPush(mirror::Object* obj);
 
   void UpdateAndMarkModUnion()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* reference)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   // Recursively blackens objects on the mark stack.
   void ProcessMarkStack()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  void EnqueueFinalizerReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  void PreserveSomeSoftReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  void ClearWhiteReferences(mirror::Object** list)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
   void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
                          mirror::Object** weak_references,
                          mirror::Object** finalizer_references,
@@ -246,6 +196,9 @@
 
   inline mirror::Object* GetForwardingAddressInFromSpace(mirror::Object* obj) const;
 
+  // Revoke all the thread-local buffers.
+  void RevokeAllThreadLocalBuffers();
+
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
   accounting::ObjectStack* mark_stack_;
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 6cc44c9..8bfe793 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -118,11 +118,11 @@
   } else {
     DCHECK(!Dbg::IsAllocTrackingEnabled());
   }
-  // concurrent_gc_ isn't known at compile time so we can optimize by not checking it for
+  // IsGcConcurrent() isn't known at compile time so we can optimize by not checking it for
   // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
   // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since
   // the allocator_type should be constant propagated.
-  if (AllocatorMayHaveConcurrentGC(allocator) && concurrent_gc_) {
+  if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent()) {
     CheckConcurrentGC(self, new_num_bytes_allocated, &obj);
   }
   VerifyObject(obj);
@@ -276,7 +276,7 @@
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
     }
-    if (!AllocatorMayHaveConcurrentGC(allocator_type) || !concurrent_gc_) {
+    if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) {
       if (!kGrow) {
         return true;
       }
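
Note: the heap-inl.h comment above leans on constant propagation: the allocator type is a
compile-time constant at each call site, so the whole concurrent-GC branch folds away for the
BumpPointer/TLAB allocators. A minimal stand-alone sketch of that pattern (all names here are
hypothetical, not ART's):

#include <iostream>

enum AllocatorType { kAllocatorTypeTLAB, kAllocatorTypeRosAlloc };

// Evaluable at compile time, so the branch below folds away when the
// allocator type is a template constant.
constexpr bool MayHaveConcurrentGC(AllocatorType type) {
  return type != kAllocatorTypeTLAB;
}

bool IsGcConcurrentAtRuntime() { return true; }  // Stand-in for a runtime query.

template <AllocatorType kType>
void Allocate() {
  // For kAllocatorTypeTLAB the condition is false at compile time and the
  // whole statement is dead code; otherwise only the runtime check remains.
  if (MayHaveConcurrentGC(kType) && IsGcConcurrentAtRuntime()) {
    std::cout << "would check for concurrent GC\n";
  }
}

int main() {
  Allocate<kAllocatorTypeTLAB>();      // Branch eliminated.
  Allocate<kAllocatorTypeRosAlloc>();  // Branch kept, checked at runtime.
  return 0;
}
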
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a763e37..6c3ae5e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -88,7 +88,6 @@
       rosalloc_space_(nullptr),
       dlmalloc_space_(nullptr),
       main_space_(nullptr),
-      concurrent_gc_(false),
       collector_type_(kCollectorTypeNone),
       post_zygote_collector_type_(post_zygote_collector_type),
       background_collector_type_(background_collector_type),
@@ -277,7 +276,8 @@
   // Card cache for now since it makes it easier for us to update the references to the copying
   // spaces.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableCardCache("Image mod-union table", this, GetImageSpace());
+      new accounting::ModUnionTableToZygoteAllocspace("Image mod-union table", this,
+                                                      GetImageSpace());
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
 
@@ -774,9 +774,9 @@
                              IsMarkedCallback* is_marked_callback,
                              MarkObjectCallback* mark_object_callback,
                              ProcessMarkStackCallback* process_mark_stack_callback, void* arg) {
+  timings.StartSplit("(Paused)ProcessReferences");
   ProcessSoftReferences(timings, clear_soft, is_marked_callback, mark_object_callback,
                         process_mark_stack_callback, arg);
-  timings.StartSplit("(Paused)ProcessReferences");
   // Clear all remaining soft and weak references with white referents.
   soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
   weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
@@ -1018,20 +1018,20 @@
     }
     if (search_allocation_stack) {
       if (sorted) {
-        if (allocation_stack_->ContainsSorted(const_cast<mirror::Object*>(obj))) {
+        if (allocation_stack_->ContainsSorted(obj)) {
           return true;
         }
-      } else if (allocation_stack_->Contains(const_cast<mirror::Object*>(obj))) {
+      } else if (allocation_stack_->Contains(obj)) {
         return true;
       }
     }
 
     if (search_live_stack) {
       if (sorted) {
-        if (live_stack_->ContainsSorted(const_cast<mirror::Object*>(obj))) {
+        if (live_stack_->ContainsSorted(obj)) {
           return true;
         }
-      } else if (live_stack_->Contains(const_cast<mirror::Object*>(obj))) {
+      } else if (live_stack_->Contains(obj)) {
         return true;
       }
     }
@@ -1101,8 +1101,12 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
-  DCHECK_LE(freed_bytes, num_bytes_allocated_.Load());
+void Heap::RecordFree(ssize_t freed_objects, ssize_t freed_bytes) {
+  // Use signed comparison since freed bytes can be negative when a background compaction to
+  // foreground transition occurs. This is caused by moving objects from a bump pointer space to
+  // a free-list backed space, typically increasing memory footprint due to padding and binning.
+  DCHECK_LE(freed_bytes, static_cast<ssize_t>(num_bytes_allocated_.Load()));
+  DCHECK_GE(freed_objects, 0);
   num_bytes_allocated_.FetchAndSub(freed_bytes);
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
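
Note: why the switch to ssize_t matters: with unsigned operands a negative byte delta wraps
around, and the DCHECK compares garbage. A small illustration of the signed contract, assuming a
64-bit ssize_t (names are hypothetical, not ART's):

#include <cassert>
#include <cstddef>
#include <sys/types.h>  // For ssize_t.

void RecordFreeSketch(ssize_t freed_bytes, size_t* num_bytes_allocated) {
  // Signed comparison: a negative delta (a compaction that grew the footprint)
  // passes, while freeing more than was ever allocated is still caught.
  assert(freed_bytes <= static_cast<ssize_t>(*num_bytes_allocated));
  // Subtracting a negative freed_bytes correctly grows the running count.
  *num_bytes_allocated -= freed_bytes;
}

int main() {
  size_t allocated = 1000;
  RecordFreeSketch(-64, &allocated);  // Compaction added 64 bytes of padding.
  assert(allocated == 1064);
  return 0;
}
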
@@ -1298,15 +1302,16 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(const mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(o), *this, true);
+  void operator()(mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
+    o->VisitReferences<true>(*this);
   }
 
   // For MarkSweep::VisitObjectReferences.
-  void operator()(mirror::Object* referrer, mirror::Object* object,
-                  const MemberOffset&, bool) const {
-    if (object == object_ && (max_count_ == 0 || referring_objects_.size() < max_count_)) {
-      referring_objects_.push_back(referrer);
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
+    if (ref == object_ && (max_count_ == 0 || referring_objects_.size() < max_count_)) {
+      referring_objects_.push_back(obj);
     }
   }
 
@@ -1443,10 +1448,8 @@
     collector_type_ = collector_type;
     gc_plan_.clear();
     switch (collector_type_) {
-      case kCollectorTypeSS:
-        // Fall-through.
+      case kCollectorTypeSS:  // Fall-through.
       case kCollectorTypeGSS: {
-        concurrent_gc_ = false;
         gc_plan_.push_back(collector::kGcTypeFull);
         if (use_tlab_) {
           ChangeAllocator(kAllocatorTypeTLAB);
@@ -1456,7 +1459,6 @@
         break;
       }
       case kCollectorTypeMS: {
-        concurrent_gc_ = false;
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1464,7 +1466,6 @@
         break;
       }
       case kCollectorTypeCMS: {
-        concurrent_gc_ = true;
         gc_plan_.push_back(collector::kGcTypeSticky);
         gc_plan_.push_back(collector::kGcTypePartial);
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1475,7 +1476,7 @@
         LOG(FATAL) << "Unimplemented";
       }
     }
-    if (concurrent_gc_) {
+    if (IsGcConcurrent()) {
       concurrent_start_bytes_ =
           std::max(max_allowed_footprint_, kMinConcurrentRemainingBytes) - kMinConcurrentRemainingBytes;
     } else {
@@ -1809,7 +1810,7 @@
   } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
       current_allocator_ == kAllocatorTypeDlMalloc) {
     for (const auto& cur_collector : garbage_collectors_) {
-      if (cur_collector->IsConcurrent() == concurrent_gc_ &&
+      if (cur_collector->GetCollectorType() == collector_type_ &&
           cur_collector->GetGcType() == gc_type) {
         collector = cur_collector;
         break;
@@ -1819,8 +1820,8 @@
     LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
   CHECK(collector != nullptr)
-      << "Could not find garbage collector with concurrent=" << concurrent_gc_
-      << " and type=" << gc_type;
+      << "Could not find garbage collector with collector_type="
+      << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
   if (!clear_soft_references) {
     clear_soft_references = gc_type != collector::kGcTypeSticky;  // TODO: GSS?
@@ -1910,10 +1911,18 @@
     return failed_;
   }
 
-  // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for smarter
-  // analysis on visitors.
-  void operator()(mirror::Object* obj, mirror::Object* ref,
-                  const MemberOffset& offset, bool /* is_static */) const
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    this->operator()(ref, mirror::Reference::ReferentOffset(), false);
+  }
+
+  void operator()(mirror::Object* obj, MemberOffset offset, bool /* static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    this->operator()(obj, obj->GetFieldObject<mirror::Object>(offset, false), offset);
+  }
+
+  // TODO: Fix the no thread safety analysis.
+  void operator()(mirror::Object* obj, mirror::Object* ref, MemberOffset offset) const
       NO_THREAD_SAFETY_ANALYSIS {
     if (ref == nullptr || IsLive(ref)) {
       // Verify that the reference is live.
@@ -2014,7 +2023,7 @@
   static void VerifyRoots(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
                           RootType /*root_type*/) {
     VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    (*visitor)(nullptr, *root, MemberOffset(0), true);
+    (*visitor)(nullptr, *root, MemberOffset(0));
   }
 
  private:
@@ -2033,11 +2042,7 @@
     // be live or else how did we find it in the live bitmap?
     VerifyReferenceVisitor visitor(heap_);
     // The class doesn't count as a reference but we should verify it anyways.
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
-    if (obj->IsReferenceInstance()) {
-      mirror::Reference* ref = obj->AsReference();
-      visitor(obj, ref->GetReferent(), mirror::Reference::ReferentOffset(), false);
-    }
+    obj->VisitReferences<true>(visitor, visitor);
     failed_ = failed_ || visitor.Failed();
   }
 
@@ -2102,11 +2107,12 @@
 
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(mirror::Object* obj, mirror::Object* ref, const MemberOffset& offset,
-                  bool is_static) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
     // Filter out class references since changing an object's class does not mark the card as dirty.
     // Also handles large objects, since the only reference they hold is a class reference.
-    if (ref != NULL && !ref->IsClass()) {
+    if (ref != nullptr && !ref->IsClass()) {
       accounting::CardTable* card_table = heap_->GetCardTable();
       // If the object is not dirty and it is referencing something in the live stack other than
       // class, then it must be on a dirty card.
@@ -2118,8 +2124,8 @@
         // Card should be either kCardDirty if it got re-dirtied after we aged it, or
        // kCardDirty - 1 if it didn't get touched since we aged it.
         accounting::ObjectStack* live_stack = heap_->live_stack_.get();
-        if (live_stack->ContainsSorted(const_cast<mirror::Object*>(ref))) {
-          if (live_stack->ContainsSorted(const_cast<mirror::Object*>(obj))) {
+        if (live_stack->ContainsSorted(ref)) {
+          if (live_stack->ContainsSorted(obj)) {
             LOG(ERROR) << "Object " << obj << " found in live stack";
           }
           if (heap_->GetLiveBitmap()->Test(obj)) {
@@ -2173,7 +2179,7 @@
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
+    obj->VisitReferences<true>(visitor);
   }
 
   bool Failed() const {
@@ -2229,6 +2235,14 @@
   }
 }
 
+void Heap::AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked() {
+  if (kIsDebugBuild) {
+    if (bump_pointer_space_ != nullptr) {
+      bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
+    }
+  }
+}
+
 accounting::ModUnionTable* Heap::FindModUnionTableFromSpace(space::Space* space) {
   auto it = mod_union_tables_.find(space);
   if (it == mod_union_tables_.end()) {
@@ -2274,8 +2288,7 @@
   }
 }
 
-static mirror::Object* IdentityMarkObjectCallback(mirror::Object* obj, void*) {
-  return obj;
+static void IdentityMarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>*, void*) {
 }
 
 void Heap::PreGcVerification(collector::GarbageCollector* gc) {
@@ -2313,7 +2326,7 @@
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
-      mod_union_table->UpdateAndMarkReferences(IdentityMarkObjectCallback, nullptr);
+      mod_union_table->UpdateAndMarkReferences(IdentityMarkHeapReferenceCallback, nullptr);
       mod_union_table->Verify();
     }
     thread_list->ResumeAll();
@@ -2488,7 +2501,7 @@
   }
   if (!ignore_max_footprint_) {
     SetIdealFootprint(target_size);
-    if (concurrent_gc_) {
+    if (IsGcConcurrent()) {
       // Calculate when to perform the next ConcurrentGC.
       // Calculate the estimated GC duration.
       const double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
@@ -2651,6 +2664,12 @@
   }
 }
 
+void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) {
+  if (rosalloc_space_ != nullptr) {
+    rosalloc_space_->RevokeThreadLocalBuffers(thread);
+  }
+}
+
 void Heap::RevokeAllThreadLocalBuffers() {
   if (rosalloc_space_ != nullptr) {
     rosalloc_space_->RevokeAllThreadLocalBuffers();
@@ -2708,7 +2727,7 @@
       // finalizers released native managed allocations.
       UpdateMaxNativeFootprint();
     } else if (!IsGCRequestPending()) {
-      if (concurrent_gc_) {
+      if (IsGcConcurrent()) {
         RequestConcurrentGC(self);
       } else {
         CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index eb53ba9..60b8450 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -353,7 +353,7 @@
 
   // Freed bytes can be negative in cases where we copy objects from a compacted space to a
   // free-list backed space.
-  void RecordFree(size_t freed_objects, size_t freed_bytes);
+  void RecordFree(ssize_t freed_objects, ssize_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -437,7 +437,9 @@
   void Trim() LOCKS_EXCLUDED(heap_trim_request_lock_);
 
   void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeRosAllocThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
+  void AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
 
   void PreGcRosAllocVerification(TimingLogger* timings)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -553,6 +555,9 @@
   void RemoveRememberedSet(space::Space* space);
 
   bool IsCompilingBoot() const;
+  bool RunningOnValgrind() const {
+    return running_on_valgrind_;
+  }
   bool HasImageSpace() const;
 
  private:
@@ -680,6 +685,12 @@
   // Push an object onto the allocation stack.
   void PushOnAllocationStack(Thread* self, mirror::Object* obj);
 
+  // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
+  // sweep GC, false for other GC types.
+  bool IsGcConcurrent() const ALWAYS_INLINE {
+    return collector_type_ == kCollectorTypeCMS;
+  }
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
 
@@ -722,10 +733,6 @@
   // The mem-map which we will use for the non-moving space after the zygote is done forking:
   UniquePtr<MemMap> post_zygote_non_moving_space_mem_map_;
 
-  // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
-  // sweep GC, false for other GC types.
-  bool concurrent_gc_;
-
   // The current collector type.
   CollectorType collector_type_;
   // Which collector we will switch to after zygote fork.
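
Note: dropping concurrent_gc_ removes a cached copy of information that already lives in
collector_type_; the predicate is derived on demand and cannot go stale when the collector type
changes at a foreground/background transition. A sketch of the idea, with the types reduced to a
minimum:

enum CollectorType { kCollectorTypeMS, kCollectorTypeCMS, kCollectorTypeSS, kCollectorTypeGSS };

class HeapSketch {
 public:
  void ChangeCollector(CollectorType type) { collector_type_ = type; }
  // Derived, not cached: there is no second field to forget to update.
  bool IsGcConcurrent() const { return collector_type_ == kCollectorTypeCMS; }

 private:
  CollectorType collector_type_ = kCollectorTypeCMS;
};

int main() {
  HeapSketch heap;
  bool concurrent = heap.IsGcConcurrent();  // true
  heap.ChangeCollector(kCollectorTypeSS);
  concurrent = heap.IsGcConcurrent();       // false, automatically
  return concurrent ? 1 : 0;
}
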
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index fcd3b70..6148894 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -104,6 +104,26 @@
   }
 }
 
+void BumpPointerSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) {
+  if (kIsDebugBuild) {
+    MutexLock mu(Thread::Current(), block_lock_);
+    DCHECK(!thread->HasTlab());
+  }
+}
+
+void BumpPointerSpace::AssertAllThreadLocalBuffersAreRevoked() {
+  if (kIsDebugBuild) {
+    Thread* self = Thread::Current();
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    // TODO: Avoid copying the thread list?
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (Thread* thread : thread_list) {
+      AssertThreadLocalBuffersAreRevoked(thread);
+    }
+  }
+}
+
 void BumpPointerSpace::UpdateMainBlock() {
   DCHECK_EQ(num_blocks_, 0U);
   main_block_size_ = Size();
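
Note: the new asserts follow ART's debug-build idiom: the body is guarded by the constexpr
kIsDebugBuild rather than #ifdef'd out, so it stays compiled and type-checked in every build while
release builds drop it as dead code. A reduced sketch (ThreadSketch is a stand-in, not ART's
Thread):

#include <cassert>

static constexpr bool kIsDebugBuild = true;  // Stand-in for ART's build flag.

struct ThreadSketch {
  void* tlab_start = nullptr;
  bool HasTlab() const { return tlab_start != nullptr; }
};

void AssertThreadLocalBufferRevoked(const ThreadSketch& thread) {
  if (kIsDebugBuild) {  // Folded away entirely when kIsDebugBuild is false.
    assert(!thread.HasTlab());
  }
}

int main() {
  ThreadSketch t;
  AssertThreadLocalBufferRevoked(t);  // Passes: no TLAB attached.
  return 0;
}
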
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 031fccd..3ab5df4 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -103,6 +103,9 @@
   void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_);
   void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
                                                     Locks::thread_list_lock_);
+  void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_);
+  void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_,
+                                                              Locks::thread_list_lock_);
 
   uint64_t GetBytesAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint64_t GetObjectsAllocated() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 0597422..30c2edb 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -60,7 +60,7 @@
 
   // Everything is set so record in immutable structure and leave
   byte* begin = mem_map->Begin();
-  if (RUNNING_ON_VALGRIND > 0) {
+  if (Runtime::Current()->GetHeap()->RunningOnValgrind()) {
     return new ValgrindMallocSpace<DlMallocSpace, void*>(
         name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 80c7ca7..3c65205 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -65,7 +65,7 @@
   byte* begin = mem_map->Begin();
  // TODO: Fix RosAllocSpace to support valgrind. There are currently some issues with
   // AllocationSize caused by redzones. b/12944686
-  if (false && RUNNING_ON_VALGRIND > 0) {
+  if (false && Runtime::Current()->GetHeap()->RunningOnValgrind()) {
     return new ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>(
         name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
@@ -308,6 +308,12 @@
   rosalloc_->RevokeAllThreadLocalRuns();
 }
 
+void RosAllocSpace::AssertAllThreadLocalBuffersAreRevoked() {
+  if (kIsDebugBuild) {
+    rosalloc_->AssertAllThreadLocalRunsAreRevoked();
+  }
+}
+
 void RosAllocSpace::Clear() {
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 9b9adf8..949ec08 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -89,6 +89,7 @@
 
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
+  void AssertAllThreadLocalBuffersAreRevoked();
 
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 524798d..dfc82dd 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -242,15 +242,15 @@
   return InsertWeak(s, hash_code);
 }
 
-mirror::String* InternTable::InternStrong(int32_t utf16_length,
-                                          const char* utf8_data) {
+mirror::String* InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
+  DCHECK(utf8_data != nullptr);
   return InternStrong(mirror::String::AllocFromModifiedUtf8(
       Thread::Current(), utf16_length, utf8_data));
 }
 
 mirror::String* InternTable::InternStrong(const char* utf8_data) {
-  return InternStrong(
-      mirror::String::AllocFromModifiedUtf8(Thread::Current(), utf8_data));
+  DCHECK(utf8_data != nullptr);
+  return InternStrong(mirror::String::AllocFromModifiedUtf8(Thread::Current(), utf8_data));
 }
 
 mirror::String* InternTable::InternStrong(mirror::String* s) {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index e8cea9d..297f1a8 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -30,9 +30,10 @@
                                   size_t dest_reg, size_t src_reg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
  // If both register locations contain the same value, the register probably holds a reference.
-  int32_t src_value = shadow_frame.GetVReg(src_reg);
+  // Unsigned required so that sign extension does not make this wrong on 64-bit systems.
+  uint32_t src_value = shadow_frame.GetVReg(src_reg);
   mirror::Object* o = shadow_frame.GetVRegReference<kVerifyNone>(src_reg);
-  if (src_value == reinterpret_cast<intptr_t>(o)) {
+  if (src_value == reinterpret_cast<uintptr_t>(o)) {
     new_shadow_frame->SetVRegReference(dest_reg, o);
   } else {
     new_shadow_frame->SetVReg(dest_reg, src_value);
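
Note: the uint32_t fix can be seen in isolation: widening an int32_t whose high bit is set
sign-extends, so it can never compare equal to the zero-extended low word of a 64-bit pointer.
A self-contained illustration, assuming a 64-bit target:

#include <cassert>
#include <cstdint>

int main() {
  void* o = reinterpret_cast<void*>(uintptr_t{0xCAFEBABEu});  // High bit of the low word set.
  int32_t as_signed = static_cast<int32_t>(0xCAFEBABEu);
  uint32_t as_unsigned = 0xCAFEBABEu;
  // Sign extension turns 0xCAFEBABE into 0xFFFFFFFFCAFEBABE, so the signed
  // comparison can never match the pointer bits...
  assert(static_cast<intptr_t>(as_signed) != reinterpret_cast<intptr_t>(o));
  // ...while zero extension through uint32_t compares equal, as intended.
  assert(as_unsigned == reinterpret_cast<uintptr_t>(o));
  return 0;
}
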
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 43db7ec..13aa77f 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -550,24 +550,16 @@
     return soa.AddLocalReference<jclass>(c);
   }
 
-  static jmethodID FromReflectedMethod(JNIEnv* env, jobject java_method) {
-    CHECK_NON_NULL_ARGUMENT(FromReflectedMethod, java_method);
+  static jmethodID FromReflectedMethod(JNIEnv* env, jobject jlr_method) {
+    CHECK_NON_NULL_ARGUMENT(FromReflectedMethod, jlr_method);
     ScopedObjectAccess soa(env);
-    jobject art_method = env->GetObjectField(
-        java_method, WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
-    mirror::ArtMethod* method = soa.Decode<mirror::ArtMethod*>(art_method);
-    DCHECK(method != nullptr);
-    return soa.EncodeMethod(method);
+    return soa.EncodeMethod(mirror::ArtMethod::FromReflectedMethod(soa, jlr_method));
   }
 
-  static jfieldID FromReflectedField(JNIEnv* env, jobject java_field) {
-    CHECK_NON_NULL_ARGUMENT(FromReflectedField, java_field);
+  static jfieldID FromReflectedField(JNIEnv* env, jobject jlr_field) {
+    CHECK_NON_NULL_ARGUMENT(FromReflectedField, jlr_field);
     ScopedObjectAccess soa(env);
-    jobject art_field = env->GetObjectField(java_field,
-                                            WellKnownClasses::java_lang_reflect_Field_artField);
-    mirror::ArtField* field = soa.Decode<mirror::ArtField*>(art_field);
-    DCHECK(field != nullptr);
-    return soa.EncodeField(field);
+    return soa.EncodeField(mirror::ArtField::FromReflectedField(soa, jlr_field));
   }
 
   static jobject ToReflectedMethod(JNIEnv* env, jclass, jmethodID mid, jboolean) {
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 5647d93..1594338 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -47,7 +47,10 @@
 }
 
 #if defined(__LP64__) && !defined(__x86_64__)
-uintptr_t MemMap::next_mem_pos_ = kPageSize * 2;   // first page to check for low-mem extent
+// Where to start with low memory allocation.
+static constexpr uintptr_t LOW_MEM_START = kPageSize * 2;
+
+uintptr_t MemMap::next_mem_pos_ = LOW_MEM_START;   // first page to check for low-mem extent
 #endif
 
 static bool CheckMapRequest(byte* expected_ptr, void* actual_ptr, size_t byte_count,
@@ -122,6 +125,9 @@
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
 #endif
 
+  // We need to store and potentially set an error number for pretty printing of errors.
+  int saved_errno = 0;
+
   // TODO:
   // A page allocator would be a useful abstraction here, as
   // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
@@ -129,11 +135,25 @@
 #if defined(__LP64__) && !defined(__x86_64__)
   // MAP_32BIT only available on x86_64.
   void* actual = MAP_FAILED;
-  std::string strerr;
   if (low_4gb && expected == nullptr) {
     flags |= MAP_FIXED;
 
+    bool first_run = true;
+
     for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) {
+      if (4U * GB - ptr < page_aligned_byte_count) {
+        // Not enough memory until 4GB.
+        if (first_run) {
+          // Try another time from the bottom.
+          ptr = LOW_MEM_START - kPageSize;
+          first_run = false;
+          continue;
+        } else {
+          // Second try failed.
+          break;
+        }
+      }
+
       uintptr_t tail_ptr;
 
       // Check pages are free.
@@ -162,11 +182,12 @@
     }
 
     if (actual == MAP_FAILED) {
-      strerr = "Could not find contiguous low-memory space.";
+      LOG(ERROR) << "Could not find contiguous low-memory space.";
+      saved_errno = ENOMEM;
     }
   } else {
     actual = mmap(expected, page_aligned_byte_count, prot, flags, fd.get(), 0);
-    strerr = strerror(errno);
+    saved_errno = errno;
   }
 
 #else
@@ -177,15 +198,16 @@
 #endif
 
   void* actual = mmap(expected, page_aligned_byte_count, prot, flags, fd.get(), 0);
-  std::string strerr(strerror(errno));
+  saved_errno = errno;
 #endif
 
   if (actual == MAP_FAILED) {
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
+
     *error_msg = StringPrintf("Failed anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0): %s\n%s",
                               expected, page_aligned_byte_count, prot, flags, fd.get(),
-                              strerr.c_str(), maps.c_str());
+                              strerror(saved_errno), maps.c_str());
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
@@ -229,15 +251,17 @@
                                               flags,
                                               fd,
                                               page_aligned_offset));
-  std::string strerr(strerror(errno));
   if (actual == MAP_FAILED) {
+    auto saved_errno = errno;
+
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
+
     *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
                               ") of file '%s' failed: %s\n%s",
                               page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename, strerr.c_str(),
-                              maps.c_str());
+                              static_cast<int64_t>(page_aligned_offset), filename,
+                              strerror(saved_errno), maps.c_str());
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
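
Note: the low-4GB search above is a cursor-with-one-wrap scan: next_mem_pos_ advances across mmap
calls, and when the remaining room below 4GB cannot fit the request the loop restarts once from
LOW_MEM_START. A simplified stand-alone sketch (IsFree is a hypothetical stand-in for the page
probe; assumes a 64-bit uintptr_t):

#include <cstdint>

static constexpr uintptr_t kPageSizeSketch = 4096;
static constexpr uintptr_t kLowMemStartSketch = kPageSizeSketch * 2;
static constexpr uintptr_t k4GB = UINT64_C(4) * 1024 * 1024 * 1024;

bool IsFree(uintptr_t /*page*/) { return true; }  // Hypothetical page probe.

uintptr_t FindLowMemRange(uintptr_t start, uintptr_t byte_count) {
  bool first_run = true;
  for (uintptr_t ptr = start; ptr < k4GB; ptr += kPageSizeSketch) {
    if (k4GB - ptr < byte_count) {
      if (!first_run) {
        break;  // Second pass failed too; give up.
      }
      // Wrap to the bottom once; the loop increment advances to kLowMemStartSketch.
      ptr = kLowMemStartSketch - kPageSizeSketch;
      first_run = false;
      continue;
    }
    bool all_free = true;
    for (uintptr_t p = ptr; p < ptr + byte_count; p += kPageSizeSketch) {
      if (!IsFree(p)) {
        all_free = false;
        break;
      }
    }
    if (all_free) {
      return ptr;  // The caller would mmap here with MAP_FIXED.
    }
  }
  return 0;  // No contiguous low-memory range found.
}

int main() {
  return FindLowMemRange(kLowMemStartSketch, 16 * kPageSizeSketch) != 0 ? 0 : 1;
}
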
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 1d37775..dac287f 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,10 +27,6 @@
 namespace art {
 namespace mirror {
 
-static inline size_t HeaderSize(size_t component_size) {
-  return sizeof(Object) + (component_size == sizeof(int64_t) ? 8 : 4);
-}
-
 template<VerifyObjectFlags kVerifyFlags>
 inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
@@ -38,7 +34,7 @@
   // Don't need to check this since we already check this in GetClass.
   int32_t component_count =
       GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>();
-  size_t header_size = HeaderSize(component_size);
+  size_t header_size = DataOffset(component_size).SizeValue();
   size_t data_size = component_count * component_size;
   return header_size + data_size;
 }
@@ -50,7 +46,7 @@
   DCHECK_GE(component_count, 0);
   DCHECK(array_class->IsArrayClass());
 
-  size_t header_size = HeaderSize(component_size);
+  size_t header_size = Array::DataOffset(component_size).SizeValue();
   size_t data_size = component_count * component_size;
   size_t size = header_size + data_size;
 
@@ -134,7 +130,7 @@
         heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
                                                               allocator_type, visitor));
   } else {
-    SetLengthToUsableSizeVisitor visitor(component_count, HeaderSize(component_size),
+    SetLengthToUsableSizeVisitor visitor(component_count, DataOffset(component_size).SizeValue(),
                                          component_size);
     result = down_cast<Array*>(
         heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size,
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index 7740213..f91cab1 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -21,7 +21,9 @@
 #include "object-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "utils.h"
+#include "well_known_classes.h"
 
 namespace art {
 namespace mirror {
@@ -29,6 +31,13 @@
 // TODO: get global references for these
 Class* ArtField::java_lang_reflect_ArtField_ = NULL;
 
+ArtField* ArtField::FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field) {
+  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_reflect_Field_artField);
+  mirror::ArtField* field = f->GetObject(soa.Decode<mirror::Object*>(jlr_field))->AsArtField();
+  DCHECK(field != nullptr);
+  return field;
+}
+
 void ArtField::SetClass(Class* java_lang_reflect_ArtField) {
   CHECK(java_lang_reflect_ArtField_ == NULL);
   CHECK(java_lang_reflect_ArtField != NULL);
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 46287c3..0daa838 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -25,12 +25,16 @@
 namespace art {
 
 struct ArtFieldOffsets;
+class ScopedObjectAccess;
 
 namespace mirror {
 
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
+  static ArtField* FromReflectedField(const ScopedObjectAccess& soa, jobject jlr_field)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 7814f36..ee5a0a4 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -16,6 +16,7 @@
 
 #include "art_method.h"
 
+#include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/stringpiece.h"
 #include "class-inl.h"
@@ -28,8 +29,10 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
+#include "scoped_thread_state_change.h"
 #include "string.h"
 #include "object_utils.h"
+#include "well_known_classes.h"
 
 namespace art {
 namespace mirror {
@@ -45,6 +48,15 @@
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
+ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method) {
+  mirror::ArtField* f =
+      soa.DecodeField(WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
+  mirror::ArtMethod* method = f->GetObject(soa.Decode<mirror::Object*>(jlr_method))->AsArtMethod();
+  DCHECK(method != nullptr);
+  return method;
+}
+
+
 void ArtMethod::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_reflect_ArtMethod_ != nullptr) {
     callback(reinterpret_cast<mirror::Object**>(&java_lang_reflect_ArtMethod_), arg, 0,
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index c654933..fd5ac19 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -31,6 +31,7 @@
 union JValue;
 struct MethodClassOffsets;
 class MethodHelper;
+class ScopedObjectAccess;
 class StringPiece;
 class ShadowFrame;
 
@@ -44,6 +45,9 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
+  static ArtMethod* FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index e3f4eed..89d9241 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -465,6 +465,12 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
+template <bool kVisitClass, typename Visitor>
+inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
+  VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
+  VisitStaticFieldsReferences<kVisitClass>(this, visitor);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 76ab94c..ddc07ff 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -861,6 +861,10 @@
   // When class is verified, set the kAccPreverified flag on each method.
   void SetPreverifiedFlagOnAllMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template <bool kVisitClass, typename Visitor>
+  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index cad1017..281d4ec 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -557,6 +557,77 @@
   return success;
 }
 
+template<bool kVisitClass, bool kIsStatic, typename Visitor>
+inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor) {
+  if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
+    if (!kVisitClass) {
+      // Mask out the class from the reference offsets.
+      ref_offsets ^= kWordHighBitMask;
+    }
+    DCHECK_EQ(ClassOffset().Uint32Value(), 0U);
+    // Found a reference offset bitmap. Visit the specified offsets.
+    while (ref_offsets != 0) {
+      size_t right_shift = CLZ(ref_offsets);
+      MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
+      visitor(this, field_offset, kIsStatic);
+      ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
+    }
+  } else {
+    // There is no reference offset bitmap.  In the non-static case, walk up the class
+    // inheritance hierarchy and find reference offsets the hard way. In the static case, just
+    // consider this class.
+    for (mirror::Class* klass = kIsStatic ? AsClass() : GetClass(); klass != nullptr;
+        klass = kIsStatic ? nullptr : klass->GetSuperClass()) {
+      size_t num_reference_fields =
+          kIsStatic ? klass->NumReferenceStaticFields() : klass->NumReferenceInstanceFields();
+      for (size_t i = 0; i < num_reference_fields; ++i) {
+        mirror::ArtField* field = kIsStatic ? klass->GetStaticField(i)
+            : klass->GetInstanceField(i);
+        MemberOffset field_offset = field->GetOffset();
+        // TODO: Do a simpler check?
+        if (!kVisitClass && UNLIKELY(field_offset.Uint32Value() == ClassOffset().Uint32Value())) {
+          continue;
+        }
+        visitor(this, field_offset, kIsStatic);
+      }
+    }
+  }
+}
+
+template<bool kVisitClass, typename Visitor>
+inline void Object::VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
+  VisitFieldsReferences<kVisitClass, false>(
+      klass->GetReferenceInstanceOffsets<kVerifyNone>(), visitor);
+}
+
+template<bool kVisitClass, typename Visitor>
+inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
+  klass->VisitFieldsReferences<kVisitClass, true>(
+      klass->GetReferenceStaticOffsets<kVerifyNone>(), visitor);
+}
+
+template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags, typename Visitor,
+    typename JavaLangRefVisitor>
+inline void Object::VisitReferences(const Visitor& visitor,
+                                    const JavaLangRefVisitor& ref_visitor) {
+  mirror::Class* klass = GetClass<kVerifyFlags>();
+  if (UNLIKELY(klass == Class::GetJavaLangClass())) {
+    DCHECK_EQ(klass->GetClass(), Class::GetJavaLangClass());
+    AsClass<kVerifyNone>()->VisitReferences<kVisitClass>(klass, visitor);
+  } else if (UNLIKELY(klass->IsArrayClass<kVerifyFlags>())) {
+    if (klass->IsObjectArrayClass<kVerifyNone>()) {
+      AsObjectArray<mirror::Object>()->VisitReferences<kVisitClass>(visitor);
+    } else if (kVisitClass) {
+      visitor(this, ClassOffset(), false);
+    }
+  } else {
+    VisitFieldsReferences<kVisitClass, false>(klass->GetReferenceInstanceOffsets(), visitor);
+    if (UNLIKELY(klass->IsReferenceClass())) {
+      ref_visitor(klass, AsReference());
+    }
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
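
Note: the fast path of VisitFieldsReferences walks a 32-bit bitmap of reference offsets from the
most significant bit down, clearing each bit as it visits it. The same loop shape in isolation
(the bit-to-offset mapping below is invented for the sketch; ART derives offsets via
CLASS_OFFSET_FROM_CLZ, and __builtin_clz assumes GCC/Clang, which ART builds with):

#include <cstdint>
#include <iostream>

void VisitOffsets(uint32_t ref_offsets) {
  while (ref_offsets != 0) {
    unsigned right_shift = __builtin_clz(ref_offsets);       // 0..31, since nonzero.
    uint32_t field_offset = right_shift * sizeof(uint32_t);  // Hypothetical mapping.
    std::cout << "visit offset " << field_offset << "\n";
    ref_offsets &= ~(UINT32_C(0x80000000) >> right_shift);   // Clear the visited bit.
  }
}

int main() {
  VisitOffsets(0x80000005u);  // Visits bits 31, 2, 0, in that order.
  return 0;
}
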
 
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 476259f..0a77828 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -240,6 +240,14 @@
 #endif
   }
 
+  // TODO: Fix thread safety analysis broken by the use of templates. This should be
+  // SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
+  template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+      typename Visitor, typename JavaLangRefVisitor = VoidFunctor>
+  void VisitReferences(const Visitor& visitor,
+                       const JavaLangRefVisitor& ref_visitor = VoidFunctor())
+      NO_THREAD_SAFETY_ANALYSIS;
+
  protected:
   // Accessors for non-Java type fields
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -251,6 +259,17 @@
 #endif
   }
 
+  // TODO: Fix when annotalysis works with visitors.
+  template<bool kVisitClass, bool kIsStatic, typename Visitor>
+  void VisitFieldsReferences(uint32_t ref_offsets, const Visitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+  template<bool kVisitClass, typename Visitor>
+  void VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template<bool kVisitClass, typename Visitor>
+  void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   // Verify the type correctness of stores to fields.
   void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index a427957..8032cc3 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -233,6 +233,17 @@
                       (i * sizeof(HeapReference<Object>)));
 }
 
+template<class T> template<const bool kVisitClass, typename Visitor>
+void ObjectArray<T>::VisitReferences(const Visitor& visitor) {
+  if (kVisitClass) {
+    visitor(this, ClassOffset(), false);
+  }
+  const size_t length = static_cast<size_t>(GetLength());
+  for (size_t i = 0; i < length; ++i) {
+    visitor(this, OffsetOfElement(i), false);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
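
Note: the visitors handed to these templates are plain functors taking
(Object*, MemberOffset, bool is_static), as the heap.cc visitors above show; VisitReferences
invokes them once per reference-holding slot. A minimal conforming visitor, with the mirror types
replaced by stand-ins:

#include <cstddef>
#include <cstdint>

struct MemberOffset { uint32_t value; };  // Stand-in for ART's MemberOffset.
struct Object {};                         // Stand-in for mirror::Object.

class CountingVisitor {
 public:
  // The visitors are passed by const reference, hence the const operator()
  // and the mutable counter.
  void operator()(Object* /*obj*/, MemberOffset /*offset*/, bool /*is_static*/) const {
    ++count_;
  }
  size_t Count() const { return count_; }

 private:
  mutable size_t count_ = 0;
};

int main() {
  CountingVisitor visitor;
  Object obj;
  visitor(&obj, MemberOffset{0}, false);  // What VisitReferences does per slot.
  return visitor.Count() == 1 ? 0 : 1;
}
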
 
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 7f9e716..5ff0490 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -78,6 +78,11 @@
   ObjectArray<T>* CopyOf(Thread* self, int32_t new_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // TODO fix thread safety analysis broken by the use of template. This should be
+  // SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
+  template<const bool kVisitClass, typename Visitor>
+  void VisitReferences(const Visitor& visitor) NO_THREAD_SAFETY_ANALYSIS;
+
  private:
   static MemberOffset OffsetOfElement(int32_t i);
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 3f35210..d4f11b2 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -138,9 +138,7 @@
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
-  if (UNLIKELY(utf == nullptr)) {
-    return nullptr;
-  }
+  DCHECK(utf != nullptr);
   size_t char_count = CountModifiedUtf8Chars(utf);
   return AllocFromModifiedUtf8(self, char_count, utf);
 }
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index a22d7ca..b7e8ac2 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -36,10 +36,7 @@
  */
 static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
   ScopedFastNativeObjectAccess soa(env);
-  jobject art_method = soa.Env()->GetObjectField(
-      javaMethod, WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
-
-  mirror::ArtMethod* m = soa.Decode<mirror::Object*>(art_method)->AsArtMethod();
+  mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
   SirtRef<mirror::Class> c(soa.Self(), m->GetDeclaringClass());
   if (UNLIKELY(c->IsAbstract())) {
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 7e21d6c..6667d51 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -28,69 +28,72 @@
 namespace art {
 
 static bool GetFieldValue(const ScopedFastNativeObjectAccess& soa, mirror::Object* o,
-                          mirror::ArtField* f, JValue& value, bool allow_references)
+                          mirror::ArtField* f, Primitive::Type field_type, bool allow_references,
+                          JValue* value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK_EQ(value.GetJ(), INT64_C(0));
-  CHECK(!kMovingFields);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), o);
-  SirtRef<mirror::Class> sirt_klass(soa.Self(), f->GetDeclaringClass());
-  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
-    return false;
-  }
-  o = sirt_obj.get();
-  switch (FieldHelper(f).GetTypeAsPrimitiveType()) {
-  case Primitive::kPrimBoolean:
-    value.SetZ(f->GetBoolean(o));
-    return true;
-  case Primitive::kPrimByte:
-    value.SetB(f->GetByte(o));
-    return true;
-  case Primitive::kPrimChar:
-    value.SetC(f->GetChar(o));
-    return true;
-  case Primitive::kPrimDouble:
-    value.SetD(f->GetDouble(o));
-    return true;
-  case Primitive::kPrimFloat:
-    value.SetF(f->GetFloat(o));
-    return true;
-  case Primitive::kPrimInt:
-    value.SetI(f->GetInt(o));
-    return true;
-  case Primitive::kPrimLong:
-    value.SetJ(f->GetLong(o));
-    return true;
-  case Primitive::kPrimShort:
-    value.SetS(f->GetShort(o));
-    return true;
-  case Primitive::kPrimNot:
-    if (allow_references) {
-      value.SetL(f->GetObject(o));
+  DCHECK_EQ(value->GetJ(), INT64_C(0));
+  DCHECK(f->GetDeclaringClass()->IsInitialized());
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      value->SetZ(f->GetBoolean(o));
       return true;
-    }
-    // Else break to report an error.
-    break;
-  case Primitive::kPrimVoid:
-    // Never okay.
-    break;
+    case Primitive::kPrimByte:
+      value->SetB(f->GetByte(o));
+      return true;
+    case Primitive::kPrimChar:
+      value->SetC(f->GetChar(o));
+      return true;
+    case Primitive::kPrimDouble:
+      value->SetD(f->GetDouble(o));
+      return true;
+    case Primitive::kPrimFloat:
+      value->SetF(f->GetFloat(o));
+      return true;
+    case Primitive::kPrimInt:
+      value->SetI(f->GetInt(o));
+      return true;
+    case Primitive::kPrimLong:
+      value->SetJ(f->GetLong(o));
+      return true;
+    case Primitive::kPrimShort:
+      value->SetS(f->GetShort(o));
+      return true;
+    case Primitive::kPrimNot:
+      if (allow_references) {
+        value->SetL(f->GetObject(o));
+        return true;
+      }
+      // Else break to report an error.
+      break;
+    case Primitive::kPrimVoid:
+      // Never okay.
+      break;
   }
-  ThrowIllegalArgumentException(NULL,
-                                StringPrintf("Not a primitive field: %s",
-                                             PrettyField(f).c_str()).c_str());
+  ThrowIllegalArgumentException(nullptr, StringPrintf("Not a primitive field: %s",
+                                                      PrettyField(f).c_str()).c_str());
   return false;
 }
 
 static bool CheckReceiver(const ScopedFastNativeObjectAccess& soa, jobject j_rcvr,
-                          mirror::ArtField* f, mirror::Object*& class_or_rcvr)
+                          mirror::ArtField* f, mirror::Object** class_or_rcvr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  soa.Self()->AssertThreadSuspensionIsAllowable();
   if (f->IsStatic()) {
-    class_or_rcvr = f->GetDeclaringClass();
+    SirtRef<mirror::Class> sirt_klass(soa.Self(), f->GetDeclaringClass());
+    if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true))) {
+      DCHECK(soa.Self()->IsExceptionPending());
+      *class_or_rcvr = nullptr;
+      return false;
+    }
+    *class_or_rcvr = sirt_klass.get();
     return true;
   }
 
-  class_or_rcvr = soa.Decode<mirror::Object*>(j_rcvr);
+  *class_or_rcvr = soa.Decode<mirror::Object*>(j_rcvr);
   mirror::Class* declaringClass = f->GetDeclaringClass();
-  if (!VerifyObjectIsClass(class_or_rcvr, declaringClass)) {
+  if (!VerifyObjectIsClass(*class_or_rcvr, declaringClass)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    *class_or_rcvr = nullptr;
     return false;
   }
   return true;
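
Note: the CheckReceiver and GetFieldValue signature changes swap reference out-parameters for
pointers, the style-guide convention ART follows: a visible &arg at the call site marks mutation,
and the boolean return tells the caller whether the out-value is usable. The shape of that
contract, reduced to a hypothetical example:

#include <cassert>

// Fills |out| only on success; the caller checks the return before using it.
bool ParsePositive(const char* s, int* out) {
  assert(out != nullptr);
  int v = 0;
  for (; *s != '\0'; ++s) {
    if (*s < '0' || *s > '9') {
      return false;  // |out| is left untouched on failure.
    }
    v = v * 10 + (*s - '0');
  }
  *out = v;
  return true;
}

int main() {
  int value = 0;
  if (ParsePositive("42", &value)) {  // &value makes the write visible here.
    return value == 42 ? 0 : 1;
  }
  return 1;
}
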
@@ -98,42 +101,48 @@
 
 static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::ArtField* f = soa.DecodeField(env->FromReflectedField(javaField));
-  mirror::Object* o = NULL;
-  if (!CheckReceiver(soa, javaObj, f, o)) {
-    return NULL;
+  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
+  mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
+  mirror::Object* o = nullptr;
+  if (!CheckReceiver(soa, javaObj, f, &o)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
   }
-
+  // We now don't expect suspension unless an exception is thrown.
   // Get the field's value, boxing if necessary.
+  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
   JValue value;
-  if (!GetFieldValue(soa, o, f, value, true)) {
-    return NULL;
+  if (!GetFieldValue(soa, o, f, field_type, true, &value)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
   }
-  return
-      soa.AddLocalReference<jobject>(BoxPrimitive(FieldHelper(f).GetTypeAsPrimitiveType(), value));
+  return soa.AddLocalReference<jobject>(BoxPrimitive(field_type, value));
 }
 
 static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj,
                                 char dst_descriptor) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::ArtField* f = soa.DecodeField(env->FromReflectedField(javaField));
-  mirror::Object* o = NULL;
-  if (!CheckReceiver(soa, javaObj, f, o)) {
+  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
+  mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
+  mirror::Object* o = nullptr;
+  if (!CheckReceiver(soa, javaObj, f, &o)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
-
+  // We now don't expect suspension unless an exception is thrown.
   // Read the value.
+  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
   JValue field_value;
-  if (!GetFieldValue(soa, o, f, field_value, false)) {
+  if (!GetFieldValue(soa, o, f, field_type, false, &field_value)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
 
   // Widen it if necessary (and possible).
   JValue wide_value;
-  mirror::Class* dst_type =
-      Runtime::Current()->GetClassLinker()->FindPrimitiveClass(dst_descriptor);
-  if (!ConvertPrimitiveValue(NULL, false, FieldHelper(f).GetTypeAsPrimitiveType(),
-                             dst_type->GetPrimitiveType(), field_value, wide_value)) {
+  if (!ConvertPrimitiveValue(NULL, false, field_type, Primitive::GetType(dst_descriptor),
+                             field_value, wide_value)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
   return wide_value;
@@ -172,16 +181,11 @@
 }
 
 static void SetFieldValue(ScopedFastNativeObjectAccess& soa, mirror::Object* o,
-                          mirror::ArtField* f, const JValue& new_value, bool allow_references)
+                          mirror::ArtField* f, Primitive::Type field_type, bool allow_references,
+                          const JValue& new_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  CHECK(!kMovingFields);
-  SirtRef<mirror::Object> sirt_obj(soa.Self(), o);
-  SirtRef<mirror::Class> sirt_klass(soa.Self(), f->GetDeclaringClass());
-  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
-    return;
-  }
-  o = sirt_obj.get();
-  switch (FieldHelper(f).GetTypeAsPrimitiveType()) {
+  DCHECK(f->GetDeclaringClass()->IsInitialized());
+  switch (field_type) {
   case Primitive::kPrimBoolean:
     f->SetBoolean<false>(o, new_value.GetZ());
     break;
@@ -214,63 +218,77 @@
     // Else fall through to report an error.
   case Primitive::kPrimVoid:
     // Never okay.
-    ThrowIllegalArgumentException(NULL, StringPrintf("Not a primitive field: %s",
-                                                     PrettyField(f).c_str()).c_str());
+    ThrowIllegalArgumentException(nullptr, StringPrintf("Not a primitive field: %s",
+                                                        PrettyField(f).c_str()).c_str());
     return;
   }
-
-  // Special handling for final fields on SMP systems.
-  // We need a store/store barrier here (JMM requirement).
-  if (f->IsFinal()) {
-    QuasiAtomic::MembarStoreLoad();
-  }
 }
 
 static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::ArtField* f = soa.DecodeField(env->FromReflectedField(javaField));
-
+  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
+  mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
+  // Check that the receiver is non-null and an instance of the field's declaring class.
+  mirror::Object* o = nullptr;
+  if (!CheckReceiver(soa, javaObj, f, &o)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return;
+  }
+  Primitive::Type field_prim_type;
+  mirror::Class* field_type;
+  {
+    FieldHelper fh(f);
+    const char* field_type_descriptor = fh.GetTypeDescriptor();
+    field_prim_type = Primitive::GetType(field_type_descriptor[0]);
+    if (field_prim_type == Primitive::kPrimNot) {
+      SirtRef<mirror::Object> sirt_obj(soa.Self(), o);
+      // May cause resolution.
+      CHECK(!kMovingFields) << "Resolution may trigger thread suspension";
+      field_type = fh.GetType(true);
+      if (field_type == nullptr) {
+        DCHECK(soa.Self()->IsExceptionPending());
+        return;
+      }
+    } else {
+      field_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(field_type_descriptor[0]);
+    }
+  }
+  // We now don't expect suspension unless an exception is thrown.
   // Unbox the value, if necessary.
   mirror::Object* boxed_value = soa.Decode<mirror::Object*>(javaValue);
   JValue unboxed_value;
-  if (!UnboxPrimitiveForField(boxed_value, FieldHelper(f).GetType(), unboxed_value, f)) {
+  if (!UnboxPrimitiveForField(boxed_value, field_type, unboxed_value, f)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     return;
   }
-
-  // Check that the receiver is non-null and an instance of the field's declaring class.
-  mirror::Object* o = NULL;
-  if (!CheckReceiver(soa, javaObj, f, o)) {
-    return;
-  }
-
-  SetFieldValue(soa, o, f, unboxed_value, true);
+  SetFieldValue(soa, o, f, field_prim_type, true, unboxed_value);
 }
 
 static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, char src_descriptor,
                               const JValue& new_value) {
   ScopedFastNativeObjectAccess soa(env);
-  mirror::ArtField* f = soa.DecodeField(env->FromReflectedField(javaField));
-  mirror::Object* o = NULL;
-  if (!CheckReceiver(soa, javaObj, f, o)) {
+  mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
+  mirror::Object* o = nullptr;
+  if (!CheckReceiver(soa, javaObj, f, &o)) {
     return;
   }
-  FieldHelper fh(f);
-  if (!fh.IsPrimitiveType()) {
-    ThrowIllegalArgumentException(NULL, StringPrintf("Not a primitive field: %s",
-                                                     PrettyField(f).c_str()).c_str());
+  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  if (UNLIKELY(field_type == Primitive::kPrimNot)) {
+    ThrowIllegalArgumentException(nullptr, StringPrintf("Not a primitive field: %s",
+                                                        PrettyField(f).c_str()).c_str());
     return;
   }
 
   // Widen the value if necessary (and possible).
   JValue wide_value;
-  mirror::Class* src_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(src_descriptor);
-  if (!ConvertPrimitiveValue(NULL, false, src_type->GetPrimitiveType(), fh.GetTypeAsPrimitiveType(),
-                             new_value, wide_value)) {
+  if (!ConvertPrimitiveValue(nullptr, false, Primitive::GetType(src_descriptor),
+                             field_type, new_value, wide_value)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     return;
   }
 
   // Write the value.
-  SetFieldValue(soa, o, f, wide_value, false);
+  SetFieldValue(soa, o, f, field_type, false, wide_value);
 }
 
 static void Field_setBoolean(JNIEnv* env, jobject javaField, jobject javaObj, jboolean z) {
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 0b8bb7b..abb73b6 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -37,10 +37,7 @@
 
 static jobject Method_getExceptionTypesNative(JNIEnv* env, jobject javaMethod) {
   ScopedFastNativeObjectAccess soa(env);
-  jobject art_method = soa.Env()->GetObjectField(
-      javaMethod, WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
-
-  mirror::ArtMethod* proxy_method = soa.Decode<mirror::Object*>(art_method)->AsArtMethod();
+  mirror::ArtMethod* proxy_method = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
   CHECK(proxy_method->GetDeclaringClass()->IsProxyClass());
   mirror::SynthesizedProxyClass* proxy_class =
       down_cast<mirror::SynthesizedProxyClass*>(proxy_method->GetDeclaringClass());
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
index 468ba08..89ee34e 100644
--- a/runtime/object_callbacks.h
+++ b/runtime/object_callbacks.h
@@ -25,6 +25,7 @@
 namespace art {
 namespace mirror {
 class Object;
+template<class MirrorType> class HeapReference;
 }  // namespace mirror
 class StackVisitor;
 
@@ -56,6 +57,9 @@
 // A callback for verifying roots.
 typedef void (VerifyRootCallback)(const mirror::Object* root, void* arg, size_t vreg,
     const StackVisitor* visitor);
+
+typedef void (MarkHeapReferenceCallback)(mirror::HeapReference<mirror::Object>* ref, void* arg);
+
 // A callback for testing if an object is marked, returns nullptr if not marked, otherwise the new
 // address the object (if the object didn't move, returns the object input parameter).
 typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
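
Note: the new typedef follows the C-style callback pattern used throughout this header: a function
type, conforming free functions, and calls through a function pointer plus an opaque void* arg.
Sketch of the pattern with stand-in types (not ART's):

struct Object {};
template <class T> struct HeapReference { T* ref; };

typedef void (MarkHeapReferenceCallback)(HeapReference<Object>* ref, void* arg);

void IdentityMark(HeapReference<Object>* /*ref*/, void* /*arg*/) {
  // A no-op callback, analogous to IdentityMarkHeapReferenceCallback above.
}

void UpdateAndMark(MarkHeapReferenceCallback* callback, void* arg) {
  HeapReference<Object> slot{nullptr};
  callback(&slot, arg);  // The callee may update the reference in place.
}

int main() {
  UpdateAndMark(IdentityMark, nullptr);
  return 0;
}
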
diff --git a/runtime/offsets.h b/runtime/offsets.h
index e2dba9d..ed4e49e 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -32,6 +32,10 @@
   uint32_t Uint32Value() const {
     return static_cast<uint32_t>(val_);
   }
+  size_t SizeValue() const {
+    return val_;
+  }
+
  protected:
   size_t val_;
 };
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 5717689..9b1c013 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -203,11 +203,13 @@
     std::string checks(buf);
     std::vector<std::string> checkvec;
     Split(checks, ',', checkvec);
+    explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
+        kExplicitStackOverflowCheck;
     for (auto& str : checkvec) {
       std::string val = Trim(str);
       if (val == "none") {
         explicit_checks_ = kExplicitNullCheck | kExplicitSuspendCheck |
-            kExplicitStackOverflowCheck;
+          kExplicitStackOverflowCheck;
       } else if (val == "null") {
         explicit_checks_ &= ~kExplicitNullCheck;
       } else if (val == "suspend") {
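
Hoisting the assignment above the loop gives the parser a deterministic baseline: previously only the "none" branch established it, so a list such as "null" or "suspend" cleared bits in whatever explicit_checks_ happened to hold. A reduced sketch of the fixed semantics (flag values invented; the real parser handles more check names):

  #include <cstdint>
  #include <string>
  #include <vector>

  enum : uint32_t {
    kExplicitNullCheck = 1u << 0,
    kExplicitSuspendCheck = 1u << 1,
    kExplicitStackOverflowCheck = 1u << 2,
  };

  uint32_t ParseChecks(const std::vector<std::string>& vals) {
    // Baseline: every check is explicit.
    uint32_t checks = kExplicitNullCheck | kExplicitSuspendCheck | kExplicitStackOverflowCheck;
    for (const std::string& val : vals) {
      if (val == "none") {
        // Reset to the baseline: no implicit checks at all.
        checks = kExplicitNullCheck | kExplicitSuspendCheck | kExplicitStackOverflowCheck;
      } else if (val == "null") {
        checks &= ~kExplicitNullCheck;  // Switch to an implicit null check.
      } else if (val == "suspend") {
        checks &= ~kExplicitSuspendCheck;
      }
    }
    return checks;
  }
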
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 4388d31..a9072d8 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -80,8 +80,15 @@
 COMPILE_ASSERT(InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT) ==
     InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT), check_iget_iput_short_variant);
 
+// This is used by the compiler and the debugger. We look into the dex cache for resolved methods
+// and fields. However, in the context of the debugger, not all methods and fields are resolved.
+// Since we need to be able to detect possibly inlined methods, we pass a null inline method to
+// indicate we don't want to take unresolved methods and fields into account during analysis.
 bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier,
                                              InlineMethod* method) {
+  DCHECK(verifier != nullptr);
+  DCHECK_EQ(Runtime::Current()->IsCompiler(), method != nullptr);
+  DCHECK_EQ(verifier->CanLoadClasses(), method != nullptr);
   // We currently support only plain return or 2-instruction methods.
 
   const DexFile::CodeItem* code_item = verifier->CodeItem();
@@ -91,9 +98,11 @@
 
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      method->opcode = kInlineOpNop;
-      method->flags = kInlineSpecial;
-      method->d.data = 0u;
+      if (method != nullptr) {
+        method->opcode = kInlineOpNop;
+        method->flags = kInlineSpecial;
+        method->d.data = 0u;
+      }
       return true;
     case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
@@ -136,14 +145,16 @@
   DCHECK_LT((return_opcode == Instruction::RETURN_WIDE) ? reg + 1 : reg,
       code_item->registers_size_);
 
-  result->opcode = kInlineOpReturnArg;
-  result->flags = kInlineSpecial;
-  InlineReturnArgData* data = &result->d.return_data;
-  data->arg = reg - arg_start;
-  data->is_wide = (return_opcode == Instruction::RETURN_WIDE) ? 1u : 0u;
-  data->is_object = (return_opcode == Instruction::RETURN_OBJECT) ? 1u : 0u;
-  data->reserved = 0u;
-  data->reserved2 = 0u;
+  if (result != nullptr) {
+    result->opcode = kInlineOpReturnArg;
+    result->flags = kInlineSpecial;
+    InlineReturnArgData* data = &result->d.return_data;
+    data->arg = reg - arg_start;
+    data->is_wide = (return_opcode == Instruction::RETURN_WIDE) ? 1u : 0u;
+    data->is_object = (return_opcode == Instruction::RETURN_OBJECT) ? 1u : 0u;
+    data->reserved = 0u;
+    data->reserved2 = 0u;
+  }
   return true;
 }
 
@@ -173,9 +184,11 @@
   if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
     return false;  // Returning non-null reference constant?
   }
-  result->opcode = kInlineOpNonWideConst;
-  result->flags = kInlineSpecial;
-  result->d.data = static_cast<uint64_t>(vB);
+  if (result != nullptr) {
+    result->opcode = kInlineOpNonWideConst;
+    result->flags = kInlineSpecial;
+    result->d.data = static_cast<uint64_t>(vB);
+  }
   return true;
 }
 
@@ -215,18 +228,19 @@
     return false;
   }
 
-  if (!ComputeSpecialAccessorInfo(field_idx, false, verifier, &result->d.ifield_data)) {
-    return false;
+  if (result != nullptr) {
+    InlineIGetIPutData* data = &result->d.ifield_data;
+    if (!ComputeSpecialAccessorInfo(field_idx, false, verifier, data)) {
+      return false;
+    }
+    result->opcode = kInlineOpIGet;
+    result->flags = kInlineSpecial;
+    data->op_variant = IGetVariant(opcode);
+    data->object_arg = object_reg - arg_start;  // Allow IGET on any register, not just "this".
+    data->src_arg = 0;
+    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
+    data->reserved = 0;
   }
-
-  result->opcode = kInlineOpIGet;
-  result->flags = kInlineSpecial;
-  InlineIGetIPutData* data = &result->d.ifield_data;
-  data->op_variant = IGetVariant(opcode);
-  data->object_arg = object_reg - arg_start;  // Allow IGET on any register, not just "this".
-  data->src_arg = 0;
-  data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
-  data->reserved = 0;
   return true;
 }
 
@@ -262,18 +276,19 @@
     return false;
   }
 
-  if (!ComputeSpecialAccessorInfo(field_idx, true, verifier, &result->d.ifield_data)) {
-    return false;
+  if (result != nullptr) {
+    InlineIGetIPutData* data = &result->d.ifield_data;
+    if (!ComputeSpecialAccessorInfo(field_idx, true, verifier, data)) {
+      return false;
+    }
+    result->opcode = kInlineOpIPut;
+    result->flags = kInlineSpecial;
+    data->op_variant = IPutVariant(opcode);
+    data->object_arg = object_reg - arg_start;  // Allow IPUT on any register, not just "this".
+    data->src_arg = src_reg - arg_start;
+    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
+    data->reserved = 0;
   }
-
-  result->opcode = kInlineOpIPut;
-  result->flags = kInlineSpecial;
-  InlineIGetIPutData* data = &result->d.ifield_data;
-  data->op_variant = IPutVariant(opcode);
-  data->object_arg = object_reg - arg_start;  // Allow IPUT on any register, not just "this".
-  data->src_arg = src_reg - arg_start;
-  data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
-  data->reserved = 0;
   return true;
 }
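
Taken together, the new DCHECKs and the null-guarded writes encode a two-client contract: the compiler passes a real InlineMethod out-param and consumes the encoded data, while the debugger passes nullptr and only wants the boolean "could this be inlined?" answer, skipping the steps that require resolved dex-cache entries (ComputeSpecialAccessorInfo). A hypothetical pair of call sites, for illustration:

  // Compiler side: capture the inline encoding.
  InlineMethod method;
  if (InlineMethodAnalyser::AnalyseMethodCode(verifier, &method)) {
    // ... record 'method' in the inline method table ...
  }

  // Debugger side: classes cannot be loaded and some fields/methods are
  // unresolved, so pass nullptr and use only the yes/no result.
  bool possibly_inlined = InlineMethodAnalyser::AnalyseMethodCode(verifier, nullptr);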
 
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index dde9a94..f567055 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -462,8 +462,7 @@
 
 jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs) {
-  jmethodID mid = soa.Env()->FromReflectedMethod(javaMethod);
-  mirror::ArtMethod* m = soa.DecodeMethod(mid);
+  mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
   mirror::Class* declaring_class = m->GetDeclaringClass();
   if (UNLIKELY(!declaring_class->IsInitialized())) {
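
This hunk and the Method_getExceptionTypesNative one above converge on mirror::ArtMethod::FromReflectedMethod. Reconstructed from the deleted inline code, the helper plausibly reads the ArtMethod straight out of the reflected object's artMethod field, avoiding the jmethodID round trip; the actual signature may differ:

  // Sketch only, derived from the code removed in Method_getExceptionTypesNative.
  mirror::ArtMethod* FromReflectedMethod(const ScopedObjectAccess& soa, jobject jlr_method) {
    jobject art_method = soa.Env()->GetObjectField(
        jlr_method, WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
    return soa.Decode<mirror::Object*>(art_method)->AsArtMethod();
  }
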
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index eaa27de..21d79c3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -80,7 +80,11 @@
 Runtime* Runtime::instance_ = NULL;
 
 Runtime::Runtime()
-    : compiler_callbacks_(nullptr),
+    : pre_allocated_OutOfMemoryError_(nullptr),
+      resolution_method_(nullptr),
+      imt_conflict_method_(nullptr),
+      default_imt_(nullptr),
+      compiler_callbacks_(nullptr),
       is_zygote_(false),
       is_concurrent_gc_enabled_(true),
       is_explicit_gc_disabled_(false),
@@ -94,10 +98,6 @@
       class_linker_(nullptr),
       signal_catcher_(nullptr),
       java_vm_(nullptr),
-      pre_allocated_OutOfMemoryError_(nullptr),
-      resolution_method_(nullptr),
-      imt_conflict_method_(nullptr),
-      default_imt_(nullptr),
       fault_message_lock_("Fault message lock"),
       fault_message_(""),
       method_verifier_lock_("Method verifiers lock"),
diff --git a/runtime/runtime.h b/runtime/runtime.h
index eeaaa2b..50c88d3 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -442,6 +442,12 @@
   // A pointer to the active runtime or NULL.
   static Runtime* instance_;
 
+  mirror::ArtMethod* callee_save_methods_[kLastCalleeSaveType];
+  mirror::Throwable* pre_allocated_OutOfMemoryError_;
+  mirror::ArtMethod* resolution_method_;
+  mirror::ArtMethod* imt_conflict_method_;
+  mirror::ObjectArray<mirror::ArtMethod>* default_imt_;
+
   CompilerCallbacks* compiler_callbacks_;
   bool is_zygote_;
   bool is_concurrent_gc_enabled_;
@@ -475,16 +481,6 @@
 
   JavaVMExt* java_vm_;
 
-  mirror::Throwable* pre_allocated_OutOfMemoryError_;
-
-  mirror::ArtMethod* callee_save_methods_[kLastCalleeSaveType];
-
-  mirror::ArtMethod* resolution_method_;
-
-  mirror::ArtMethod* imt_conflict_method_;
-
-  mirror::ObjectArray<mirror::ArtMethod>* default_imt_;
-
   // Fault message, printed when we get a SIGSEGV.
   Mutex fault_message_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::string fault_message_ GUARDED_BY(fault_message_lock_);
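
The runtime.cc and runtime.h hunks move in lockstep: C++ initializes non-static members in declaration order, not in the order they appear in the mem-initializer list, so regrouping the GC-root members at the top of the class requires the matching reorder in the constructor (and keeps -Wreorder quiet). A standalone illustration of that language rule:

  #include <iostream>

  struct S {
    // -Wreorder warns here: a_ is initialized first (declaration order),
    // so its initializer reads b_ before b_ has been set.
    S() : b_(1), a_(b_ + 1) {}
    int a_;
    int b_;
  };

  int main() {
    S s;
    std::cout << s.b_ << "\n";  // Prints 1; s.a_ holds an indeterminate value.
  }
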
diff --git a/runtime/thread.cc b/runtime/thread.cc
index afa5574..8e14924 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2074,6 +2074,16 @@
   thread_local_objects_ = 0;
 }
 
+bool Thread::HasTlab() const {
+  bool has_tlab = thread_local_pos_ != nullptr;
+  if (has_tlab) {
+    DCHECK(thread_local_start_ != nullptr && thread_local_end_ != nullptr);
+  } else {
+    DCHECK(thread_local_start_ == nullptr && thread_local_end_ == nullptr);
+  }
+  return has_tlab;
+}
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread) {
   thread.ShortDump(os);
   return os;
diff --git a/runtime/thread.h b/runtime/thread.h
index 6cbd3d9..b063b1e 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -852,6 +852,7 @@
   // Doesn't check that there is room.
   mirror::Object* AllocTlab(size_t bytes);
   void SetTlab(byte* start, byte* end);
+  bool HasTlab() const;
 
   // Remove the suspend trigger for this thread by making the suspend_trigger_ TLS value
   // equal to a valid pointer.
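
HasTlab() pairs the cheap pointer test with DCHECKs that the three thread-local allocation fields are set and cleared together. A hypothetical caller on the GC side, skipping the revoke path for threads that never took a TLAB (everything here except Thread::HasTlab is invented):

  // Sketch: return unused TLAB space only where a TLAB actually exists.
  for (Thread* thread : thread_list) {
    if (thread->HasTlab()) {
      RevokeThreadLocalBuffer(thread);
    }
  }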