Merge "Generate ARM special methods from InlineMethod data."
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index f946d91..65b78c9 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -17,7 +17,8 @@
 LIBARTTEST_COMMON_SRC_FILES := \
 	test/JniTest/jni_test.cc \
 	test/ReferenceMap/stack_walk_refmap_jni.cc \
-	test/StackWalk/stack_walk_jni.cc
+	test/StackWalk/stack_walk_jni.cc \
+	test/UnsafeTest/unsafe_test.cc
 
 # $(1): target or host
 define build-libarttest
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index c2016d0..8f83cd0 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -140,24 +140,25 @@
 CompilationUnit::~CompilationUnit() {
 }
 
-// TODO: Add a cumulative version of logging, and combine with dex2oat --dump-timing
 void CompilationUnit::StartTimingSplit(const char* label) {
-  if (enable_debug & (1 << kDebugTimings)) {
+  if (compiler_driver->GetDumpPasses()) {
     timings.StartSplit(label);
   }
 }
 
 void CompilationUnit::NewTimingSplit(const char* label) {
-  if (enable_debug & (1 << kDebugTimings)) {
+  if (compiler_driver->GetDumpPasses()) {
     timings.NewSplit(label);
   }
 }
 
 void CompilationUnit::EndTiming() {
-  if (enable_debug & (1 << kDebugTimings)) {
+  if (compiler_driver->GetDumpPasses()) {
     timings.EndSplit();
-    LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-    LOG(INFO) << Dumpable<TimingLogger>(timings);
+    if (enable_debug & (1 << kDebugTimings)) {
+      LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
+      LOG(INFO) << Dumpable<TimingLogger>(timings);
+    }
   }
 }
 
@@ -330,6 +331,9 @@
   }
 
   cu.EndTiming();
+  compiler.GetTimingsLogger().Start();
+  compiler.GetTimingsLogger().AddLogger(cu.timings);
+  compiler.GetTimingsLogger().End();
   return result;
 }
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5e0fed7..05eb360 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1186,4 +1186,37 @@
 void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
   slow_paths_.Insert(slowpath);
 }
+
+void Mir2Lir::LoadCodeAddress(int dex_method_index, InvokeType type, SpecialTargetRegister symbolic_reg) {
+  LIR* data_target = ScanLiteralPool(code_literal_list_, dex_method_index, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&code_literal_list_, dex_method_index);
+    data_target->operands[1] = type;
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
+}
+
+void Mir2Lir::LoadMethodAddress(int dex_method_index, InvokeType type, SpecialTargetRegister symbolic_reg) {
+  LIR* data_target = ScanLiteralPool(method_literal_list_, dex_method_index, 0);
+  if (data_target == NULL) {
+    data_target = AddWordData(&method_literal_list_, dex_method_index);
+    data_target->operands[1] = type;
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+  DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
+}
+
+void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+  // Use the literal pool and a PC-relative load from a data word.
+  LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+  if (data_target == nullptr) {
+    data_target = AddWordData(&class_literal_list_, type_idx);
+  }
+  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  AppendLIR(load_pc_rel);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 0ad8abf..389dd9a 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -123,6 +123,8 @@
     { kClassCacheShort, 1, { kClassCacheShort } },
     // kProtoCacheD_D
     { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheF_F
+    { kClassCacheFloat, 1, { kClassCacheFloat } },
     // kProtoCacheD_J
     { kClassCacheLong, 1, { kClassCacheDouble } },
     // kProtoCacheJ_D
@@ -198,6 +200,10 @@
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
     INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Abs, F_F, kIntrinsicAbsFloat, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, F_F, kIntrinsicAbsFloat, 0),
+    INTRINSIC(JavaLangMath,       Abs, D_D, kIntrinsicAbsDouble, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, D_D, kIntrinsicAbsDouble, 0),
     INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
     INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
     INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
@@ -345,6 +351,10 @@
       return backend->GenInlinedAbsInt(info);
     case kIntrinsicAbsLong:
       return backend->GenInlinedAbsLong(info);
+    case kIntrinsicAbsFloat:
+      return backend->GenInlinedAbsFloat(info);
+    case kIntrinsicAbsDouble:
+      return backend->GenInlinedAbsDouble(info);
     case kIntrinsicMinMaxInt:
       return backend->GenInlinedMinMaxInt(info, intrinsic.d.data & kIntrinsicFlagMin);
     case kIntrinsicSqrt:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index fe0824c..fb7528e 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -40,6 +40,8 @@
   kIntrinsicReverseBytes,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
+  kIntrinsicAbsFloat,
+  kIntrinsicAbsDouble,
   kIntrinsicMinMaxInt,
   kIntrinsicSqrt,
   kIntrinsicCharAt,
@@ -268,6 +270,7 @@
       kProtoCacheJ_J,
       kProtoCacheS_S,
       kProtoCacheD_D,
+      kProtoCacheF_F,
       kProtoCacheD_J,
       kProtoCacheJ_D,
       kProtoCacheF_I,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c59f3b8..0533fbf 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -220,13 +220,7 @@
                                    &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        // Use the literal pool and a PC-relative load from a data word.
-        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
-        if (data_target == nullptr) {
-          data_target = AddWordData(&class_literal_list_, type_idx);
-        }
-        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
-        AppendLIR(load_pc_rel);
+        LoadClassType(type_idx, kArg0);
         func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
         CallRuntimeHelperRegMethodRegLocation(func_offset, TargetReg(kArg0), rl_src, true);
       } else {
@@ -994,13 +988,7 @@
                                    &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        // Use the literal pool and a PC-relative load from a data word.
-        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
-        if (data_target == nullptr) {
-          data_target = AddWordData(&class_literal_list_, type_idx);
-        }
-        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
-        AppendLIR(load_pc_rel);
+        LoadClassType(type_idx, kArg0);
         if (!is_type_initialized) {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
           CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
@@ -1100,6 +1088,9 @@
                                          bool can_assume_type_is_in_dex_cache,
                                          uint32_t type_idx, RegLocation rl_dest,
                                          RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
@@ -1181,15 +1172,10 @@
         LoadConstant(rl_result.low_reg, 1);     // assume true
         branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
       }
-      if (cu_->instruction_set != kX86) {
-        int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
-        OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
-        OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
-        FreeTemp(r_tgt);
-      } else {
-        OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-        OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
-      }
+      int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
+      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
+      OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
+      FreeTemp(r_tgt);
     }
   }
   // TODO: only clobber when type isn't final?
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6aaad66..ee61c8b 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -384,31 +384,15 @@
         if (cu->instruction_set != kX86) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         }
-      } else {
+      } else if (cu->instruction_set != kX86) {
         CHECK_EQ(cu->dex_file, target_method.dex_file);
-        LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_,
-                                               target_method.dex_method_index, 0);
-        if (data_target == NULL) {
-          data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index);
-          data_target->operands[1] = type;
-        }
-        LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target);
-        cg->AppendLIR(load_pc_rel);
-        DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+        cg->LoadCodeAddress(target_method.dex_method_index, type, kInvokeTgt);
       }
       if (direct_method != static_cast<unsigned int>(-1)) {
         cg->LoadConstant(cg->TargetReg(kArg0), direct_method);
       } else {
         CHECK_EQ(cu->dex_file, target_method.dex_file);
-        LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_,
-                                               target_method.dex_method_index, 0);
-        if (data_target == NULL) {
-          data_target = cg->AddWordData(&cg->method_literal_list_, target_method.dex_method_index);
-          data_target->operands[1] = type;
-        }
-        LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target);
-        cg->AppendLIR(load_pc_rel);
-        DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+        cg->LoadMethodAddress(target_method.dex_method_index, type, kArg0);
       }
       break;
     default:
@@ -427,18 +411,10 @@
       if (direct_code != 0) {
         if (direct_code != static_cast<unsigned int>(-1)) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
-        } else {
+        } else if (cu->instruction_set != kX86) {
           CHECK_EQ(cu->dex_file, target_method.dex_file);
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-          LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_,
-                                                 target_method.dex_method_index, 0);
-          if (data_target == NULL) {
-            data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index);
-            data_target->operands[1] = type;
-          }
-          LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target);
-          cg->AppendLIR(load_pc_rel);
-          DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target);
+          cg->LoadCodeAddress(target_method.dex_method_index, type, kInvokeTgt);
         }
       }
       break;
@@ -1001,7 +977,10 @@
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
   rl_obj = LoadValue(rl_obj, kCoreReg);
-  rl_idx = LoadValue(rl_idx, kCoreReg);
+  // X86 wants to avoid putting a constant index into a register.
+  if (!(cu_->instruction_set == kX86 && rl_idx.is_const)) {
+    rl_idx = LoadValue(rl_idx, kCoreReg);
+  }
   int reg_max;
   GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, info->opt_flags);
   bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK));
@@ -1025,29 +1004,43 @@
       FreeTemp(reg_max);
       OpCondBranch(kCondUge, launch_pad);
     }
+    OpRegImm(kOpAdd, reg_ptr, data_offset);
   } else {
     if (range_check) {
-      reg_max = AllocTemp();
-      LoadWordDisp(rl_obj.low_reg, count_offset, reg_max);
+      // On x86, we can compare to memory directly
       // Set up a launch pad to allow retry in case of bounds violation */
       launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info));
       intrinsic_launchpads_.Insert(launch_pad);
-      OpRegReg(kOpCmp, rl_idx.low_reg, reg_max);
-      FreeTemp(reg_max);
-      OpCondBranch(kCondUge, launch_pad);
+      if (rl_idx.is_const) {
+        OpCmpMemImmBranch(kCondUlt, INVALID_REG, rl_obj.low_reg, count_offset,
+                          mir_graph_->ConstantValue(rl_idx.orig_sreg), launch_pad);
+      } else {
+        OpRegMem(kOpCmp, rl_idx.low_reg, rl_obj.low_reg, count_offset);
+        OpCondBranch(kCondUge, launch_pad);
+      }
     }
     reg_off = AllocTemp();
     reg_ptr = AllocTemp();
     LoadWordDisp(rl_obj.low_reg, offset_offset, reg_off);
     LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr);
   }
-  OpRegImm(kOpAdd, reg_ptr, data_offset);
-  OpRegReg(kOpAdd, reg_off, rl_idx.low_reg);
+  if (rl_idx.is_const) {
+    OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
+  } else {
+    OpRegReg(kOpAdd, reg_off, rl_idx.low_reg);
+  }
   FreeTemp(rl_obj.low_reg);
-  FreeTemp(rl_idx.low_reg);
+  if (rl_idx.low_reg != INVALID_REG) {
+    FreeTemp(rl_idx.low_reg);
+  }
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LoadBaseIndexed(reg_ptr, reg_off, rl_result.low_reg, 1, kUnsignedHalf);
+  if (cu_->instruction_set != kX86) {
+    LoadBaseIndexed(reg_ptr, reg_off, rl_result.low_reg, 1, kUnsignedHalf);
+  } else {
+    LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.low_reg,
+                        INVALID_REG, kUnsignedHalf, INVALID_SREG);
+  }
   FreeTemp(reg_off);
   FreeTemp(reg_ptr);
   StoreValue(rl_dest, rl_result);
@@ -1094,7 +1087,7 @@
     return false;
   }
   RegLocation rl_src_i = info->args[0];
-  RegLocation rl_dest = InlineTarget(info);  // result reg
+  RegLocation rl_dest = (size == kLong) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == kLong) {
     RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg);
@@ -1179,6 +1172,43 @@
   }
 }
 
+bool Mir2Lir::GenInlinedAbsFloat(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  int signMask = AllocTemp();
+  LoadConstant(signMask, 0x7fffffff);
+  OpRegRegReg(kOpAnd, rl_result.low_reg, rl_src.low_reg, signMask);
+  FreeTemp(signMask);
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
+bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  if (cu_->instruction_set == kMips) {
+    // TODO - add Mips implementation
+    return false;
+  }
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTargetWide(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  OpRegCopyWide(rl_result.low_reg, rl_result.high_reg, rl_src.low_reg, rl_src.high_reg);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
+  int signMask = AllocTemp();
+  LoadConstant(signMask, 0x7fffffff);
+  OpRegReg(kOpAnd, rl_result.high_reg, signMask);
+  FreeTemp(signMask);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
 bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
@@ -1308,7 +1338,7 @@
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
-  RegLocation rl_dest = InlineTarget(info);  // result reg
+  RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   if (is_volatile) {
     GenMemBarrier(kLoadLoad);
   }
@@ -1436,8 +1466,15 @@
     call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt));
   } else {
     if (fast_path) {
-      call_inst = OpMem(kOpBlx, TargetReg(kArg0),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+      if (direct_code == static_cast<unsigned int>(-1)) {
+        // We can have the linker fixup a call relative.
+        call_inst =
+          reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(
+              target_method.dex_method_index, info->type);
+      } else {
+        call_inst = OpMem(kOpBlx, TargetReg(kArg0),
+                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+      }
     } else {
       ThreadOffset trampoline(-1);
       switch (info->type) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3a68044..6115953 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -417,7 +417,7 @@
     bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
     bool IsInexpensiveConstant(RegLocation rl_src);
     ConditionCode FlipComparisonOrder(ConditionCode before);
-    void InstallLiteralPools();
+    virtual void InstallLiteralPools();
     void InstallSwitchTables();
     void InstallFillArrayData();
     bool VerifyCatchEntries();
@@ -663,6 +663,8 @@
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     bool GenInlinedAbsLong(CallInfo* info);
+    bool GenInlinedAbsFloat(CallInfo* info);
+    bool GenInlinedAbsDouble(CallInfo* info);
     bool GenInlinedFloatCvt(CallInfo* info);
     bool GenInlinedDoubleCvt(CallInfo* info);
     bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
@@ -737,6 +739,34 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
+    /*
+     * @brief Load the code address of the dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param symbolic_reg Symbolic register that will contain the code address.
+     * @note symbolic_reg will be passed to TargetReg to get the physical register.
+     */
+    void LoadCodeAddress(int dex_method_index, InvokeType type,
+                         SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Method* of a dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param symbolic_reg Symbolic register that will contain the Method* address.
+     * @note symbolic_reg will be passed to TargetReg to get the physical register.
+     */
+    virtual void LoadMethodAddress(int dex_method_index, InvokeType type,
+                                   SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Class* of a Dex Class type into the register.
+     * @param type_idx The index of the class type to be loaded.
+     * @param symbolic_reg Symbolic register that will contain the Class* address.
+     * @note symbolic_reg will be passed to TargetReg to get the physical register.
+     */
+    virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+
     // Routines that work for the generic case, but may be overriden by target.
     /*
      * @brief Compare memory to immediate, and branch if condition true.
@@ -1046,13 +1076,13 @@
 
     void AddSlowPath(LIRSlowPath* slowpath);
 
-  private:
-    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
-                                    bool type_known_abstract, bool use_declaring_class,
-                                    bool can_assume_type_is_in_dex_cache,
-                                    uint32_t type_idx, RegLocation rl_dest,
-                                    RegLocation rl_src);
+    virtual void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                            bool type_known_abstract, bool use_declaring_class,
+                                            bool can_assume_type_is_in_dex_cache,
+                                            uint32_t type_idx, RegLocation rl_dest,
+                                            RegLocation rl_src);
 
+  private:
     void ClobberBody(RegisterInfo* p);
     void ResetDefBody(RegisterInfo* p) {
       p->def_start = NULL;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index ae53ddb..321c6a7 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -354,6 +354,7 @@
   { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0,        { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
   { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD | REG_USE01,       { 0,             0, 0xFF, 0,    0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
   { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,                   { THREAD_PREFIX, 0, 0xFF, 0,    0, 2, 0, 0 }, "CallT", "fs:[!0d]" },
+  { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4 }, "CallI", "!0d" },
   { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0 }, "Ret", "" },
 
   { kX86StartOfMethod, kMacro,  IS_UNARY_OP | SETS_CCODES,             { 0, 0, 0,    0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" },
@@ -494,6 +495,7 @@
       }
     case kCall:
       switch (lir->opcode) {
+        case kX86CallI: return 5;  // opcode + 4 byte displacement
         case kX86CallR: return 2;  // opcode modrm
         case kX86CallM:  // lir operands - 0: base, 1: disp
           return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
@@ -985,6 +987,16 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int disp) {
+  EmitPrefixAndOpcode(entry);
+  DCHECK_EQ(4, entry->skeleton.immediate_bytes);
+  code_buffer_.push_back(disp & 0xFF);
+  code_buffer_.push_back((disp >> 8) & 0xFF);
+  code_buffer_.push_back((disp >> 16) & 0xFF);
+  code_buffer_.push_back((disp >> 24) & 0xFF);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+}
+
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
@@ -1290,6 +1302,9 @@
         break;
       case kCall:
         switch (entry->opcode) {
+          case kX86CallI:  // lir operands - 0: disp
+            EmitCallImmediate(entry, lir->operands[0]);
+            break;
           case kX86CallM:  // lir operands - 0: base, 1: disp
             EmitCallMem(entry, lir->operands[0], lir->operands[1]);
             break;
@@ -1375,6 +1390,13 @@
  */
 void X86Mir2Lir::AssembleLIR() {
   cu_->NewTimingSplit("Assemble");
+
+  // We will remove the method address if we never ended up using it
+  if (store_method_addr_ && !store_method_addr_used_) {
+    setup_method_address_[0]->flags.is_nop = true;
+    setup_method_address_[1]->flags.is_nop = true;
+  }
+
   AssignOffsets();
   int assembler_retries = 0;
   /*
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 93875c9..7f646e0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -93,6 +93,7 @@
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
     rl_method = LoadValue(rl_method, kCoreReg);
     start_of_method_reg = rl_method.low_reg;
+    store_method_addr_used_ = true;
   } else {
     start_of_method_reg = AllocTemp();
     NewLIR1(kX86StartOfMethod, start_of_method_reg);
@@ -155,6 +156,7 @@
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
     LoadValueDirect(rl_method, rX86_ARG2);
+    store_method_addr_used_ = true;
   } else {
     NewLIR1(kX86StartOfMethod, rX86_ARG2);
   }
@@ -228,9 +230,9 @@
 
   if (base_of_code_ != nullptr) {
     // We have been asked to save the address of the method start for later use.
-    NewLIR1(kX86StartOfMethod, rX86_ARG0);
+    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rX86_ARG0);
     int displacement = SRegOffset(base_of_code_->s_reg_low);
-    StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
+    setup_method_address_[1] = StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
   }
 
   FreeTemp(rX86_ARG0);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 4c1c171..22e36d5 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -189,6 +189,24 @@
      */
     void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                             RegLocation rl_dest, RegLocation rl_src);
+    /*
+     *
+     * @brief Set up instanceof of a class with x86-specific code.
+     * @param needs_access_check 'true' if we must check the access.
+     * @param type_known_final 'true' if the type is known to be a final class.
+     * @param type_known_abstract 'true' if the type is known to be an abstract class.
+     * @param use_declaring_class 'true' if the type can be loaded off the current Method*.
+     * @param can_assume_type_is_in_dex_cache 'true' if the type is known to be in the cache.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                    bool type_known_abstract, bool use_declaring_class,
+                                    bool can_assume_type_is_in_dex_cache,
+                                    uint32_t type_idx, RegLocation rl_dest,
+                                    RegLocation rl_src);
+
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
@@ -245,6 +263,43 @@
     void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                        RegLocation rl_lhs, RegLocation rl_rhs);
 
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
+
+    /*
+     * @brief Load the Method* of a dex method into the register.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @param symbolic_reg Symbolic register that will contain the Method* address.
+     * @note symbolic_reg will be passed to TargetReg to get the physical register.
+     */
+    void LoadMethodAddress(int dex_method_index, InvokeType type,
+                           SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Load the Class* of a Dex Class type into the register.
+     * @param type_idx The index of the class type to be loaded.
+     * @param symbolic_reg Symbolic register that will contain the Class* address.
+     * @note symbolic_reg will be passed to TargetReg to get the physical register.
+     */
+    void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+
+    /*
+     * @brief Generate a relative call to the method that will be patched at link time.
+     * @param dex_method_index The index of the method to be invoked.
+     * @param type How the method will be invoked.
+     * @returns Call instruction
+     */
+    LIR * CallWithLinkerFixup(int dex_method_index, InvokeType type);
+
+    /*
+     * @brief Handle x86 specific literals
+     */
+    void InstallLiteralPools();
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -290,6 +345,7 @@
     void EmitJmp(const X86EncodingMap* entry, int rel);
     void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc);
     void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp);
+    void EmitCallImmediate(const X86EncodingMap* entry, int disp);
     void EmitCallThread(const X86EncodingMap* entry, int disp);
     void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index,
                    int scale, int table_or_disp);
@@ -330,12 +386,6 @@
      */
     bool IsNoOp(Instruction::Code op, int32_t value);
 
-    /*
-     * @brief Dump a RegLocation using printf
-     * @param loc Register location to dump
-     */
-    static void DumpRegLocation(RegLocation loc);
-
     /**
      * @brief Calculate magic number and shift for a given divisor
      * @param divisor divisor number for calculation
@@ -459,11 +509,26 @@
 
     // Information derived from analysis of MIR
 
+    // The compiler temporary for the code address of the method.
+    CompilerTemp *base_of_code_;
+
     // Have we decided to compute a ptr to code and store in temporary VR?
     bool store_method_addr_;
 
-    // The compiler temporary for the code address of the method.
-    CompilerTemp *base_of_code_;
+    // Have we used the stored method address?
+    bool store_method_addr_used_;
+
+    // Instructions to remove if we didn't use the stored method address.
+    LIR* setup_method_address_[2];
+
+    // Instructions needing patching with Method* values.
+    GrowableArray<LIR*> method_address_insns_;
+
+    // Instructions needing patching with Class Type* values.
+    GrowableArray<LIR*> class_type_address_insns_;
+
+    // Instructions needing patching with PC relative code addresses.
+    GrowableArray<LIR*> call_method_insns_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a567a8a..9dd6116 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -670,7 +670,7 @@
 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
   RegLocation rl_src_address = info->args[0];  // long address
   rl_src_address.wide = 0;  // ignore high half in info->args[1]
-  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == kLong) {
@@ -780,8 +780,23 @@
 }
 
 LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) {
-  LOG(FATAL) << "Unexpected use of OpPcRelLoad for x86";
-  return NULL;
+  CHECK(base_of_code_ != nullptr);
+
+  // Address the start of the method
+  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+  LoadValueDirectFixed(rl_method, reg);
+  store_method_addr_used_ = true;
+
+  // Load the proper value from the literal area.
+  // We don't know the proper offset for the value, so pick one that will force
+  // 4 byte offset.  We will fix this up in the assembler later to have the right
+  // value.
+  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg, reg, 256, 0, 0, target);
+  res->target = target;
+  res->flags.fixup = kFixupLoad;
+  SetMemRefType(res, true, kLiteral);
+  store_method_addr_used_ = true;
+  return res;
 }
 
 LIR* X86Mir2Lir::OpVldm(int rBase, int count) {
@@ -1717,6 +1732,88 @@
   StoreValue(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
+                                            bool type_known_abstract, bool use_declaring_class,
+                                            bool can_assume_type_is_in_dex_cache,
+                                            uint32_t type_idx, RegLocation rl_dest,
+                                            RegLocation rl_src) {
+  FlushAllRegs();
+  // May generate a call - use explicit registers.
+  LockCallTemps();
+  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 gets current Method*.
+  int class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*.
+  // Reference must end up in kArg0.
+  if (needs_access_check) {
+    // Check we have access to type_idx and if not throw IllegalAccessError.
+    // The called helper returns the Class* in kArg0.
+    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess),
+                         type_idx, true);
+    OpRegCopy(class_reg, TargetReg(kRet0));
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+  } else if (use_declaring_class) {
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+    LoadWordDisp(TargetReg(kArg1),
+                 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg);
+  } else {
+    // Load dex cache entry into class_reg (kArg2).
+    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+    LoadWordDisp(TargetReg(kArg1),
+                 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg);
+    int32_t offset_of_type =
+        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
+        * type_idx);
+    LoadWordDisp(class_reg, offset_of_type, class_reg);
+    if (!can_assume_type_is_in_dex_cache) {
+      // Need to test presence of type in dex cache at runtime.
+      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
+      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true);
+      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
+      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
+      // Rejoin code paths
+      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
+      hop_branch->target = hop_target;
+    }
+  }
+  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
+  RegLocation rl_result = GetReturn(false);
+
+  // SETcc only works with EAX..EDX.
+  DCHECK_LT(rl_result.low_reg, 4);
+
+  // Is the class NULL?
+  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+
+  /* Load object->klass_. */
+  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
+  LoadWordDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
+  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
+  LIR* branchover = nullptr;
+  if (type_known_final) {
+    // Ensure top 3 bytes of result are 0.
+    LoadConstant(rl_result.low_reg, 0);
+    OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));
+    // Set the low byte of the result to 0 or 1 from the compare condition code.
+    NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondEq);
+  } else {
+    if (!type_known_abstract) {
+      LoadConstant(rl_result.low_reg, 1);     // Assume result succeeds.
+      branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
+    }
+    OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
+    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
+  }
+  // TODO: only clobber when type isn't final?
+  ClobberCallerSave();
+  /* Branch targets here. */
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  StoreValue(rl_dest, rl_result);
+  branch1->target = target;
+  if (branchover != nullptr) {
+    branchover->target = target;
+  }
+}
+
 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_lhs, RegLocation rl_rhs) {
   OpKind op = kOpBkpt;
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index a347d8b..1893ffc 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -510,7 +510,11 @@
 }
 
 X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena),
+      method_address_insns_(arena, 100, kGrowableArrayMisc),
+      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
+      call_method_insns_(arena, 100, kGrowableArrayMisc) {
+  store_method_addr_used_ = false;
   for (int i = 0; i < kX86Last; i++) {
     if (X86Mir2Lir::EncodingMap[i].opcode != i) {
       LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
@@ -816,4 +820,104 @@
   Mir2Lir::Materialize();
 }
 
+void X86Mir2Lir::LoadMethodAddress(int dex_method_index, InvokeType type,
+                                   SpecialTargetRegister symbolic_reg) {
+  /*
+   * For x86, just generate a 32 bit move immediate instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::MethodId& id = cu_->dex_file->GetMethodId(dex_method_index);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
+
+  // Generate the move instruction with the unique pointer and save index and type.
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg),
+                     static_cast<int>(ptr), dex_method_index, type);
+  AppendLIR(move);
+  method_address_insns_.Insert(move);
+}
+
+void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+  /*
+   * For x86, just generate a 32 bit move immediate instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
+
+  // Generate the move instruction with the unique pointer and save index and type.
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg),
+                     static_cast<int>(ptr), type_idx);
+  AppendLIR(move);
+  class_type_address_insns_.Insert(move);
+}
+
+LIR *X86Mir2Lir::CallWithLinkerFixup(int dex_method_index, InvokeType type) {
+  /*
+   * For x86, just generate a 32 bit call relative instruction, that will be filled
+   * in at 'link time'.  For now, put a unique value based on target to ensure that
+   * code deduplication works.
+   */
+  const DexFile::MethodId& id = cu_->dex_file->GetMethodId(dex_method_index);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
+
+  // Generate the call instruction with the unique pointer and save index and type.
+  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(ptr), dex_method_index,
+                     type);
+  AppendLIR(call);
+  call_method_insns_.Insert(call);
+  return call;
+}
+
+void X86Mir2Lir::InstallLiteralPools() {
+  // These are handled differently for x86.
+  DCHECK(code_literal_list_ == nullptr);
+  DCHECK(method_literal_list_ == nullptr);
+  DCHECK(class_literal_list_ == nullptr);
+
+  // Handle the fixups for methods.
+  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
+      LIR* p = method_address_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86Mov32RI);
+      uint32_t target = p->operands[2];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
+                                           cu_->method_idx, cu_->invoke_type,
+                                           target, static_cast<InvokeType>(p->operands[3]),
+                                           patch_offset);
+  }
+
+  // Handle the fixups for class types.
+  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
+      LIR* p = class_type_address_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86Mov32RI);
+      uint32_t target = p->operands[2];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
+                                          cu_->method_idx, target, patch_offset);
+  }
+
+  // And now the PC-relative calls to methods.
+  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
+      LIR* p = call_method_insns_.Get(i);
+      DCHECK_EQ(p->opcode, kX86CallI);
+      uint32_t target = p->operands[1];
+
+      // The offset to patch is the last 4 bytes of the instruction.
+      int patch_offset = p->offset + p->flags.size - 4;
+      cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
+                                                 cu_->method_idx, cu_->invoke_type, target,
+                                                 static_cast<InvokeType>(p->operands[2]),
+                                                 patch_offset, -4 /* offset */);
+  }
+
+  // And do the normal processing.
+  Mir2Lir::InstallLiteralPools();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index e2744d0..48a39bb 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -518,8 +518,7 @@
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
         SetMemRefType(res, true, kLiteral);
-        // Redo after we assign target to ensure size is correct.
-        SetupResourceMasks(res);
+        store_method_addr_used_ = true;
       } else {
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
@@ -860,6 +859,7 @@
     case Instruction::REM_DOUBLE_2ADDR:
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
+
     // Packed switches and array fills need a pointer to the base of the method.
     case Instruction::FILL_ARRAY_DATA:
     case Instruction::PACKED_SWITCH:
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 6962ff7..8c385a1 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -22,54 +22,53 @@
 namespace art {
 
 /*
- * Runtime register conventions. We consider both x86, x86-64 and x32 (32bit mode x86-64), although
- * we currently only target x86. The ABI has different conventions and we hope to have a single
- * convention to simplify code generation. Changing something that is callee save and making it
- * caller save places a burden on up-calls to save/restore the callee save register, however, there
- * are few registers that are callee save in the ABI. Changing something that is caller save and
- * making it callee save places a burden on down-calls to save/restore the callee save register.
- * For these reasons we aim to match native conventions for caller and callee save. The first 4
- * registers can be used for byte operations, for this reason they are preferred for temporary
- * scratch registers.
+ * Runtime register conventions. We consider both x86, x86-64 and x32 (32bit mode x86-64). The ABI
+ * has different conventions and we capture those here. Changing something that is callee save and
+ * making it caller save places a burden on up-calls to save/restore the callee save register,
+ * however, there are few registers that are callee save in the ABI. Changing something that is
+ * caller save and making it callee save places a burden on down-calls to save/restore the callee
+ * save register. For these reasons we aim to match native conventions for caller and callee save.
+ * On x86 only the first 4 registers can be used for byte operations, for this reason they are
+ * preferred for temporary scratch registers.
  *
  * General Purpose Register:
- *  Native: x86         | x86-64 / x32      | ART
- *  r0/eax: caller save | caller save       | caller, Method*, scratch, return value
- *  r1/ecx: caller save | caller save, arg4 | caller, arg1, scratch
- *  r2/edx: caller save | caller save, arg3 | caller, arg2, scratch, high half of long return
- *  r3/ebx: callEE save | callEE save       | callER, arg3, scratch
+ *  Native: x86    | x86-64 / x32 | ART x86                                         | ART x86-64
+ *  r0/eax: caller | caller       | caller, Method*, scratch, return value          | caller, scratch, return value
+ *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                           | caller, arg3, scratch
+ *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long return | caller, arg2, scratch
+ *  r3/ebx: callEE | callEE       | callER, arg3, scratch                           | callee, promotable
  *  r4/esp: stack pointer
- *  r5/ebp: callee save | callee save       | callee, available for dalvik register promotion
- *  r6/esi: callEE save | callER save, arg2 | callee, available for dalvik register promotion
- *  r7/edi: callEE save | callER save, arg1 | callee, available for dalvik register promotion
+ *  r5/ebp: callee | callee       | callee, promotable                              | callee, promotable
+ *  r6/esi: callEE | callER, arg2 | callee, promotable                              | caller, arg1, scratch
+ *  r7/edi: callEE | callER, arg1 | callee, promotable                              | caller, Method*, scratch
  *  ---  x86-64/x32 registers
  *  Native: x86-64 / x32      | ART
- *  r8:     caller save, arg5 | caller, scratch
- *  r9:     caller save, arg6 | caller, scratch
+ *  r8:     caller save, arg5 | caller, arg4, scratch
+ *  r9:     caller save, arg6 | caller, arg5, scratch
  *  r10:    caller save       | caller, scratch
  *  r11:    caller save       | caller, scratch
- *  r12:    callee save       | callee, available for dalvik register promotion
- *  r13:    callee save       | callee, available for dalvik register promotion
- *  r14:    callee save       | callee, available for dalvik register promotion
- *  r15:    callee save       | callee, available for dalvik register promotion
+ *  r12:    callee save       | callee, available for register promotion (promotable)
+ *  r13:    callee save       | callee, available for register promotion (promotable)
+ *  r14:    callee save       | callee, available for register promotion (promotable)
+ *  r15:    callee save       | callee, available for register promotion (promotable)
  *
  * There is no rSELF, instead on x86 fs: has a base address of Thread::Current, whereas on
  * x86-64/x32 gs: holds it.
  *
  * For floating point we don't support CPUs without SSE2 support (ie newer than PIII):
- *  Native: x86       | x86-64 / x32     | ART
- *  XMM0: caller save |caller save, arg1 | caller, float/double return value (except for native x86 code)
- *  XMM1: caller save |caller save, arg2 | caller, scratch
- *  XMM2: caller save |caller save, arg3 | caller, scratch
- *  XMM3: caller save |caller save, arg4 | caller, scratch
- *  XMM4: caller save |caller save, arg5 | caller, scratch
- *  XMM5: caller save |caller save, arg6 | caller, scratch
- *  XMM6: caller save |caller save, arg7 | caller, scratch
- *  XMM7: caller save |caller save, arg8 | caller, scratch
+ *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
+ *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
+ *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
+ *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
+ *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
+ *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
+ *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
+ *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
+ *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
  *  ---  x86-64/x32 registers
- *  XMM8 .. 15: caller save
+ *  XMM8 .. 15: caller save available as scratch registers for ART.
  *
- * X87 is a necessary evil outside of ART code:
+ * X87 is a necessary evil outside of ART code for x86:
  *  ST0:  x86 float/double native return value, caller save
  *  ST1 .. ST7: caller save
  *
@@ -387,6 +386,7 @@
   kX86CallA,            // call [base + index * scale + disp]
                         // lir operands - 0: base, 1: index, 2: scale, 3: disp
   kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
+  kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
   kX86Ret,              // ret; no lir operands
   kX86StartOfMethod,    // call 0; pop reg; sub reg, # - generate start of method into reg
                         // lir operands - 0: reg
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 5b9d66c..402d4f4 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -344,7 +344,7 @@
                                CompilerBackend compiler_backend, InstructionSet instruction_set,
                                InstructionSetFeatures instruction_set_features,
                                bool image, DescriptorSet* image_classes, size_t thread_count,
-                               bool dump_stats)
+                               bool dump_stats, bool dump_passes, CumulativeLogger* timer)
     : verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_backend_(compiler_backend),
@@ -359,13 +359,15 @@
       start_ns_(0),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
+      dump_passes_(dump_passes),
+      timings_logger_(timer),
       compiler_library_(NULL),
       compiler_(NULL),
       compiler_context_(NULL),
       jni_compiler_(NULL),
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
-      support_boot_image_fixup_(instruction_set == kThumb2),
+      support_boot_image_fixup_(instruction_set != kMips),
       dedupe_code_("dedupe code"),
       dedupe_mapping_table_("dedupe mapping table"),
       dedupe_vmap_table_("dedupe vmap table"),
@@ -1396,6 +1398,24 @@
                                                     target_invoke_type,
                                                     literal_offset));
 }
+void CompilerDriver::AddRelativeCodePatch(const DexFile* dex_file,
+                                          uint16_t referrer_class_def_idx,
+                                          uint32_t referrer_method_idx,
+                                          InvokeType referrer_invoke_type,
+                                          uint32_t target_method_idx,
+                                          InvokeType target_invoke_type,
+                                          size_t literal_offset,
+                                          int32_t pc_relative_offset) {
+  MutexLock mu(Thread::Current(), compiled_methods_lock_);
+  code_to_patch_.push_back(new RelativeCallPatchInformation(dex_file,
+                                                            referrer_class_def_idx,
+                                                            referrer_method_idx,
+                                                            referrer_invoke_type,
+                                                            target_method_idx,
+                                                            target_invoke_type,
+                                                            literal_offset,
+                                                            pc_relative_offset));
+}
 void CompilerDriver::AddMethodPatch(const DexFile* dex_file,
                                     uint16_t referrer_class_def_idx,
                                     uint32_t referrer_method_idx,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index ea43e4f..c4ac9db 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "base/mutex.h"
+#include "base/timing_logger.h"
 #include "class_reference.h"
 #include "compiled_class.h"
 #include "compiled_method.h"
@@ -103,7 +104,8 @@
                           CompilerBackend compiler_backend, InstructionSet instruction_set,
                           InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
-                          size_t thread_count, bool dump_stats);
+                          size_t thread_count, bool dump_stats, bool dump_passes,
+                          CumulativeLogger* timer);
 
   ~CompilerDriver();
 
@@ -238,6 +240,15 @@
                     InvokeType target_invoke_type,
                     size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddRelativeCodePatch(const DexFile* dex_file,
+                            uint16_t referrer_class_def_idx,
+                            uint32_t referrer_method_idx,
+                            InvokeType referrer_invoke_type,
+                            uint32_t target_method_idx,
+                            InvokeType target_invoke_type,
+                            size_t literal_offset,
+                            int32_t pc_relative_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
   void AddMethodPatch(const DexFile* dex_file,
                       uint16_t referrer_class_def_idx,
                       uint32_t referrer_method_idx,
@@ -293,6 +304,15 @@
 
   class CallPatchInformation;
   class TypePatchInformation;
+
+  bool GetDumpPasses() const {
+    return dump_passes_;
+  }
+
+  CumulativeLogger& GetTimingsLogger() const {
+    return *timings_logger_;
+  }
+
   class PatchInformation {
    public:
     const DexFile& GetDexFile() const {
@@ -362,8 +382,14 @@
     bool IsCall() const {
       return true;
     }
+    virtual bool IsRelative() const {
+      return false;
+    }
+    virtual int RelativeOffset() const {
+      return 0;
+    }
 
-   private:
+   protected:
     CallPatchInformation(const DexFile* dex_file,
                          uint16_t referrer_class_def_idx,
                          uint32_t referrer_method_idx,
@@ -378,6 +404,7 @@
           target_invoke_type_(target_invoke_type) {
     }
 
+   private:
     const InvokeType referrer_invoke_type_;
     const uint32_t target_method_idx_;
     const InvokeType target_invoke_type_;
@@ -386,6 +413,36 @@
     DISALLOW_COPY_AND_ASSIGN(CallPatchInformation);
   };
 
+  class RelativeCallPatchInformation : public CallPatchInformation {
+   public:
+    bool IsRelative() const {
+      return true;
+    }
+    int RelativeOffset() const {
+      return offset_;
+    }
+
+   private:
+    RelativeCallPatchInformation(const DexFile* dex_file,
+                                 uint16_t referrer_class_def_idx,
+                                 uint32_t referrer_method_idx,
+                                 InvokeType referrer_invoke_type,
+                                 uint32_t target_method_idx,
+                                 InvokeType target_invoke_type,
+                                 size_t literal_offset,
+                                 int32_t pc_relative_offset)
+        : CallPatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, referrer_invoke_type,
+                           target_method_idx, target_invoke_type, literal_offset),
+          offset_(pc_relative_offset) {
+    }
+
+    const int offset_;
+
+    friend class CompilerDriver;
+    DISALLOW_COPY_AND_ASSIGN(RelativeCallPatchInformation);
+  };
+
   class TypePatchInformation : public PatchInformation {
    public:
     uint32_t GetTargetTypeIdx() const {
@@ -535,6 +592,9 @@
   UniquePtr<AOTCompilationStats> stats_;
 
   bool dump_stats_;
+  const bool dump_passes_;
+
+  CumulativeLogger* const timings_logger_;
 
   typedef void (*CompilerCallbackFn)(CompilerDriver& driver);
   typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver);
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index cdfb881..c7baf4f 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -265,12 +265,12 @@
   added_symbols_.Put(&symbol, &symbol);
 
   // Add input to supply code for symbol
-  const std::vector<uint8_t>& code = compiled_code.GetCode();
+  const std::vector<uint8_t>* code = compiled_code.GetPortableCode();
   // TODO: ownership of code_input?
   // TODO: why does IRBuilder::ReadInput take a non-const pointer?
   mcld::Input* code_input = ir_builder_->ReadInput(symbol,
-                                                   const_cast<uint8_t*>(&code[0]),
-                                                   code.size());
+                                                   const_cast<uint8_t*>(&(*code)[0]),
+                                                   code->size());
   CHECK(code_input != NULL);
 }
 
@@ -376,7 +376,7 @@
           (!method->IsStatic() ||
            method->IsConstructor() ||
            method->GetDeclaringClass()->IsInitialized())) {
-        method->SetOatCodeOffset(offset);
+        method->SetPortableOatCodeOffset(offset);
       }
     }
     it.Next();
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 67cd51b..e5dfb9d 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -785,7 +785,19 @@
     uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
     uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader());
     uintptr_t code_offset = quick_code - code_base;
-    SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
+    if (patch->IsRelative()) {
+      // The value to patch is relative to the location being patched.
+      const void* quick_oat_code =
+        class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                         patch->GetReferrerClassDefIdx(),
+                                         patch->GetReferrerMethodIdx());
+      uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code);
+      uintptr_t patch_location = base + patch->GetLiteralOffset();
+      uintptr_t value = quick_code - patch_location + patch->RelativeOffset();
+      SetPatchLocation(patch, value);
+    } else {
+      SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
+    }
   }
 
   const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b3070b6..f6b511c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -91,10 +91,12 @@
   verification_results_.reset(new VerificationResults);
   method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
   callbacks_.Reset(verification_results_.get(), method_inliner_map_.get());
+  timer_.reset(new CumulativeLogger("Compilation times"));
   compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                             method_inliner_map_.get(),
                                             compiler_backend, insn_set,
-                                            insn_features, false, NULL, 2, true));
+                                            insn_features, false, NULL, 2, true, true,
+                                            timer_.get()));
   jobject class_loader = NULL;
   if (kCompile) {
     TimingLogger timings("OatTest::WriteRead", false, false);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 90eea5e..98c64aa 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -253,7 +253,9 @@
                                       bool image,
                                       UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
-                                      TimingLogger& timings) {
+                                      bool dump_passes,
+                                      TimingLogger& timings,
+                                      CumulativeLogger& compiler_phases_timings) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -280,7 +282,9 @@
                                                         image,
                                                         image_classes.release(),
                                                         thread_count_,
-                                                        dump_stats));
+                                                        dump_stats,
+                                                        dump_passes,
+                                                        &compiler_phases_timings));
 
     if (compiler_backend_ == kPortable) {
       driver->SetBitcodeFileName(bitcode_filename);
@@ -654,6 +658,7 @@
 
 static int dex2oat(int argc, char** argv) {
   TimingLogger timings("compiler", false, false);
+  CumulativeLogger compiler_phases_timings("compilation times");
 
   InitLogging(argv);
 
@@ -703,6 +708,7 @@
   bool is_host = false;
   bool dump_stats = false;
   bool dump_timing = false;
+  bool dump_passes = false;
   bool dump_slow_timing = kIsDebugBuild;
   bool watch_dog_enabled = !kIsTargetBuild;
 
@@ -798,6 +804,8 @@
       runtime_args.push_back(argv[i]);
     } else if (option == "--dump-timing") {
       dump_timing = true;
+    } else if (option == "--dump-passes") {
+      dump_passes = true;
     } else if (option == "--dump-stats") {
       dump_stats = true;
     } else {
@@ -1069,7 +1077,9 @@
                                                                   image,
                                                                   image_classes,
                                                                   dump_stats,
-                                                                  timings));
+                                                                  dump_passes,
+                                                                  timings,
+                                                                  compiler_phases_timings));
 
   if (compiler.get() == NULL) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
@@ -1145,6 +1155,9 @@
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
+    if (dump_passes) {
+      LOG(INFO) << Dumpable<CumulativeLogger>(compiler.get()->GetTimingsLogger());
+    }
     return EXIT_SUCCESS;
   }
 
@@ -1187,6 +1200,9 @@
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
     LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
+  if (dump_passes) {
+    LOG(INFO) << Dumpable<CumulativeLogger>(compiler_phases_timings);
+  }
 
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
   // destructors that take time (bug 10645725) unless we're a debug build or running on valgrind.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 0e5c60a..cf3f72e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -274,7 +274,7 @@
      *   r2 = size of argument array in bytes
      *   r3 = (managed) thread pointer
      *   [sp] = JValue* result
-     *   [sp + 4] = result type char
+     *   [sp + 4] = shorty
      */
 ENTRY art_quick_invoke_stub
     push   {r0, r4, r5, r9, r11, lr}       @ spill regs
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c60bca0..f9a200a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -456,7 +456,7 @@
      *   a2 = size of argument array in bytes
      *   a3 = (managed) thread pointer
      *   [sp + 16] = JValue* result
-     *   [sp + 20] = result type char
+     *   [sp + 20] = shorty
      */
 ENTRY art_quick_invoke_stub
     GENERATE_GLOBAL_POINTER
@@ -502,7 +502,8 @@
     addiu $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     lw    $t0, 16($sp)          # get result pointer
-    lw    $t1, 20($sp)          # get result type char
+    lw    $t1, 20($sp)          # get shorty
+    lb    $t1, 0($t1)           # get result type char
     li    $t2, 68               # put char 'D' into t2
     beq   $t1, $t2, 1f          # branch if result type char == 'D'
     li    $t3, 70               # put char 'F' into t3
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 9c3eb30..c76c6b2 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -251,7 +251,7 @@
      *   [sp + 12] = size of argument array in bytes
      *   [sp + 16] = (managed) thread pointer
      *   [sp + 20] = JValue* result
-     *   [sp + 24] = result type char
+     *   [sp + 24] = shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     PUSH ebp                      // save ebp
@@ -281,17 +281,20 @@
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
     mov 20(%esp), %ecx            // get result pointer
-    cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
-    je return_double_quick
-    cmpl LITERAL(70), 24(%esp)    // test if result type char == 'F'
-    je return_float_quick
-    mov %eax, (%ecx)              // store the result
+    mov %eax, (%ecx)              // store the result assuming its a long, int or Object*
     mov %edx, 4(%ecx)             // store the other half of the result
+    mov 24(%esp), %edx            // get the shorty
+    cmpb LITERAL(68), (%edx)      // test if result type char == 'D'
+    je return_double_quick
+    cmpb LITERAL(70), (%edx)      // test if result type char == 'F'
+    je return_float_quick
     ret
 return_double_quick:
-return_float_quick:
     movsd %xmm0, (%ecx)           // store the floating point result
     ret
+return_float_quick:
+    movss %xmm0, (%ecx)           // store the floating point result
+    ret
 END_FUNCTION art_quick_invoke_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index e01a31b..44bc7a2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -149,6 +149,14 @@
 
     /*
      * Quick invocation stub.
+     * On entry:
+     *   [sp] = return address
+     *   rdi = method pointer
+     *   rsi = argument array or NULL for no argument methods
+     *   rdx = size of argument array in bytes
+     *   rcx = (managed) thread pointer
+     *   r8 = JValue* result
+     *   r9 = char* shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     int3
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 3b82651..3db8e12 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -44,10 +44,10 @@
     expandable_(expandable),
     storage_size_(storage_size),
     storage_(storage) {
-  DCHECK_EQ(sizeof(storage_[0]), 4U);  // Assuming 32-bit units.
-  if (storage_ == NULL) {
+  DCHECK_EQ(sizeof(*storage_), 4U);  // Assuming 32-bit units.
+  if (storage_ == nullptr) {
     storage_size_ = BitsToWords(start_bits);
-    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(uint32_t)));
+    storage_ = static_cast<uint32_t*>(allocator_->Alloc(storage_size_ * sizeof(*storage_)));
   }
 }
 
@@ -59,7 +59,11 @@
  * Determine whether or not the specified bit is set.
  */
 bool BitVector::IsBitSet(uint32_t num) const {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
+  // If the index is over the size:
+  if (num >= storage_size_ * sizeof(*storage_) * 8) {
+    // Whether it is expandable or not, this bit does not exist: thus it is not set.
+    return false;
+  }
 
   uint32_t val = storage_[num >> 5] & check_masks[num & 0x1f];
   return (val != 0);
@@ -67,7 +71,7 @@
 
 // Mark all bits bit as "clear".
 void BitVector::ClearAllBits() {
-  memset(storage_, 0, storage_size_ * sizeof(uint32_t));
+  memset(storage_, 0, storage_size_ * sizeof(*storage_));
 }
 
 // Mark the specified bit as "set".
@@ -76,17 +80,17 @@
  * not using it badly or change resize mechanism.
  */
 void BitVector::SetBit(uint32_t num) {
-  if (num >= storage_size_ * sizeof(uint32_t) * 8) {
+  if (num >= storage_size_ * sizeof(*storage_) * 8) {
     DCHECK(expandable_) << "Attempted to expand a non-expandable bitmap to position " << num;
 
     /* Round up to word boundaries for "num+1" bits */
     uint32_t new_size = BitsToWords(num + 1);
     DCHECK_GT(new_size, storage_size_);
     uint32_t *new_storage =
-        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(uint32_t)));
-    memcpy(new_storage, storage_, storage_size_ * sizeof(uint32_t));
+        static_cast<uint32_t*>(allocator_->Alloc(new_size * sizeof(*storage_)));
+    memcpy(new_storage, storage_, storage_size_ * sizeof(*storage_));
     // Zero out the new storage words.
-    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(uint32_t));
+    memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(*storage_));
+    // TODO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
@@ -97,30 +101,109 @@
 
 // Mark the specified bit as "unset".
 void BitVector::ClearBit(uint32_t num) {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
-  storage_[num >> 5] &= ~check_masks[num & 0x1f];
+  // If the index is over the size, we don't have to do anything, it is cleared.
+  if (num < storage_size_ * sizeof(*storage_) * 8) {
+    // Otherwise, go ahead and clear it.
+    storage_[num >> 5] &= ~check_masks[num & 0x1f];
+  }
 }
 
-// Intersect with another bit vector.  Sizes and expandability must be the same.
+bool BitVector::SameBitsSet(const BitVector *src) {
+  int our_highest = GetHighestBitSet();
+  int src_highest = src->GetHighestBitSet();
+
+  // If the highest bit set is different, we are different.
+  if (our_highest != src_highest) {
+    return true;
+  }
+
+  // If the highest bit set is -1, both are cleared, we are the same.
+  // If the highest bit set is 0, both have a unique bit set, we are the same.
+  if (our_highest >= 0) {
+    return true;
+  }
+
+  // Get the highest bit set's cell's index.
+  int our_highest_index = (our_highest >> 5);
+
+  // This memcmp is enough: we know that the highest bit set is the same for both:
+  //   - Therefore, comparing up through that cell covers at least what we need to, but not less.
+  //      ie. we are comparing all storage cells that could differ; any cells above our_highest_index
+  //          are automatically at 0 in both vectors.
+  return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) != 0);
+}
+
+// Intersect with another bit vector.
 void BitVector::Intersect(const BitVector* src) {
-  DCHECK_EQ(storage_size_, src->GetStorageSize());
-  DCHECK_EQ(expandable_, src->IsExpandable());
-  for (uint32_t idx = 0; idx < storage_size_; idx++) {
+  uint32_t src_storage_size = src->storage_size_;
+
+  // Get the minimum size between us and source.
+  uint32_t min_size = (storage_size_ < src_storage_size) ? storage_size_ : src_storage_size;
+
+  uint32_t idx;
+  for (idx = 0; idx < min_size; idx++) {
     storage_[idx] &= src->GetRawStorageWord(idx);
   }
+
+  // Now, due to this being an intersection, there are two possibilities:
+  //   - Either src was larger than us: we don't care, all upper bits would thus be 0.
+  //   - Either we are larger than src: we don't care, all upper bits would have been 0 too.
+  // So all we need to do is set all remaining bits to 0.
+  for (; idx < storage_size_; idx++) {
+    storage_[idx] = 0;
+  }
 }
 
 /*
- * Union with another bit vector.  Sizes and expandability must be the same.
+ * Union with another bit vector.
  */
 void BitVector::Union(const BitVector* src) {
-  DCHECK_EQ(storage_size_, src->GetStorageSize());
-  DCHECK_EQ(expandable_, src->IsExpandable());
-  for (uint32_t idx = 0; idx < storage_size_; idx++) {
+  uint32_t src_size = src->storage_size_;
+
+  // Get our size, we use this variable for the last loop of the method:
+  //   - It can change in the if block if src is of a different size.
+  uint32_t size = storage_size_;
+
+  // Is the storage size smaller than src's?
+  if (storage_size_ < src_size) {
+    // Get the highest bit to determine how much we need to expand.
+    int highest_bit = src->GetHighestBitSet();
+
+    // If src has no bit set, we are done: there is no need for a union with src.
+    if (highest_bit == -1) {
+      return;
+    }
+
+    // Set it to reallocate.
+    SetBit(highest_bit);
+
+    // Paranoid: storage size should be big enough to hold this bit now.
+    DCHECK_LT(static_cast<uint32_t> (highest_bit), storage_size_ * sizeof(*(storage_)) * 8);
+
+    // Update the size; our size cannot be bigger than the src size now.
+    size = storage_size_;
+  }
+
+  for (uint32_t idx = 0; idx < size; idx++) {
     storage_[idx] |= src->GetRawStorageWord(idx);
   }
 }
 
+void BitVector::Subtract(const BitVector *src) {
+    uint32_t src_size = src->storage_size_;
+
+    // We only need to operate on words up to the smaller of the sizes of the two operands.
+    unsigned int min_size = (storage_size_ > src_size) ? src_size : storage_size_;
+
+    // Difference until max, we know both accept it:
+    //   There is no need to do more:
+    //     If we are bigger than src, the upper bits are unchanged.
+    //     If we are smaller than src, the non-existent upper bits are 0 and thus can't get subtracted.
+    for (uint32_t idx = 0; idx < min_size; idx++) {
+        storage_[idx] &= (~(src->GetRawStorageWord(idx)));
+    }
+}
+
 // Count the number of bits that are set.
 uint32_t BitVector::NumSetBits() const {
   uint32_t count = 0;
@@ -132,7 +215,7 @@
 
 // Count the number of bits that are set up through and including num.
 uint32_t BitVector::NumSetBits(uint32_t num) const {
-  DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8);
+  DCHECK_LT(num, storage_size_ * sizeof(*storage_) * 8);
   uint32_t last_word = num >> 5;
   uint32_t partial_word_bits = num & 0x1f;
 
@@ -163,15 +246,84 @@
  * iterator.
  */
 void BitVector::SetInitialBits(uint32_t num_bits) {
-  DCHECK_LE(BitsToWords(num_bits), storage_size_);
+  // If num_bits is 0, clear everything.
+  if (num_bits == 0) {
+    ClearAllBits();
+    return;
+  }
+
+  // Set the highest bit we want to set to get the BitVector allocated if need be.
+  SetBit(num_bits - 1);
+
   uint32_t idx;
+  // We can set every storage element with -1.
   for (idx = 0; idx < (num_bits >> 5); idx++) {
     storage_[idx] = -1;
   }
+
+  // Handle the potentially last few bits.
   uint32_t rem_num_bits = num_bits & 0x1f;
-  if (rem_num_bits) {
+  if (rem_num_bits != 0) {
     storage_[idx] = (1 << rem_num_bits) - 1;
   }
+
+  // Now set the upper ones to 0.
+  for (; idx < storage_size_; idx++) {
+    storage_[idx] = 0;
+  }
+}
+
+int BitVector::GetHighestBitSet() const {
+  unsigned int max = storage_size_;
+  for (int idx = max - 1; idx >= 0; idx--) {
+    // If not 0, we have more work: check the bits.
+    uint32_t value = storage_[idx];
+
+    if (value != 0) {
+      // Shift right for the counting.
+      value /= 2;
+
+      int cnt = 0;
+
+      // Count the bits.
+      while (value > 0) {
+        value /= 2;
+        cnt++;
+      }
+
+      // Return cnt plus the bits contained in the lower storage units (idx * bits per unit).
+      int res = cnt + (idx * (sizeof(*storage_) * 8));
+      return res;
+    }
+  }
+
+  // All zero, therefore return -1.
+  return -1;
+}
+
+void BitVector::Copy(const BitVector *src) {
+  // Get highest bit set, we only need to copy till then.
+  int highest_bit = src->GetHighestBitSet();
+
+  // If nothing is set, clear everything.
+  if (highest_bit == -1) {
+    ClearAllBits();
+    return;
+  }
+
+  // Set upper bit to ensure right size before copy.
+  SetBit(highest_bit);
+
+  // Now set until highest bit's storage.
+  uint32_t size = 1 + (highest_bit / (sizeof(*storage_) * 8));
+  memcpy(storage_, src->GetRawStorage(), sizeof(*storage_) * size);
+
+  // Set upper bits to 0.
+  uint32_t left = storage_size_ - size;
+
+  if (left > 0) {
+    memset(storage_ + size, 0, sizeof(*storage_) * left);
+  }
 }
 
 }  // namespace art
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 74bec08..c8f285e 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -46,7 +46,9 @@
           DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8);
           DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage());
 
-          if (UNLIKELY(bit_index_ >= bit_size_)) return -1;
+          if (UNLIKELY(bit_index_ >= bit_size_)) {
+            return -1;
+          }
 
           uint32_t word_index = bit_index_ / 32;
           uint32_t word = bit_storage_[word_index];
@@ -89,7 +91,7 @@
               bool expandable,
               Allocator* allocator,
               uint32_t storage_size = 0,
-              uint32_t* storage = NULL);
+              uint32_t* storage = nullptr);
 
     virtual ~BitVector();
 
@@ -98,17 +100,24 @@
     bool IsBitSet(uint32_t num) const;
     void ClearAllBits();
     void SetInitialBits(uint32_t num_bits);
-    void Copy(BitVector* src) {
-      memcpy(storage_, src->GetRawStorage(), sizeof(uint32_t) * storage_size_);
-    }
+
+    void Copy(const BitVector* src);
     void Intersect(const BitVector* src2);
     void Union(const BitVector* src);
+    void Subtract(const BitVector* src);
     // Are we equal to another bit vector?  Note: expandability attributes must also match.
     bool Equal(const BitVector* src) {
       return (storage_size_ == src->GetStorageSize()) &&
         (expandable_ == src->IsExpandable()) &&
         (memcmp(storage_, src->GetRawStorage(), storage_size_ * sizeof(uint32_t)) == 0);
     }
+
+    /**
+     * @brief Are all the bits set the same?
+     * @details expandability and size can differ as long as the same bits are set.
+     */
+    bool SameBitsSet(const BitVector *src);
+
     uint32_t NumSetBits() const;
     uint32_t NumSetBits(uint32_t num) const;
 
@@ -121,6 +130,11 @@
     const uint32_t* GetRawStorage() const { return storage_; }
     size_t GetSizeOf() const { return storage_size_ * sizeof(uint32_t); }
 
+    /**
+     * @return the highest bit set, -1 if none are set
+     */
+    int GetHighestBitSet() const;
+
   private:
     Allocator* const allocator_;
     const bool expandable_;         // expand bitmap if we run out?
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 978c99b..fac1e14 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1119,14 +1119,15 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
-  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(visitor(class_roots_, arg));
+void ClassLinker::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
+  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(
+      callback(class_roots_, arg, 0, kRootVMInternal));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
     if (!only_dirty || dex_caches_dirty_) {
       for (mirror::DexCache*& dex_cache : dex_caches_) {
-        dex_cache = down_cast<mirror::DexCache*>(visitor(dex_cache, arg));
+        dex_cache = down_cast<mirror::DexCache*>(callback(dex_cache, arg, 0, kRootVMInternal));
         DCHECK(dex_cache != nullptr);
       }
       if (clean_dirty) {
@@ -1139,7 +1140,7 @@
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
     if (!only_dirty || class_table_dirty_) {
       for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
-        it.second = down_cast<mirror::Class*>(visitor(it.second, arg));
+        it.second = down_cast<mirror::Class*>(callback(it.second, arg, 0, kRootStickyClass));
         DCHECK(it.second != nullptr);
       }
       if (clean_dirty) {
@@ -1151,7 +1152,8 @@
     // handle image roots by using the MS/CMS rescanning of dirty cards.
   }
 
-  array_iftable_ = reinterpret_cast<mirror::IfTable*>(visitor(array_iftable_, arg));
+  array_iftable_ = reinterpret_cast<mirror::IfTable*>(callback(array_iftable_, arg, 0,
+                                                               kRootVMInternal));
   DCHECK(array_iftable_ != nullptr);
 }
 
@@ -3152,7 +3154,7 @@
     CHECK(can_init_statics);
     if (LIKELY(Runtime::Current()->IsStarted())) {
       JValue result;
-      clinit->Invoke(self, NULL, 0, &result, 'V');
+      clinit->Invoke(self, NULL, 0, &result, "V");
     } else {
       art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL);
     }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7e31356..0745ee2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -26,8 +26,8 @@
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "jni.h"
-#include "root_visitor.h"
 #include "oat_file.h"
+#include "object_callbacks.h"
 
 namespace art {
 namespace gc {
@@ -235,7 +235,7 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index fb979c2..d9ef0c1 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -339,7 +339,7 @@
     }
   }
 
-  static mirror::Object* TestRootVisitor(mirror::Object* root, void*) {
+  static mirror::Object* TestRootVisitor(mirror::Object* root, void*, uint32_t, RootType) {
     EXPECT_TRUE(root != NULL);
     return root;
   }
diff --git a/runtime/common_test.h b/runtime/common_test.h
index daa2ff1..f7859ea 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -268,7 +268,7 @@
     MakeExecutable(&code[0], code.size());
   }
 
-  // Create an OatMethod based on pointers (for unit tests)
+  // Create an OatMethod based on pointers (for unit tests).
   OatFile::OatMethod CreateOatMethod(const void* code,
                                      const size_t frame_size_in_bytes,
                                      const uint32_t core_spill_mask,
@@ -276,11 +276,23 @@
                                      const uint8_t* mapping_table,
                                      const uint8_t* vmap_table,
                                      const uint8_t* gc_map) {
-    const byte* base = nullptr;  // Base of data in oat file, ie 0.
-    uint32_t code_offset = PointerToLowMemUInt32(code);
-    uint32_t mapping_table_offset = PointerToLowMemUInt32(mapping_table);
-    uint32_t vmap_table_offset = PointerToLowMemUInt32(vmap_table);
-    uint32_t gc_map_offset = PointerToLowMemUInt32(gc_map);
+    const byte* base;
+    uint32_t code_offset, mapping_table_offset, vmap_table_offset, gc_map_offset;
+    if (mapping_table == nullptr && vmap_table == nullptr && gc_map == nullptr) {
+      base = reinterpret_cast<const byte*>(code);  // Base of data points at code.
+      base -= kPointerSize;  // Move backward so that code_offset != 0.
+      code_offset = kPointerSize;
+      mapping_table_offset = 0;
+      vmap_table_offset = 0;
+      gc_map_offset = 0;
+    } else {
+      // TODO: 64bit support.
+      base = nullptr;  // Base of data in oat file, ie 0.
+      code_offset = PointerToLowMemUInt32(code);
+      mapping_table_offset = PointerToLowMemUInt32(mapping_table);
+      vmap_table_offset = PointerToLowMemUInt32(vmap_table);
+      gc_map_offset = PointerToLowMemUInt32(gc_map);
+    }
     return OatFile::OatMethod(base,
                               code_offset,
                               frame_size_in_bytes,
@@ -470,6 +482,8 @@
       instruction_set = kX86;
 #elif defined(__x86_64__)
       instruction_set = kX86_64;
+      // TODO: x86_64 compilation support.
+      runtime_->SetCompilerFilter(Runtime::kInterpretOnly);
 #endif
 
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
@@ -480,12 +494,13 @@
         }
       }
       class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
+      timer_.reset(new CumulativeLogger("Compilation times"));
       compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                                 method_inliner_map_.get(),
                                                 compiler_backend, instruction_set,
                                                 instruction_set_features,
                                                 true, new CompilerDriver::DescriptorSet,
-                                                2, true));
+                                                2, true, true, timer_.get()));
     }
     // We typically don't generate an image in unit tests, disable this optimization by default.
     compiler_driver_->SetSupportBootImageFixup(false);
@@ -530,6 +545,7 @@
     (*icu_cleanup_fn)();
 
     compiler_driver_.reset();
+    timer_.reset();
     callbacks_.Reset(nullptr, nullptr);
     method_inliner_map_.reset();
     verification_results_.reset();
@@ -662,7 +678,7 @@
 
   class TestCompilerCallbacks : public CompilerCallbacks {
    public:
-    TestCompilerCallbacks() : verification_results_(nullptr), method_inliner_map_(nullptr) { }
+    TestCompilerCallbacks() : verification_results_(nullptr), method_inliner_map_(nullptr) {}
 
     void Reset(VerificationResults* verification_results,
                DexFileToMethodInlinerMap* method_inliner_map) {
@@ -701,6 +717,7 @@
   UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
   TestCompilerCallbacks callbacks_;
   UniquePtr<CompilerDriver> compiler_driver_;
+  UniquePtr<CumulativeLogger> timer_;
 
  private:
   std::vector<const DexFile*> opened_dex_files_;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 8280c7c..733e843 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -85,17 +85,17 @@
     return depth;
   }
 
-  void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (type != nullptr) {
-      type = down_cast<mirror::Class*>(visitor(type, arg));
+      type = down_cast<mirror::Class*>(callback(type, arg));
     }
     for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
       mirror::ArtMethod*& m = stack[stack_frame].method;
       if (m == nullptr) {
         break;
       }
-      m = down_cast<mirror::ArtMethod*>(visitor(m, arg));
+      m = down_cast<mirror::ArtMethod*>(callback(m, arg));
     }
   }
 };
@@ -3019,7 +3019,7 @@
   MethodHelper mh(m.get());
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, pReq->receiver, reinterpret_cast<jvalue*>(pReq->arg_values));
-  InvokeWithArgArray(soa, m.get(), &arg_array, &pReq->result_value, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, m.get(), &arg_array, &pReq->result_value, mh.GetShorty());
 
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
   soa.Self()->ClearException();
@@ -3793,7 +3793,7 @@
   }
 }
 
-void Dbg::UpdateObjectPointers(RootVisitor* visitor, void* arg) {
+void Dbg::UpdateObjectPointers(IsMarkedCallback* visitor, void* arg) {
   {
     MutexLock mu(Thread::Current(), gAllocTrackerLock);
     if (recent_allocation_records_ != nullptr) {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index f1e3f45..5d269ee 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -29,7 +29,7 @@
 #include "jdwp/jdwp.h"
 #include "jni.h"
 #include "jvalue.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "thread_state.h"
 
 namespace art {
@@ -453,7 +453,7 @@
   static void DumpRecentAllocations();
 
   // Updates the stored direct object pointers (called from SweepSystemWeaks).
-  static void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+  static void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   enum HpifWhen {
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 8a2ce51..2067a45 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -48,11 +48,11 @@
   if (kUsePortableCompiler) {
     ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
     arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
+    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty());
   } else {
     method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                    (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                   result, mh.GetShorty()[0]);
+                   result, mh.GetShorty());
   }
 }
 
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index 4d1e531..c0304eb 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -46,7 +46,8 @@
   }
 }
 
-static void WorkAroundJniBugsForJobject(intptr_t* arg_ptr) {
+static void WorkAroundJniBugsForJobject(intptr_t* arg_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   intptr_t value = *arg_ptr;
   mirror::Object** value_as_jni_rep = reinterpret_cast<mirror::Object**>(value);
   mirror::Object* value_as_work_around_rep = value_as_jni_rep != NULL ? *value_as_jni_rep : NULL;
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index c520ee6..f94cf24 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -72,12 +72,10 @@
   discontinuous_space_sets_.erase(it);
 }
 
-void HeapBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void HeapBitmap::Walk(ObjectCallback* callback, void* arg) {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     bitmap->Walk(callback, arg);
   }
-
-  DCHECK(!discontinuous_space_sets_.empty());
   for (const auto& space_set : discontinuous_space_sets_) {
     space_set->Walk(callback, arg);
   }
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index bcf36a2..dde1425 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -20,6 +20,7 @@
 #include "base/logging.h"
 #include "gc_allocator.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "space_bitmap.h"
 
 namespace art {
@@ -83,7 +84,7 @@
     return NULL;
   }
 
-  void Walk(SpaceBitmap::Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 0225f29..aad214a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -70,8 +70,8 @@
 
 class ModUnionUpdateObjectReferencesVisitor {
  public:
-  ModUnionUpdateObjectReferencesVisitor(RootVisitor visitor, void* arg)
-    : visitor_(visitor),
+  ModUnionUpdateObjectReferencesVisitor(RootCallback* callback, void* arg)
+    : callback_(callback),
       arg_(arg) {
   }
 
@@ -80,7 +80,7 @@
                   bool /* is_static */) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr) {
-      Object* new_ref = visitor_(ref, arg_);
+      Object* new_ref = callback_(ref, arg_, 0, kRootVMInternal);
       if (new_ref != ref) {
         // Use SetFieldObjectWithoutWriteBarrier to avoid card mark as an optimization which
         // reduces dirtied pages and improves performance.
@@ -90,26 +90,26 @@
   }
 
  private:
-  RootVisitor* visitor_;
+  RootCallback* const callback_;
   void* arg_;
 };
 
 class ModUnionScanImageRootVisitor {
  public:
-  ModUnionScanImageRootVisitor(RootVisitor visitor, void* arg)
-      : visitor_(visitor), arg_(arg) {}
+  ModUnionScanImageRootVisitor(RootCallback* callback, void* arg)
+      : callback_(callback), arg_(arg) {}
 
   void operator()(Object* root) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != NULL);
-    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, arg_);
+    ModUnionUpdateObjectReferencesVisitor ref_visitor(callback_, arg_);
     collector::MarkSweep::VisitObjectReferences(root, ref_visitor, true);
   }
 
  private:
-  RootVisitor* visitor_;
-  void* arg_;
+  RootCallback* const callback_;
+  void* const arg_;
 };
 
 void ModUnionTableReferenceCache::ClearCards() {
@@ -261,7 +261,7 @@
   }
 }
 
-void ModUnionTableReferenceCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
+void ModUnionTableReferenceCache::UpdateAndMarkReferences(RootCallback* callback, void* arg) {
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
 
@@ -296,7 +296,7 @@
     for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
       Object* obj = obj_ptr->AsMirrorPtr();
       if (obj != nullptr) {
-        Object* new_obj = visitor(obj, arg);
+        Object* new_obj = callback(obj, arg, 0, kRootVMInternal);
         // Avoid dirtying pages in the image unless necessary.
         if (new_obj != obj) {
           obj_ptr->Assign(new_obj);
@@ -318,9 +318,9 @@
 }
 
 // Mark all references to the alloc space(s).
-void ModUnionTableCardCache::UpdateAndMarkReferences(RootVisitor visitor, void* arg) {
+void ModUnionTableCardCache::UpdateAndMarkReferences(RootCallback* callback, void* arg) {
   CardTable* card_table = heap_->GetCardTable();
-  ModUnionScanImageRootVisitor scan_visitor(visitor, arg);
+  ModUnionScanImageRootVisitor scan_visitor(callback, arg);
   SpaceBitmap* bitmap = space_->GetLiveBitmap();
   for (const byte* card_addr : cleared_cards_) {
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index a89dbd1..7d5d8d2 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -19,7 +19,7 @@
 
 #include "gc_allocator.h"
 #include "globals.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <set>
@@ -69,7 +69,7 @@
   // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
   // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
   // spaces which are stored in the mod-union table.
-  virtual void UpdateAndMarkReferences(RootVisitor visitor, void* arg) = 0;
+  virtual void UpdateAndMarkReferences(RootCallback* callback, void* arg) = 0;
 
   // Verification, sanity checks that we don't have clean cards which conflict with out cached data
   // for said cards. Exclusive lock is required since verify sometimes uses
@@ -106,7 +106,7 @@
   void ClearCards();
 
   // Update table based on cleared cards and mark all references to the other spaces.
-  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
+  void UpdateAndMarkReferences(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -142,7 +142,7 @@
   void ClearCards();
 
   // Mark all references to the alloc space(s).
-  void UpdateAndMarkReferences(RootVisitor visitor, void* arg)
+  void UpdateAndMarkReferences(RootCallback* callback, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index a080bee..ad4ff1b 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -44,7 +44,7 @@
                       reinterpret_cast<void*>(HeapLimit()));
 }
 
-void ObjectSet::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
   for (const mirror::Object* obj : contained_) {
     callback(const_cast<mirror::Object*>(obj), arg);
   }
@@ -102,7 +102,7 @@
 
 // Visits set bits in address order.  The callback is not permitted to
 // change the bitmap bits or max during the traversal.
-void SpaceBitmap::Walk(SpaceBitmap::Callback* callback, void* arg) {
+void SpaceBitmap::Walk(ObjectCallback* callback, void* arg) {
   CHECK(bitmap_begin_ != NULL);
   CHECK(callback != NULL);
 
@@ -174,12 +174,12 @@
   }
 }
 
-static void WalkFieldsInOrder(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                               void* arg);
 
 // Walk instance fields of the given Class. Separate function to allow recursion on the super
 // class.
-static void WalkInstanceFields(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkInstanceFields(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                                mirror::Class* klass, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Visit fields of parent classes first.
@@ -204,7 +204,7 @@
 }
 
 // For an unvisited object, visit it then all its children found via fields.
-static void WalkFieldsInOrder(SpaceBitmap* visited, SpaceBitmap::Callback* callback, mirror::Object* obj,
+static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
                               void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (visited->Test(obj)) {
@@ -246,7 +246,7 @@
 
 // Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
 // bits or max during the traversal.
-void SpaceBitmap::InOrderWalk(SpaceBitmap::Callback* callback, void* arg) {
+void SpaceBitmap::InOrderWalk(ObjectCallback* callback, void* arg) {
   UniquePtr<SpaceBitmap> visited(Create("bitmap for in-order walk",
                                        reinterpret_cast<byte*>(heap_begin_),
                                        IndexToOffset(bitmap_size_ / kWordSize)));
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index aa074eb..3c4b674 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -17,10 +17,11 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_H_
 
-#include "locks.h"
 #include "gc_allocator.h"
 #include "globals.h"
+#include "locks.h"
 #include "mem_map.h"
+#include "object_callbacks.h"
 #include "UniquePtr.h"
 
 #include <limits.h>
@@ -42,8 +43,6 @@
   // Alignment of objects within spaces.
   static const size_t kAlignment = 8;
 
-  typedef void Callback(mirror::Object* obj, void* arg);
-
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
 
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
@@ -102,7 +101,7 @@
     return index < bitmap_size_ / kWordSize;
   }
 
-  void VisitRange(uintptr_t base, uintptr_t max, Callback* visitor, void* arg) const;
+  void VisitRange(uintptr_t base, uintptr_t max, ObjectCallback* callback, void* arg) const;
 
   class ClearVisitor {
    public:
@@ -129,10 +128,10 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Walk(Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void InOrderWalk(Callback* callback, void* arg)
+  void InOrderWalk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   static void SweepWalk(const SpaceBitmap& live, const SpaceBitmap& mark, uintptr_t base,
@@ -249,7 +248,7 @@
     contained_ = space_set.contained_;
   }
 
-  void Walk(SpaceBitmap::Callback* callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   template <typename Visitor>
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 862d06f..de9f59e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -525,14 +525,16 @@
   }
 }
 
-Object* MarkSweep::MarkRootParallelCallback(Object* root, void* arg) {
+mirror::Object* MarkSweep::MarkRootParallelCallback(mirror::Object* root, void* arg,
+                                                    uint32_t /*thread_id*/, RootType /*root_type*/) {
   DCHECK(root != NULL);
   DCHECK(arg != NULL);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(root);
   return root;
 }
 
-Object* MarkSweep::MarkRootCallback(Object* root, void* arg) {
+Object* MarkSweep::MarkRootCallback(Object* root, void* arg, uint32_t /*thread_id*/,
+                                    RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(root);
@@ -930,7 +932,7 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::IsMarkedCallback(Object* object, void* arg) {
+mirror::Object* MarkSweep::IsMarkedCallback(mirror::Object* object, void* arg) {
   if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
     return object;
   }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bfedac7..8bc0bb5 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -180,11 +180,13 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg, uint32_t thread_id,
+                                          RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg);
+  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg,
+                                                  uint32_t thread_id, RootType root_type);
 
   // Marks an object.
   void MarkObject(const mirror::Object* obj)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 0c6a938..b37b9d2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -513,7 +513,7 @@
   return forward_address;
 }
 
-Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
+mirror::Object* SemiSpace::RecursiveMarkObjectCallback(mirror::Object* root, void* arg) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   SemiSpace* semi_space = reinterpret_cast<SemiSpace*>(arg);
@@ -522,7 +522,8 @@
   return ret;
 }
 
-Object* SemiSpace::MarkRootCallback(Object* root, void* arg) {
+Object* SemiSpace::MarkRootCallback(Object* root, void* arg, uint32_t /*thread_id*/,
+                                    RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   return reinterpret_cast<SemiSpace*>(arg)->MarkObject(root);
@@ -536,7 +537,7 @@
   timings_.EndSplit();
 }
 
-mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
+mirror::Object* SemiSpace::MarkedForwardingAddressCallback(mirror::Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 685b33c..f58402f 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -142,7 +142,8 @@
   static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg, uint32_t /*tid*/,
+                                          RootType /*root_type*/)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 62567d7..a324925 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -369,7 +369,7 @@
   }
 }
 
-void Heap::VisitObjects(ObjectVisitorCallback callback, void* arg) {
+void Heap::VisitObjects(ObjectCallback callback, void* arg) {
   Thread* self = Thread::Current();
   // GCs can move objects, so don't allow this.
   const char* old_cause = self->StartAssertNoThreadSuspension("Visiting objects");
@@ -604,8 +604,8 @@
 }
 
 struct SoftReferenceArgs {
-  RootVisitor* is_marked_callback_;
-  RootVisitor* recursive_mark_callback_;
+  IsMarkedCallback* is_marked_callback_;
+  MarkObjectCallback* recursive_mark_callback_;
   void* arg_;
 };
 
@@ -617,8 +617,8 @@
 
 // Process reference class instances and schedule finalizations.
 void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
-                             RootVisitor* is_marked_callback,
-                             RootVisitor* recursive_mark_object_callback, void* arg) {
+                             IsMarkedCallback* is_marked_callback,
+                             MarkObjectCallback* recursive_mark_object_callback, void* arg) {
   // Unless we are in the zygote or required to clear soft references with white references,
   // preserve some white referents.
   if (!clear_soft && !Runtime::Current()->IsZygote()) {
@@ -671,13 +671,13 @@
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
 void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
-                                  RootVisitor mark_visitor, void* arg) {
+                                  IsMarkedCallback is_marked_callback, void* arg) {
   DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
   DCHECK(obj != nullptr);
   mirror::Object* referent = GetReferenceReferent(obj);
   if (referent != nullptr) {
-    mirror::Object* forward_address = mark_visitor(referent, arg);
+    mirror::Object* forward_address = is_marked_callback(referent, arg);
     // Null means that the object is not currently marked.
     if (forward_address == nullptr) {
       Thread* self = Thread::Current();
@@ -800,11 +800,12 @@
   return IsAligned<kObjectAlignment>(obj) && IsHeapAddress(obj);
 }
 
+bool Heap::IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const {
+  return FindContinuousSpaceFromObject(obj, true) != nullptr;
+}
+
 bool Heap::IsHeapAddress(const mirror::Object* obj) const {
-  if (kMovingCollector && bump_pointer_space_ && bump_pointer_space_->HasAddress(obj)) {
-    return true;
-  }
-  // TODO: This probably doesn't work for large objects.
+  // TODO: This might not work for large objects.
   return FindSpaceFromObject(obj, true) != nullptr;
 }
 
@@ -1169,7 +1170,7 @@
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // Can't do any GC in this function since this may move classes.
+  // Can't do any GC in this function since this may move the object o.
   Thread* self = Thread::Current();
   auto* old_cause = self->StartAssertNoThreadSuspension("GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
@@ -1696,7 +1697,8 @@
   gc_complete_cond_->Broadcast(self);
 }
 
-static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
+static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg,
+                                                uint32_t /*thread_id*/, RootType /*root_type*/) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
     LOG(INFO) << "Object " << obj << " is a root";
@@ -1823,7 +1825,8 @@
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
-  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg) {
+  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg, uint32_t /*thread_id*/,
+                                     RootType /*root_type*/) {
     VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
     (*visitor)(nullptr, root, MemberOffset(0), true);
     return root;
@@ -2041,7 +2044,7 @@
   }
 }
 
-static mirror::Object* IdentityCallback(mirror::Object* obj, void*) {
+static mirror::Object* IdentityRootCallback(mirror::Object* obj, void*, uint32_t, RootType) {
   return obj;
 }
 
@@ -2080,7 +2083,7 @@
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
-      mod_union_table->UpdateAndMarkReferences(IdentityCallback, nullptr);
+      mod_union_table->UpdateAndMarkReferences(IdentityRootCallback, nullptr);
       mod_union_table->Verify();
     }
     thread_list->ResumeAll();
@@ -2316,10 +2319,10 @@
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
-  ArgArray arg_array(NULL, 0);
+  ArgArray arg_array("VL", 2);
   arg_array.Append(object);
   soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
-      arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+      arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
 }
 
 void Heap::EnqueueClearedReferences() {
@@ -2330,10 +2333,10 @@
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(self);
       JValue result;
-      ArgArray arg_array(NULL, 0);
+      ArgArray arg_array("VL", 2);
       arg_array.Append(cleared_references_.GetList());
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
-          arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+          arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
     }
     cleared_references_.Clear();
   }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 476ceee..e416c0e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -32,9 +32,9 @@
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
 #include "reference_queue.h"
-#include "root_visitor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 
@@ -183,7 +183,7 @@
   }
 
   // Visit all of the live objects in the heap.
-  void VisitObjects(ObjectVisitorCallback callback, void* arg)
+  void VisitObjects(ObjectCallback callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -226,10 +226,16 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsValidObjectAddress(const mirror::Object* obj) const;
+  bool IsValidObjectAddress(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the address passed in is a heap address, doesn't need to be aligned.
-  bool IsHeapAddress(const mirror::Object* obj) const;
+  bool IsHeapAddress(const mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Faster alternative to IsHeapAddress, since determining whether an object is in the large
+  // object space is very slow.
+  bool IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
@@ -328,8 +334,9 @@
     return finalizer_reference_zombie_offset_;
   }
   static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
-  void ProcessReferences(TimingLogger& timings, bool clear_soft, RootVisitor* is_marked_callback,
-                         RootVisitor* recursive_mark_object_callback, void* arg)
+  void ProcessReferences(TimingLogger& timings, bool clear_soft,
+                         IsMarkedCallback* is_marked_callback,
+                         MarkObjectCallback* recursive_mark_object_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -605,8 +612,9 @@
   // Returns true if the reference object has not yet been enqueued.
   bool IsEnqueuable(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsEnqueued(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
-                              void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
+                              IsMarkedCallback is_marked_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Run the finalizers.
   void RunFinalization(JNIEnv* env);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 2d73a71..fae4cac 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -94,13 +94,14 @@
   }
 }
 
-void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor,
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references,
+                                          IsMarkedCallback* preserve_callback,
                                           void* arg) {
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
     mirror::Object* referent = heap_->GetReferenceReferent(ref);
     if (referent != nullptr) {
-      mirror::Object* forward_address = visitor(referent, arg);
+      mirror::Object* forward_address = preserve_callback(referent, arg);
       if (forward_address == nullptr) {
         // Referent is white, clear it.
         heap_->ClearReferenceReferent(ref);
@@ -108,7 +109,7 @@
           cleared_references.EnqueuePendingReference(ref);
         }
       } else if (referent != forward_address) {
-        // Object moved, need to updated the referrent.
+        // Object moved, need to update the referent.
         heap_->SetReferenceReferent(ref, forward_address);
       }
     }
@@ -116,8 +117,9 @@
 }
 
 void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                                RootVisitor is_marked_callback,
-                                                RootVisitor recursive_mark_callback, void* arg) {
+                                                IsMarkedCallback is_marked_callback,
+                                                MarkObjectCallback recursive_mark_callback,
+                                                void* arg) {
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
     mirror::Object* referent = heap_->GetReferenceReferent(ref);
@@ -139,7 +141,7 @@
   }
 }
 
-void ReferenceQueue::PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg) {
+void ReferenceQueue::PreserveSomeSoftReferences(IsMarkedCallback preserve_callback, void* arg) {
   ReferenceQueue cleared(heap_);
   while (!IsEmpty()) {
     mirror::Object* ref = DequeuePendingReference();
@@ -149,7 +151,7 @@
       if (forward_address == nullptr) {
         // Either the reference isn't marked or we don't wish to preserve it.
         cleared.EnqueuePendingReference(ref);
-      } else {
+      } else if (forward_address != referent) {
         heap_->SetReferenceReferent(ref, forward_address);
       }
     }
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 3f3069e..e12a95f 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -27,8 +27,8 @@
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -56,17 +56,18 @@
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
   // zombie field, and the referent field is cleared.
   void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
-                                  RootVisitor is_marked_callback,
-                                  RootVisitor recursive_mark_callback, void* arg)
+                                  IsMarkedCallback is_marked_callback,
+                                  MarkObjectCallback recursive_mark_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Walks the reference list marking any references subject to the reference clearing policy.
   // References with a black referent are removed from the list.  References with white referents
   // biased toward saving are blackened and also removed from the list.
-  void PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg)
+  void PreserveSomeSoftReferences(IsMarkedCallback* preserve_callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Unlink the reference list clearing references objects with white referents.  Cleared references
   // registered to a reference queue are scheduled for appending by the heap worker thread.
-  void ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor, void* arg)
+  void ClearWhiteReferences(ReferenceQueue& cleared_references, IsMarkedCallback is_marked_callback,
+                            void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void Dump(std::ostream& os) const
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index a314d74..f7bdc4c 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -44,9 +44,8 @@
       growth_end_(limit),
       objects_allocated_(0), bytes_allocated_(0),
       block_lock_("Block lock"),
+      main_block_size_(0),
       num_blocks_(0) {
-  CHECK_GE(Capacity(), sizeof(BlockHeader));
-  end_ += sizeof(BlockHeader);
 }
 
 BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap* mem_map)
@@ -55,9 +54,8 @@
       growth_end_(mem_map->End()),
       objects_allocated_(0), bytes_allocated_(0),
       block_lock_("Block lock"),
+      main_block_size_(0),
       num_blocks_(0) {
-  CHECK_GE(Capacity(), sizeof(BlockHeader));
-  end_ += sizeof(BlockHeader);
 }
 
 mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated) {
@@ -78,13 +76,14 @@
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
-  SetEnd(Begin() + sizeof(BlockHeader));
+  SetEnd(Begin());
   objects_allocated_ = 0;
   bytes_allocated_ = 0;
   growth_end_ = Limit();
   {
     MutexLock mu(Thread::Current(), block_lock_);
     num_blocks_ = 0;
+    main_block_size_ = 0;
   }
 }
 
@@ -115,9 +114,8 @@
 }
 
 void BumpPointerSpace::UpdateMainBlock() {
-  BlockHeader* header = reinterpret_cast<BlockHeader*>(Begin());
-  header->size_ = Size() - sizeof(BlockHeader);
   DCHECK_EQ(num_blocks_, 0U);
+  main_block_size_ = Size();
 }
 
 // Returns the start of the storage.
@@ -137,9 +135,9 @@
   return storage;
 }
 
-void BumpPointerSpace::Walk(ObjectVisitorCallback callback, void* arg) {
+void BumpPointerSpace::Walk(ObjectCallback* callback, void* arg) {
   byte* pos = Begin();
-
+  byte* main_end = pos;
   {
     MutexLock mu(Thread::Current(), block_lock_);
     // If we have 0 blocks then we need to update the main header since we have bump pointer style
@@ -147,8 +145,15 @@
     if (num_blocks_ == 0) {
       UpdateMainBlock();
     }
+    main_end += main_block_size_;
   }
-
+  // Walk all of the objects in the main block first.
+  while (pos < main_end) {
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
+    callback(obj, arg);
+    pos = reinterpret_cast<byte*>(GetNextObject(obj));
+  }
+  // Walk the other blocks (currently only TLABs).
   while (pos < End()) {
     BlockHeader* header = reinterpret_cast<BlockHeader*>(pos);
     size_t block_size = header->size_;
@@ -167,7 +172,7 @@
 }
 
 bool BumpPointerSpace::IsEmpty() const {
-  return Size() == sizeof(BlockHeader);
+  return Begin() == End();
 }
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index d73fe3b..d7e6f5b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "space.h"
 
 namespace art {
@@ -121,7 +121,7 @@
   }
 
   // Go through all of the blocks and visit the continuous objects.
-  void Walk(ObjectVisitorCallback callback, void* arg)
+  void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Object alignment within the space.
@@ -139,15 +139,16 @@
 
   // The main block is an unbounded block where objects go when there are no other blocks. This
   // enables us to maintain tightly packed objects when you are not using thread local buffers for
-  // allocation.
-  // The main block is also the block which starts at address 0.
+  // allocation. The main block starts at the space Begin().
   void UpdateMainBlock() EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
 
   byte* growth_end_;
   AtomicInteger objects_allocated_;  // Accumulated from revoked thread local regions.
   AtomicInteger bytes_allocated_;  // Accumulated from revoked thread local regions.
   Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
+  // The objects at the start of the space are stored in the main block. The main block doesn't
+  // have a header; this lets us walk empty spaces which are mprotected.
+  size_t main_block_size_ GUARDED_BY(block_lock_);
   // The number of blocks in the space, if it is 0 then the space has one long continuous block
   // which doesn't have an updated header.
   size_t num_blocks_ GUARDED_BY(block_lock_);
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 9989ffe..6d07a60 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -39,20 +39,23 @@
     Runtime::Current()->GetHeap()->AddSpace(space);
   }
   void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
-    // Note the minimum size, which is the size of a zero-length byte array, is 12.
-    EXPECT_GE(size, static_cast<size_t>(12));
+    // Note the minimum size, which is the size of a zero-length byte array.
+    EXPECT_GE(size, SizeOfZeroLengthByteArray());
     SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
     mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
     EXPECT_TRUE(byte_array_class != NULL);
     o->SetClass(byte_array_class);
     mirror::Array* arr = o->AsArray();
-    // size_t header_size = sizeof(mirror::Object) + 4;
-    size_t header_size = arr->DataOffset(1).Uint32Value();
+    size_t header_size = SizeOfZeroLengthByteArray();
     int32_t length = size - header_size;
     arr->SetLength(length);
     EXPECT_EQ(arr->SizeOf(), size);
   }
 
+  static size_t SizeOfZeroLengthByteArray() {
+    return mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimByte)).Uint32Value();
+  }
+
   static MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                           size_t capacity, byte* requested_begin) {
     return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
@@ -355,9 +358,10 @@
   mirror::Object* lots_of_objects[1024];
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
-    lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
+    size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+    lots_of_objects[i] = space->Alloc(self, size_of_zero_length_byte_array, &allocation_size);
     EXPECT_TRUE(lots_of_objects[i] != nullptr);
-    InstallClass(lots_of_objects[i], 16);
+    InstallClass(lots_of_objects[i], size_of_zero_length_byte_array);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
@@ -436,9 +440,10 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        // Note the minimum size, which is the size of a zero-length byte array, is 12.
-        if (alloc_size < 12) {
-          alloc_size = 12;
+        // Note the minimum size, which is the size of a zero-length byte array.
+        size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray();
+        if (alloc_size < size_of_zero_length_byte_array) {
+          alloc_size = size_of_zero_length_byte_array;
         }
       }
       mirror::Object* object;
@@ -562,6 +567,10 @@
 }
 
 void SpaceTest::SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size, CreateSpaceFn create_space) {
+  if (object_size < SizeOfZeroLengthByteArray()) {
+    // Too small for the object layout/model.
+    return;
+  }
   size_t initial_size = 4 * MB;
   size_t growth_limit = 8 * MB;
   size_t capacity = 16 * MB;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 24d403d..ae03dd9 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -431,12 +431,8 @@
     Runtime::Current()->VisitRoots(RootVisitor, this, false, false);
     Thread* self = Thread::Current();
     {
-      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      Runtime::Current()->GetHeap()->FlushAllocStack();
-    }
-    {
       ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(HeapBitmapCallback, this);
+      Runtime::Current()->GetHeap()->VisitObjects(VisitObjectCallback, this);
     }
     current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END, HPROF_TIME);
     current_record_.Flush();
@@ -500,22 +496,23 @@
   }
 
  private:
-  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg)
+  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg, uint32_t thread_id,
+                                     RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(arg != NULL);
-    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj);
+    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj, thread_id, root_type);
     return obj;
   }
 
-  static void HeapBitmapCallback(mirror::Object* obj, void* arg)
+  static void VisitObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(obj != NULL);
-    CHECK(arg != NULL);
-    Hprof* hprof = reinterpret_cast<Hprof*>(arg);
-    hprof->DumpHeapObject(obj);
+    DCHECK(obj != NULL);
+    DCHECK(arg != NULL);
+    reinterpret_cast<Hprof*>(arg)->DumpHeapObject(obj);
   }
 
-  void VisitRoot(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   int DumpHeapObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1050,10 +1047,7 @@
   return 0;
 }
 
-void Hprof::VisitRoot(const mirror::Object* obj) {
-  uint32_t threadId = 0;  // TODO
-  /*RootType*/ size_t type = 0;  // TODO
-
+void Hprof::VisitRoot(const mirror::Object* obj, uint32_t thread_id, RootType type) {
   static const HprofHeapTag xlate[] = {
     HPROF_ROOT_UNKNOWN,
     HPROF_ROOT_JNI_GLOBAL,
@@ -1071,13 +1065,12 @@
     HPROF_ROOT_VM_INTERNAL,
     HPROF_ROOT_JNI_MONITOR,
   };
-
   CHECK_LT(type, sizeof(xlate) / sizeof(HprofHeapTag));
   if (obj == NULL) {
     return;
   }
   gc_scan_state_ = xlate[type];
-  gc_thread_serial_number_ = threadId;
+  gc_thread_serial_number_ = thread_id;
   MarkRootObject(obj, 0);
   gc_scan_state_ = 0;
   gc_thread_serial_number_ = 0;
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 8194a0d..4a02d74 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -309,9 +309,10 @@
   return true;
 }
 
-void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
+void IndirectReferenceTable::VisitRoots(RootCallback* callback, void* arg, uint32_t tid,
+                                        RootType root_type) {
   for (auto ref : *this) {
-    *ref = visitor(const_cast<mirror::Object*>(*ref), arg);
+    *ref = callback(const_cast<mirror::Object*>(*ref), arg, tid, root_type);
     DCHECK(*ref != nullptr);
   }
 }
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 21e942e..9d2fa35 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -23,8 +23,8 @@
 #include <string>
 
 #include "base/logging.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 
 namespace art {
 namespace mirror {
@@ -307,7 +307,7 @@
     return IrtIterator(table_, Capacity(), Capacity());
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type);
 
   uint32_t GetSegmentState() const {
     return segment_state_.all;
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index a829e97..5693747 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -43,15 +43,16 @@
      << weak_interns_.size() << " weak\n";
 }
 
-void InternTable::VisitRoots(RootVisitor* visitor, void* arg,
+void InternTable::VisitRoots(RootCallback* callback, void* arg,
                              bool only_dirty, bool clean_dirty) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   if (!only_dirty || is_dirty_) {
     for (auto& strong_intern : strong_interns_) {
-      strong_intern.second = down_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      strong_intern.second =
+          down_cast<mirror::String*>(callback(strong_intern.second, arg, 0,
+                                              kRootInternedString));
       DCHECK(strong_intern.second != nullptr);
     }
-
     if (clean_dirty) {
       is_dirty_ = false;
     }
@@ -196,15 +197,15 @@
 }
 
 mirror::String* InternTable::InternStrong(mirror::String* s) {
-  if (s == NULL) {
-    return NULL;
+  if (s == nullptr) {
+    return nullptr;
   }
   return Insert(s, true);
 }
 
 mirror::String* InternTable::InternWeak(mirror::String* s) {
-  if (s == NULL) {
-    return NULL;
+  if (s == nullptr) {
+    return nullptr;
   }
   return Insert(s, false);
 }
@@ -215,11 +216,11 @@
   return found == s;
 }
 
-void InternTable::SweepInternTableWeaks(RootVisitor visitor, void* arg) {
+void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
   for (auto it = weak_interns_.begin(), end = weak_interns_.end(); it != end;) {
     mirror::Object* object = it->second;
-    mirror::Object* new_object = visitor(object, arg);
+    mirror::Object* new_object = callback(object, arg);
     if (new_object == nullptr) {
       // TODO: use it = weak_interns_.erase(it) when we get a c++11 stl.
       weak_interns_.erase(it++);
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index eec63c8..9f09fb9 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_INTERN_TABLE_H_
 
 #include "base/mutex.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 #include <map>
 
@@ -55,13 +55,13 @@
   // Interns a potentially new string in the 'weak' table. (See above.)
   mirror::String* InternWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepInternTableWeaks(RootVisitor visitor, void* arg);
+  void SweepInternTableWeaks(IsMarkedCallback* callback, void* arg);
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t Size() const;
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty);
+  void VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty);
 
   void DumpForSigQuit(std::ostream& os) const;
 
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index aa2502d..c328245 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -81,7 +81,7 @@
   mutable std::vector<const mirror::String*> expected_;
 };
 
-mirror::Object* IsMarkedSweepingVisitor(mirror::Object* object, void* arg) {
+mirror::Object* IsMarkedSweepingCallback(mirror::Object* object, void* arg) {
   if (reinterpret_cast<TestPredicate*>(arg)->IsMarked(object)) {
     return object;
   }
@@ -108,7 +108,7 @@
   p.Expect(s1.get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    t.SweepInternTableWeaks(IsMarkedSweepingVisitor, &p);
+    t.SweepInternTableWeaks(IsMarkedSweepingCallback, &p);
   }
 
   EXPECT_EQ(2U, t.Size());
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 02a9aa6..c6faf44 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -57,7 +57,10 @@
   } else if (name == "int java.lang.String.fastIndexOf(int, int)") {
     result->SetI(receiver->AsString()->FastIndexOf(args[0], args[1]));
   } else if (name == "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") {
-    result->SetL(Array::CreateMultiArray(self, reinterpret_cast<Object*>(args[0])->AsClass(), reinterpret_cast<Object*>(args[1])->AsIntArray()));
+    SirtRef<mirror::Class> sirt_class(self, reinterpret_cast<Object*>(args[0])->AsClass());
+    SirtRef<mirror::IntArray> sirt_dimensions(self,
+                                              reinterpret_cast<Object*>(args[1])->AsIntArray());
+    result->SetL(Array::CreateMultiArray(self, sirt_class, sirt_dimensions));
   } else if (name == "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") {
     ScopedObjectAccessUnchecked soa(self);
     result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace(soa)));
@@ -77,6 +80,14 @@
     Object* obj = reinterpret_cast<Object*>(args[0]);
     Object* newValue = reinterpret_cast<Object*>(args[3]);
     obj->SetFieldObject(MemberOffset((static_cast<uint64_t>(args[2]) << 32) | args[1]), newValue, false);
+  } else if (name == "int sun.misc.Unsafe.getArrayBaseOffsetForComponentType(java.lang.Class)") {
+    mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
+    Primitive::Type primitive_type = component->GetPrimitiveType();
+    result->SetI(mirror::Array::DataOffset(Primitive::ComponentSize(primitive_type)).Int32Value());
+  } else if (name == "int sun.misc.Unsafe.getArrayIndexScaleForComponentType(java.lang.Class)") {
+    mirror::Class* component = reinterpret_cast<Object*>(args[0])->AsClass();
+    Primitive::Type primitive_type = component->GetPrimitiveType();
+    result->SetI(Primitive::ComponentSize(primitive_type));
   } else {
     LOG(FATAL) << "Attempt to invoke native method in non-started runtime: " << name;
   }
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 40ba3e3..49dceb2 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -206,7 +206,7 @@
   }
 }
 
-void ObjectRegistry::UpdateObjectPointers(RootVisitor visitor, void* arg) {
+void ObjectRegistry::UpdateObjectPointers(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), lock_);
   if (object_to_entry_.empty()) {
     return;
@@ -215,7 +215,7 @@
   for (auto& pair : object_to_entry_) {
     mirror::Object* new_obj;
     if (pair.first != nullptr) {
-      new_obj = visitor(pair.first, arg);
+      new_obj = callback(pair.first, arg);
       if (new_obj != nullptr) {
         new_object_to_entry.insert(std::make_pair(new_obj, pair.second));
       }
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 0190575..3c6cb15 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -26,7 +26,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "safe_map.h"
 
 namespace art {
@@ -85,7 +85,7 @@
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit, objects are treated as system weaks.
-  void UpdateObjectPointers(RootVisitor visitor, void* arg)
+  void UpdateObjectPointers(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // We have allow / disallow functionality since we use system weak sweeping logic to update moved
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index deea5f6..6f31ca7 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -139,13 +139,13 @@
 }
 
 void InvokeWithArgArray(const ScopedObjectAccess& soa, ArtMethod* method,
-                        ArgArray* arg_array, JValue* result, char result_type)
+                        ArgArray* arg_array, JValue* result, const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* args = arg_array->GetArray();
   if (UNLIKELY(soa.Env()->check_jni)) {
     CheckMethodArguments(method, args);
   }
-  method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, result_type);
+  method->Invoke(soa.Self(), args, arg_array->GetNumBytes(), result, shorty);
 }
 
 static JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj,
@@ -157,7 +157,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -175,7 +175,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -188,7 +188,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -523,9 +523,9 @@
     return dlsym(handle_, symbol_name.c_str());
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg) {
+  void VisitRoots(RootCallback* visitor, void* arg) {
     if (class_loader_ != nullptr) {
-      class_loader_ = visitor(class_loader_, arg);
+      class_loader_ = visitor(class_loader_, arg, 0, kRootVMInternal);
     }
   }
 
@@ -619,9 +619,9 @@
     return NULL;
   }
 
-  void VisitRoots(RootVisitor* visitor, void* arg) {
+  void VisitRoots(RootCallback* callback, void* arg) {
     for (auto& lib_pair : libraries_) {
-      lib_pair.second->VisitRoots(visitor, arg);
+      lib_pair.second->VisitRoots(callback, arg);
     }
   }
 
@@ -637,7 +637,7 @@
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArray(soa, receiver, args);
-  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty()[0]);
+  InvokeWithArgArray(soa, method, &arg_array, &result, mh.GetShorty());
   return result;
 }
 
@@ -2437,8 +2437,10 @@
         m = c->FindVirtualMethod(name, sig);
       }
       if (m == NULL) {
+        c->DumpClass(LOG(ERROR), mirror::Class::kDumpClassFullDetail);
         LOG(return_errors ? ERROR : FATAL) << "Failed to register native method "
-            << PrettyDescriptor(c) << "." << name << sig;
+            << PrettyDescriptor(c) << "." << name << sig << " in "
+            << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
         ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
@@ -2650,6 +2652,16 @@
     size_t bytes = array->GetLength() * component_size;
     VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
                << " elements " << reinterpret_cast<void*>(elements);
+    if (is_copy) {
+      // Sanity check: If elements is not the same as the java array's data, it better not be a
+      // heap address. TODO: This might be slow to check, may be worth keeping track of which
+      // copies we make?
+      if (heap->IsNonDiscontinuousSpaceHeapAddress(reinterpret_cast<mirror::Object*>(elements))) {
+        JniAbortF("ReleaseArrayElements", "invalid element pointer %p, array elements are %p",
+                  reinterpret_cast<void*>(elements), array_data);
+        return;
+      }
+    }
     // Don't need to copy if we had a direct pointer.
     if (mode != JNI_ABORT && is_copy) {
       memcpy(array_data, elements, bytes);
@@ -3373,11 +3385,11 @@
   return native_method;
 }
 
-void JavaVMExt::SweepJniWeakGlobals(RootVisitor visitor, void* arg) {
+void JavaVMExt::SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), weak_globals_lock_);
   for (mirror::Object** entry : weak_globals_) {
     mirror::Object* obj = *entry;
-    mirror::Object* new_obj = visitor(obj, arg);
+    mirror::Object* new_obj = callback(obj, arg);
     if (new_obj == nullptr) {
       new_obj = kClearedJniWeakGlobal;
     }
@@ -3385,20 +3397,20 @@
   }
 }
 
-void JavaVMExt::VisitRoots(RootVisitor* visitor, void* arg) {
+void JavaVMExt::VisitRoots(RootCallback* callback, void* arg) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, globals_lock);
-    globals.VisitRoots(visitor, arg);
+    globals.VisitRoots(callback, arg, 0, kRootJNIGlobal);
   }
   {
     MutexLock mu(self, pins_lock);
-    pin_table.VisitRoots(visitor, arg);
+    pin_table.VisitRoots(callback, arg, 0, kRootVMInternal);
   }
   {
     MutexLock mu(self, libraries_lock);
     // Libraries contains shared libraries which hold a pointer to a class loader.
-    libraries->VisitRoots(visitor, arg);
+    libraries->VisitRoots(callback, arg);
   }
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index cd3c5cb..9e10987 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -22,8 +22,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
+#include "object_callbacks.h"
 #include "reference_table.h"
-#include "root_visitor.h"
 #include "runtime.h"
 #include "sirt_ref.h"
 
@@ -57,7 +57,7 @@
 JValue InvokeWithJValues(const ScopedObjectAccess&, jobject obj, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 void InvokeWithArgArray(const ScopedObjectAccess& soa, mirror::ArtMethod* method,
-                        ArgArray *arg_array, JValue* result, char result_type)
+                        ArgArray *arg_array, JValue* result, const char* shorty)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
@@ -91,7 +91,7 @@
 
   void SetCheckJniEnabled(bool enabled);
 
-  void VisitRoots(RootVisitor*, void*);
+  void VisitRoots(RootCallback* callback, void* arg);
 
   void DisallowNewWeakGlobals() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -99,7 +99,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DeleteWeakGlobalRef(Thread* self, jweak obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SweepJniWeakGlobals(RootVisitor visitor, void* arg);
+  void SweepJniWeakGlobals(IsMarkedCallback* callback, void* arg);
   mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref);
 
   Runtime* runtime;
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index fed734e..4c58c84 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -134,14 +134,14 @@
       arg_array.Append(receiver);
     }
 
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "V");
   }
 
   void InvokeIdentityByteMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* method;
     mirror::Object* receiver;
-    JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(I)I");
+    JniInternalTestMakeExecutable(&method, &receiver, is_static, "identity", "(B)B");
 
     ArgArray arg_array(NULL, 0);
     uint32_t* args = arg_array.GetArray();
@@ -154,22 +154,22 @@
 
     arg_array.Append(0U);
     result.SetB(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(0, result.GetB());
 
     args[0] = -1;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(-1, result.GetB());
 
     args[0] = SCHAR_MAX;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     args[0] = (SCHAR_MIN << 24) >> 24;
     result.SetB(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "BB");
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -190,22 +190,22 @@
 
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = -1;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0] = INT_MIN;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "II");
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -228,28 +228,28 @@
     value.SetD(0.0);
     arg_array.AppendWide(value.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(-1.0);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(DBL_MAX, result.GetD());
 
     value.SetD(DBL_MIN);
     args[0] = value.GetJ();
     args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "DD");
     EXPECT_EQ(DBL_MIN, result.GetD());
   }
 
@@ -271,31 +271,31 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(3, result.GetI());
 
     args[0] = -2;
     args[1] = 5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(3, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(-1, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "III");
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -318,35 +318,40 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
     args[1] = 2;
     args[2] = 3;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(6, result.GetI());
 
     args[0] = -1;
     args[1] = 2;
     args[2] = -3;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MIN;
     args[2] = INT_MAX;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0] = INT_MAX;
     args[1] = INT_MAX;
     args[2] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIII");
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -370,7 +375,8 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -378,7 +384,8 @@
     args[2] = 3;
     args[3] = 4;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(10, result.GetI());
 
     args[0] = -1;
@@ -386,7 +393,8 @@
     args[2] = -3;
     args[3] = 4;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(2, result.GetI());
 
     args[0] = INT_MAX;
@@ -394,7 +402,8 @@
     args[2] = INT_MAX;
     args[3] = INT_MIN;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(-2, result.GetI());
 
     args[0] = INT_MAX;
@@ -402,7 +411,8 @@
     args[2] = INT_MAX;
     args[3] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIII");
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -427,7 +437,8 @@
     arg_array.Append(0U);
     arg_array.Append(0U);
     result.SetI(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(0, result.GetI());
 
     args[0] = 1;
@@ -436,7 +447,8 @@
     args[3] = 4;
     args[4] = 5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(15, result.GetI());
 
     args[0] = -1;
@@ -445,7 +457,8 @@
     args[3] = 4;
     args[4] = -5;
     result.SetI(0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(-3, result.GetI());
 
     args[0] = INT_MAX;
@@ -454,7 +467,8 @@
     args[3] = INT_MIN;
     args[4] = INT_MAX;
     result.SetI(1234);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0] = INT_MAX;
@@ -463,7 +477,8 @@
     args[3] = INT_MAX;
     args[4] = INT_MAX;
     result.SetI(INT_MIN);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "IIIIII");
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -489,7 +504,8 @@
     arg_array.AppendWide(value.GetJ());
     arg_array.AppendWide(value2.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -499,7 +515,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(3.0, result.GetD());
 
     value.SetD(1.0);
@@ -509,7 +526,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(-1.0, result.GetD());
 
     value.SetD(DBL_MAX);
@@ -519,7 +537,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(1.7976931348623157e308, result.GetD());
 
     value.SetD(DBL_MAX);
@@ -529,7 +548,8 @@
     args[2] = value2.GetJ();
     args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDD");
     EXPECT_EQ(INFINITY, result.GetD());
   }
 
@@ -558,7 +578,8 @@
     arg_array.AppendWide(value2.GetJ());
     arg_array.AppendWide(value3.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -571,7 +592,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(6.0, result.GetD());
 
     value.SetD(1.0);
@@ -584,7 +606,8 @@
     args[4] = value3.GetJ();
     args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDD");
     EXPECT_EQ(2.0, result.GetD());
   }
 
@@ -616,7 +639,8 @@
     arg_array.AppendWide(value3.GetJ());
     arg_array.AppendWide(value4.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -632,7 +656,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(10.0, result.GetD());
 
     value.SetD(1.0);
@@ -648,7 +673,8 @@
     args[6] = value4.GetJ();
     args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDD");
     EXPECT_EQ(-2.0, result.GetD());
   }
 
@@ -683,7 +709,8 @@
     arg_array.AppendWide(value4.GetJ());
     arg_array.AppendWide(value5.GetJ());
     result.SetD(-1.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(0.0, result.GetD());
 
     value.SetD(1.0);
@@ -702,7 +729,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(15.0, result.GetD());
 
     value.SetD(1.0);
@@ -721,7 +749,8 @@
     args[8] = value5.GetJ();
     args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'D');
+    method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result,
+                   "DDDDDD");
     EXPECT_EQ(3.0, result.GetD());
   }
 
@@ -1764,7 +1793,7 @@
   CHECK(started);
   Thread::Current()->TransitionFromSuspendedToRunnable();
 
-  method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+  method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, "VL");
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b2725e5..35ea2b3 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -111,9 +111,9 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::VisitRoots(RootVisitor* visitor, void* arg) {
+inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) {
   if (array_class_ != nullptr) {
-    array_class_ = down_cast<Class*>(visitor(array_class_, arg));
+    array_class_ = down_cast<Class*>(callback(array_class_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index ca0d1f3..c23234e 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -40,23 +40,25 @@
 // piece and work our way in.
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
-static Array* RecursiveCreateMultiArray(Thread* self, Class* array_class, int current_dimension,
-                                        SirtRef<mirror::IntArray>& dimensions)
+static Array* RecursiveCreateMultiArray(Thread* self,
+                                        const SirtRef<Class>& array_class, int current_dimension,
+                                        const SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class, array_length));
-  if (UNLIKELY(new_array.get() == NULL)) {
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length));
+  if (UNLIKELY(new_array.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
-      Array* sub_array = RecursiveCreateMultiArray(self, array_class->GetComponentType(),
+      SirtRef<mirror::Class> sirt_component_type(self, array_class->GetComponentType());
+      Array* sub_array = RecursiveCreateMultiArray(self, sirt_component_type,
                                                    current_dimension + 1, dimensions);
-      if (UNLIKELY(sub_array == NULL)) {
+      if (UNLIKELY(sub_array == nullptr)) {
         CHECK(self->IsExceptionPending());
-        return NULL;
+        return nullptr;
       }
       new_array->AsObjectArray<Array>()->Set(i, sub_array);
     }
@@ -64,7 +66,8 @@
   return new_array.get();
 }
 
-Array* Array::CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions) {
+Array* Array::CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                               const SirtRef<IntArray>& dimensions) {
   // Verify dimensions.
   //
   // The caller is responsible for verifying that "dimArray" is non-null
@@ -77,28 +80,27 @@
     int dimension = dimensions->Get(i);
     if (UNLIKELY(dimension < 0)) {
       ThrowNegativeArraySizeException(StringPrintf("Dimension %d: %d", i, dimension).c_str());
-      return NULL;
+      return nullptr;
     }
   }
 
   // Generate the full name of the array class.
   std::string descriptor(num_dimensions, '[');
-  descriptor += ClassHelper(element_class).GetDescriptor();
+  descriptor += ClassHelper(element_class.get()).GetDescriptor();
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
-  Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
-  if (UNLIKELY(array_class == NULL)) {
+  SirtRef<mirror::Class> array_class(self,
+                                     class_linker->FindClass(descriptor.c_str(), class_loader));
+  if (UNLIKELY(array_class.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   // create the array
-  SirtRef<mirror::IntArray> sirt_dimensions(self, dimensions);
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, sirt_dimensions);
-  if (UNLIKELY(new_array == NULL)) {
+  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  if (UNLIKELY(new_array == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
   }
   return new_array;
 }
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 6e366a0..2e123ef 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
 #include "object.h"
+#include "object_callbacks.h"
 #include "gc/heap.h"
 #include "thread.h"
 
@@ -47,7 +48,8 @@
                       size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
+  static Array* CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                                 const SirtRef<IntArray>& dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -181,7 +183,7 @@
     array_class_ = NULL;
   }
 
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index c3a4efb..29aade9 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -52,10 +52,10 @@
   SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), num_bytes.Uint32Value(), false);
 }
 
-void ArtField::VisitRoots(RootVisitor* visitor, void* arg) {
+void ArtField::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_reflect_ArtField_ != nullptr) {
     java_lang_reflect_ArtField_ = down_cast<mirror::Class*>(
-        visitor(java_lang_reflect_ArtField_, arg));
+        callback(java_lang_reflect_ArtField_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index b33fe4b..716b736 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -20,6 +20,7 @@
 #include "class.h"
 #include "modifiers.h"
 #include "object.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -106,7 +107,7 @@
 
   static void SetClass(Class* java_lang_reflect_ArtField);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 575ea03..67e6c7d 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -35,15 +35,16 @@
 namespace mirror {
 
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
-extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
+extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
+                                      const char*);
 
 // TODO: get global references for these
 Class* ArtMethod::java_lang_reflect_ArtMethod_ = NULL;
 
-void ArtMethod::VisitRoots(RootVisitor* visitor, void* arg) {
+void ArtMethod::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_reflect_ArtMethod_ != nullptr) {
     java_lang_reflect_ArtMethod_ = down_cast<mirror::Class*>(
-        visitor(java_lang_reflect_ArtMethod_, arg));
+        callback(java_lang_reflect_ArtMethod_, arg, 0, kRootStickyClass));
   }
 }
 
@@ -245,10 +246,11 @@
 }
 
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
-                       char result_type) {
+                       const char* shorty) {
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
+    CHECK_STREQ(MethodHelper(this).GetShorty(), shorty);
   }
 
   // Push a transition back into managed code onto the linked list in thread.
@@ -274,9 +276,9 @@
                                                   : GetEntryPointFromPortableCompiledCode());
       }
       if (!IsPortableCompiled()) {
-        (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
       } else {
-        (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
+        (*art_portable_invoke_stub)(this, args, args_size, self, result, shorty[0]);
       }
       if (UNLIKELY(reinterpret_cast<intptr_t>(self->GetException(NULL)) == -1)) {
         // Unusual case where we were running LLVM generated code and an
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index bfa7cbe..e678503 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -23,7 +23,7 @@
 #include "locks.h"
 #include "modifiers.h"
 #include "object.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -207,8 +207,8 @@
   // Find the method that this method overrides
   ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, char result_type)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
+              const char* shorty) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   EntryPointFromInterpreter* GetEntryPointFromInterpreter() {
     return GetFieldPtr<EntryPointFromInterpreter*>(
@@ -404,7 +404,7 @@
 
   static void ResetClass();
 
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 8051c9b..99a35e3 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -50,9 +50,10 @@
   java_lang_Class_ = NULL;
 }
 
-void Class::VisitRoots(RootVisitor* visitor, void* arg) {
+void Class::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_Class_ != nullptr) {
-    java_lang_Class_ = down_cast<Class*>(visitor(java_lang_Class_, arg));
+    java_lang_Class_ = down_cast<Class*>(
+        callback(java_lang_Class_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index cbec476..82c8264 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -795,7 +795,7 @@
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
   static void SetClassClass(Class* java_lang_Class);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // When class is verified, set the kAccPreverified flag on each method.
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 2af32da..db9723b 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -236,12 +236,12 @@
   SirtRef<Class> c(soa.Self(), class_linker_->FindSystemClass("I"));
   SirtRef<IntArray> dims(soa.Self(), IntArray::Alloc(soa.Self(), 1));
   dims->Set(0, 1);
-  Array* multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  Array* multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[I"));
   EXPECT_EQ(1, multi->GetLength());
 
   dims->Set(0, -1);
-  multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(soa.Self()->IsExceptionPending());
   EXPECT_EQ(PrettyDescriptor(soa.Self()->GetException(NULL)->GetClass()),
             "java.lang.NegativeArraySizeException");
@@ -252,7 +252,7 @@
     for (int j = 0; j < 20; ++j) {
       dims->Set(0, i);
       dims->Set(1, j);
-      multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+      multi = Array::CreateMultiArray(soa.Self(), c, dims);
       EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[[I"));
       EXPECT_EQ(i, multi->GetLength());
       for (int k = 0; k < i; ++k) {
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index a7ebe07..2e33198 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -58,9 +58,10 @@
   return trace;
 }
 
-void StackTraceElement::VisitRoots(RootVisitor* visitor, void* arg) {
+void StackTraceElement::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_StackTraceElement_ != nullptr) {
-    java_lang_StackTraceElement_ = down_cast<Class*>(visitor(java_lang_StackTraceElement_, arg));
+    java_lang_StackTraceElement_ = down_cast<Class*>(
+        callback(java_lang_StackTraceElement_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 73d2673..51817f6 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -57,7 +57,7 @@
 
   static void SetClass(Class* java_lang_StackTraceElement);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 10ae066..6f4ead9 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -280,9 +280,9 @@
   return countDiff;
 }
 
-void String::VisitRoots(RootVisitor* visitor, void* arg) {
+void String::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_String_ != nullptr) {
-    java_lang_String_ = down_cast<Class*>(visitor(java_lang_String_, arg));
+    java_lang_String_ = down_cast<Class*>(callback(java_lang_String_, arg, 0, kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 406c5a3..57ec314 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -19,7 +19,7 @@
 
 #include "class.h"
 #include "gtest/gtest.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 namespace art {
 
@@ -107,7 +107,7 @@
 
   static void SetClass(Class* java_lang_String);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index 2318b74..a57bd43 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -93,9 +93,10 @@
   java_lang_Throwable_ = NULL;
 }
 
-void Throwable::VisitRoots(RootVisitor* visitor, void* arg) {
+void Throwable::VisitRoots(RootCallback* callback, void* arg) {
   if (java_lang_Throwable_ != nullptr) {
-    java_lang_Throwable_ = down_cast<Class*>(visitor(java_lang_Throwable_, arg));
+    java_lang_Throwable_ = down_cast<Class*>(callback(java_lang_Throwable_, arg, 0,
+                                                      kRootStickyClass));
   }
 }
 
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index bc9848a..de71957 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_MIRROR_THROWABLE_H_
 
 #include "object.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "string.h"
 
 namespace art {
@@ -51,7 +51,7 @@
 
   static void SetClass(Class* java_lang_Throwable);
   static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor, void* arg)
+  static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 72220e0..85f3a09 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1067,13 +1067,13 @@
   list_.push_front(m);
 }
 
-void MonitorList::SweepMonitorList(RootVisitor visitor, void* arg) {
+void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
     mirror::Object* obj = m->GetObject();
     // The object of a monitor can be null if we have deflated it.
-    mirror::Object* new_obj = obj != nullptr ? visitor(obj, arg) : nullptr;
+    mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << m->GetObject();
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 85a8c48..ca95e0b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -26,7 +26,7 @@
 
 #include "atomic.h"
 #include "base/mutex.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "sirt_ref.h"
 #include "thread_state.h"
 
@@ -220,7 +220,8 @@
 
   void Add(Monitor* m);
 
-  void SweepMonitorList(RootVisitor visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SweepMonitorList(IsMarkedCallback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DisallowNewMonitors();
   void AllowNewMonitors();
 
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index d9baaaf..6482917 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -234,9 +234,8 @@
                                            jboolean countAssignable) {
   ScopedObjectAccess soa(env);
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look. Need to do GC before decoding
-  // any jobjects.
+  // We only want reachable instances, so do a GC. Heap::VisitObjects visits all of the heap
+  // objects in the all spaces and the allocation stack.
   heap->CollectGarbage(false);
   mirror::Class* c = soa.Decode<mirror::Class*>(javaClass);
   if (c == nullptr) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index e1b5f97..5267069 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -181,11 +181,12 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg)
+static mirror::Object* PreloadDexCachesStringsCallback(mirror::Object* root, void* arg,
+                                                       uint32_t /*thread_id*/,
+                                                       RootType /*root_type*/)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   StringTable& table = *reinterpret_cast<StringTable*>(arg);
   mirror::String* string = const_cast<mirror::Object*>(root)->AsString();
-  // LOG(INFO) << "VMRuntime.preloadDexCaches interned=" << string->ToModifiedUtf8();
   table[string->ToModifiedUtf8()] = string;
   return root;
 }
@@ -404,7 +405,7 @@
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
   if (kPreloadDexCachesStrings) {
-    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsVisitor, &strings, false, false);
+    runtime->GetInternTable()->VisitRoots(PreloadDexCachesStringsCallback, &strings, false, false);
   }
 
   const std::vector<const DexFile*>& boot_class_path = linker->GetBootClassPath();
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 52cdb59..2197597 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -29,14 +29,16 @@
 static jobject Array_createMultiArray(JNIEnv* env, jclass, jclass javaElementClass, jobject javaDimArray) {
   ScopedFastNativeObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  mirror::Class* element_class = soa.Decode<mirror::Class*>(javaElementClass);
+  SirtRef<mirror::Class> element_class(soa.Self(), soa.Decode<mirror::Class*>(javaElementClass));
   DCHECK(element_class->IsClass());
   DCHECK(javaDimArray != NULL);
   mirror::Object* dimensions_obj = soa.Decode<mirror::Object*>(javaDimArray);
   DCHECK(dimensions_obj->IsArrayInstance());
   DCHECK_STREQ(ClassHelper(dimensions_obj->GetClass()).GetDescriptor(), "[I");
-  mirror::IntArray* dimensions_array = down_cast<mirror::IntArray*>(dimensions_obj);
-  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class, dimensions_array);
+  SirtRef<mirror::IntArray> dimensions_array(soa.Self(),
+                                             down_cast<mirror::IntArray*>(dimensions_obj));
+  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class,
+                                                             dimensions_array);
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 6c22003..6727862 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -16,6 +16,7 @@
 
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
+#include "mirror/array.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "scoped_fast_native_object_access.h"
@@ -153,6 +154,20 @@
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
 
+static jint Unsafe_getArrayBaseOffsetForComponentType(JNIEnv* env, jclass, jobject component_class) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Class* component = soa.Decode<mirror::Class*>(component_class);
+  Primitive::Type primitive_type = component->GetPrimitiveType();
+  return mirror::Array::DataOffset(Primitive::ComponentSize(primitive_type)).Int32Value();
+}
+
+static jint Unsafe_getArrayIndexScaleForComponentType(JNIEnv* env, jclass, jobject component_class) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Class* component = soa.Decode<mirror::Class*>(component_class);
+  Primitive::Type primitive_type = component->GetPrimitiveType();
+  return Primitive::ComponentSize(primitive_type);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Unsafe, compareAndSwapInt, "!(Ljava/lang/Object;JII)Z"),
   NATIVE_METHOD(Unsafe, compareAndSwapLong, "!(Ljava/lang/Object;JJJ)Z"),
@@ -172,6 +187,8 @@
   NATIVE_METHOD(Unsafe, getObject, "!(Ljava/lang/Object;J)Ljava/lang/Object;"),
   NATIVE_METHOD(Unsafe, putObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
   NATIVE_METHOD(Unsafe, putOrderedObject, "!(Ljava/lang/Object;JLjava/lang/Object;)V"),
+  NATIVE_METHOD(Unsafe, getArrayBaseOffsetForComponentType, "!(Ljava/lang/Class;)I"),
+  NATIVE_METHOD(Unsafe, getArrayIndexScaleForComponentType, "!(Ljava/lang/Class;)I"),
 };
 
 void register_sun_misc_Unsafe(JNIEnv* env) {
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 0f380ad..00a8506 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -226,7 +226,7 @@
   }
 
   for (size_t i = 0; i < GetOatHeader().GetDexFileCount(); i++) {
-    size_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
+    uint32_t dex_file_location_size = *reinterpret_cast<const uint32_t*>(oat);
     if (UNLIKELY(dex_file_location_size == 0U)) {
       *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zd with empty location name",
                                 GetLocation().c_str(), i);
diff --git a/runtime/object_callbacks.h b/runtime/object_callbacks.h
new file mode 100644
index 0000000..8e3c529
--- /dev/null
+++ b/runtime/object_callbacks.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OBJECT_CALLBACKS_H_
+#define ART_RUNTIME_OBJECT_CALLBACKS_H_
+
+// For uint32_t.
+#include <stdint.h>
+// For size_t.
+#include <stdlib.h>
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+class StackVisitor;
+
+enum RootType {
+  kRootUnknown = 0,
+  kRootJNIGlobal,
+  kRootJNILocal,
+  kRootJavaFrame,
+  kRootNativeStack,
+  kRootStickyClass,
+  kRootThreadBlock,
+  kRootMonitorUsed,
+  kRootThreadObject,
+  kRootInternedString,
+  kRootDebugger,
+  kRootVMInternal,
+  kRootJNIMonitor,
+};
+
+// Returns the new address of the object, returns root if it has not moved. tid and root_type are
+// only used by hprof.
+typedef mirror::Object* (RootCallback)(mirror::Object* root, void* arg, uint32_t thread_id,
+    RootType root_type) __attribute__((warn_unused_result));
+// A callback for visiting an object in the heap.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+// A callback used for marking an object, returns the new address of the object if the object moved.
+typedef mirror::Object* (MarkObjectCallback)(mirror::Object* obj, void* arg)
+    __attribute__((warn_unused_result));
+// A callback for verifying roots.
+typedef void (VerifyRootCallback)(const mirror::Object* root, void* arg, size_t vreg,
+    const StackVisitor* visitor);
+// A callback for testing if an object is marked, returns nullptr if not marked, otherwise the new
+// address the object (if the object didn't move, returns the object input parameter).
+typedef mirror::Object* (IsMarkedCallback)(mirror::Object* object, void* arg)
+    __attribute__((warn_unused_result));
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OBJECT_CALLBACKS_H_
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index b5ef735..a9b17e0 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -231,9 +231,10 @@
   DumpSummaryLine(os, sorted_entries.back(), GetElementCount(sorted_entries.back()), identical, equiv);
 }
 
-void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
+void ReferenceTable::VisitRoots(RootCallback* visitor, void* arg, uint32_t tid,
+                                RootType root_type) {
   for (auto& ref : entries_) {
-    ref = visitor(ref, arg);
+    ref = visitor(ref, arg, tid, root_type);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 37b3172..c9f5bc5 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -22,8 +22,8 @@
 #include <string>
 #include <vector>
 
+#include "object_callbacks.h"
 #include "locks.h"
-#include "root_visitor.h"
 
 namespace art {
 namespace mirror {
@@ -47,7 +47,7 @@
 
   void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* visitor, void* arg, uint32_t tid, RootType root_type);
 
  private:
   typedef std::vector<mirror::Object*> Table;
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index ac8f5ef..0bfa70f 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -220,36 +220,46 @@
   }
 
   jmethodID m = NULL;
+  const char* shorty;
   switch (src_class) {
   case Primitive::kPrimBoolean:
     m = WellKnownClasses::java_lang_Boolean_valueOf;
+    shorty = "LZ";
     break;
   case Primitive::kPrimByte:
     m = WellKnownClasses::java_lang_Byte_valueOf;
+    shorty = "LB";
     break;
   case Primitive::kPrimChar:
     m = WellKnownClasses::java_lang_Character_valueOf;
+    shorty = "LC";
     break;
   case Primitive::kPrimDouble:
     m = WellKnownClasses::java_lang_Double_valueOf;
+    shorty = "LD";
     break;
   case Primitive::kPrimFloat:
     m = WellKnownClasses::java_lang_Float_valueOf;
+    shorty = "LF";
     break;
   case Primitive::kPrimInt:
     m = WellKnownClasses::java_lang_Integer_valueOf;
+    shorty = "LI";
     break;
   case Primitive::kPrimLong:
     m = WellKnownClasses::java_lang_Long_valueOf;
+    shorty = "LJ";
     break;
   case Primitive::kPrimShort:
     m = WellKnownClasses::java_lang_Short_valueOf;
+    shorty = "LS";
     break;
   case Primitive::kPrimVoid:
     // There's no such thing as a void field, and void methods invoked via reflection return null.
-    return NULL;
+    return nullptr;
   default:
     LOG(FATAL) << static_cast<int>(src_class);
+    shorty = nullptr;
   }
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
@@ -257,7 +267,7 @@
     CHECK_EQ(soa.Self()->GetState(), kRunnable);
   }
 
-  ArgArray arg_array(NULL, 0);
+  ArgArray arg_array(nullptr, 0);
   JValue result;
   if (src_class == Primitive::kPrimDouble || src_class == Primitive::kPrimLong) {
     arg_array.AppendWide(value.GetJ());
@@ -266,7 +276,7 @@
   }
 
   soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, 'L');
+                              &result, shorty);
   return result.GetL();
 }
 
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
deleted file mode 100644
index 78c30ff..0000000
--- a/runtime/root_visitor.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ROOT_VISITOR_H_
-#define ART_RUNTIME_ROOT_VISITOR_H_
-
-// For size_t.
-#include <stdlib.h>
-
-namespace art {
-namespace mirror {
-class Object;
-}  // namespace mirror
-class StackVisitor;
-
-// Returns the new address of the object, returns root if it has not moved.
-typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
-    __attribute__((warn_unused_result));
-typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
-                                 const StackVisitor* visitor);
-typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
-typedef void (ObjectVisitorCallback)(mirror::Object* obj, void* arg);
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ROOT_VISITOR_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 09d05d1..6ca45e8 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -375,7 +375,7 @@
   return value;
 }
 
-void Runtime::SweepSystemWeaks(RootVisitor* visitor, void* arg) {
+void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) {
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
@@ -807,7 +807,7 @@
 
   JValue result;
   ArgArray arg_array(nullptr, 0);
-  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, 'L');
+  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, "L");
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
                                             down_cast<mirror::ClassLoader*>(result.GetL()));
   CHECK(class_loader.get() != nullptr);
@@ -1301,66 +1301,69 @@
   return pre_allocated_OutOfMemoryError_;
 }
 
-void Runtime::VisitConcurrentRoots(RootVisitor* visitor, void* arg, bool only_dirty,
+void Runtime::VisitConcurrentRoots(RootCallback* callback, void* arg, bool only_dirty,
                                    bool clean_dirty) {
-  intern_table_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
-  class_linker_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
+  intern_table_->VisitRoots(callback, arg, only_dirty, clean_dirty);
+  class_linker_->VisitRoots(callback, arg, only_dirty, clean_dirty);
 }
 
-void Runtime::VisitNonThreadRoots(RootVisitor* visitor, void* arg) {
+void Runtime::VisitNonThreadRoots(RootCallback* callback, void* arg) {
   // Visit the classes held as static in mirror classes.
-  mirror::ArtField::VisitRoots(visitor, arg);
-  mirror::ArtMethod::VisitRoots(visitor, arg);
-  mirror::Class::VisitRoots(visitor, arg);
-  mirror::StackTraceElement::VisitRoots(visitor, arg);
-  mirror::String::VisitRoots(visitor, arg);
-  mirror::Throwable::VisitRoots(visitor, arg);
+  mirror::ArtField::VisitRoots(callback, arg);
+  mirror::ArtMethod::VisitRoots(callback, arg);
+  mirror::Class::VisitRoots(callback, arg);
+  mirror::StackTraceElement::VisitRoots(callback, arg);
+  mirror::String::VisitRoots(callback, arg);
+  mirror::Throwable::VisitRoots(callback, arg);
   // Visit all the primitive array types classes.
-  mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor, arg);   // BooleanArray
-  mirror::PrimitiveArray<int8_t>::VisitRoots(visitor, arg);    // ByteArray
-  mirror::PrimitiveArray<uint16_t>::VisitRoots(visitor, arg);  // CharArray
-  mirror::PrimitiveArray<double>::VisitRoots(visitor, arg);    // DoubleArray
-  mirror::PrimitiveArray<float>::VisitRoots(visitor, arg);     // FloatArray
-  mirror::PrimitiveArray<int32_t>::VisitRoots(visitor, arg);   // IntArray
-  mirror::PrimitiveArray<int64_t>::VisitRoots(visitor, arg);   // LongArray
-  mirror::PrimitiveArray<int16_t>::VisitRoots(visitor, arg);   // ShortArray
-  java_vm_->VisitRoots(visitor, arg);
+  mirror::PrimitiveArray<uint8_t>::VisitRoots(callback, arg);   // BooleanArray
+  mirror::PrimitiveArray<int8_t>::VisitRoots(callback, arg);    // ByteArray
+  mirror::PrimitiveArray<uint16_t>::VisitRoots(callback, arg);  // CharArray
+  mirror::PrimitiveArray<double>::VisitRoots(callback, arg);    // DoubleArray
+  mirror::PrimitiveArray<float>::VisitRoots(callback, arg);     // FloatArray
+  mirror::PrimitiveArray<int32_t>::VisitRoots(callback, arg);   // IntArray
+  mirror::PrimitiveArray<int64_t>::VisitRoots(callback, arg);   // LongArray
+  mirror::PrimitiveArray<int16_t>::VisitRoots(callback, arg);   // ShortArray
+  java_vm_->VisitRoots(callback, arg);
   if (pre_allocated_OutOfMemoryError_ != nullptr) {
     pre_allocated_OutOfMemoryError_ = down_cast<mirror::Throwable*>(
-        visitor(pre_allocated_OutOfMemoryError_, arg));
+        callback(pre_allocated_OutOfMemoryError_, arg, 0, kRootVMInternal));
     DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  resolution_method_ = down_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  resolution_method_ = down_cast<mirror::ArtMethod*>(callback(resolution_method_, arg, 0,
+                                                              kRootVMInternal));
   DCHECK(resolution_method_ != nullptr);
   if (HasImtConflictMethod()) {
-    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
+    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(callback(imt_conflict_method_, arg, 0,
+                                                                  kRootVMInternal));
   }
   if (HasDefaultImt()) {
-    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
+    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(callback(default_imt_, arg,
+                                                                               0, kRootVMInternal));
   }
 
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     if (callee_save_methods_[i] != nullptr) {
       callee_save_methods_[i] = down_cast<mirror::ArtMethod*>(
-          visitor(callee_save_methods_[i], arg));
+          callback(callee_save_methods_[i], arg, 0, kRootVMInternal));
     }
   }
   {
     MutexLock mu(Thread::Current(), method_verifiers_lock_);
     for (verifier::MethodVerifier* verifier : method_verifiers_) {
-      verifier->VisitRoots(visitor, arg);
+      verifier->VisitRoots(callback, arg);
     }
   }
 }
 
-void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor, void* arg) {
-  thread_list_->VisitRoots(visitor, arg);
-  VisitNonThreadRoots(visitor, arg);
+void Runtime::VisitNonConcurrentRoots(RootCallback* callback, void* arg) {
+  thread_list_->VisitRoots(callback, arg);
+  VisitNonThreadRoots(callback, arg);
 }
 
-void Runtime::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
-  VisitConcurrentRoots(visitor, arg, only_dirty, clean_dirty);
-  VisitNonConcurrentRoots(visitor, arg);
+void Runtime::VisitRoots(RootCallback* callback, void* arg, bool only_dirty, bool clean_dirty) {
+  VisitConcurrentRoots(callback, arg, only_dirty, clean_dirty);
+  VisitNonConcurrentRoots(callback, arg);
 }
 
 mirror::ObjectArray<mirror::ArtMethod>* Runtime::CreateDefaultImt(ClassLinker* cl) {
@@ -1473,12 +1476,11 @@
     method->SetFpSpillMask(0);
   } else if (instruction_set == kX86_64) {
     uint32_t ref_spills =
-        (1 << art::x86_64::RBP) | (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) |
-        (1 << art::x86_64::R8)  | (1 << art::x86_64::R9)  | (1 << art::x86_64::R10) |
-        (1 << art::x86_64::R11) | (1 << art::x86_64::R12) | (1 << art::x86_64::R13) |
-        (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
+        (1 << art::x86_64::RBX) | (1 << art::x86_64::RBP) | (1 << art::x86_64::R12) |
+        (1 << art::x86_64::R13) | (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
     uint32_t arg_spills =
-        (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RBX);
+        (1 << art::x86_64::RSI) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RCX) |
+        (1 << art::x86_64::R8) | (1 << art::x86_64::R9);
     uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
                          (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
     size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 896a18b..07f3d7d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -34,7 +34,7 @@
 #include "instrumentation.h"
 #include "jobject_comparator.h"
 #include "locks.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
 
@@ -341,24 +341,24 @@
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
-  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the roots we can do safely do concurrently.
-  void VisitConcurrentRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
+  void VisitConcurrentRoots(RootCallback* visitor, void* arg, bool only_dirty, bool clean_dirty)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
-  void VisitNonThreadRoots(RootVisitor* visitor, void* arg)
+  void VisitNonThreadRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Visit all other roots which must be done with mutators suspended.
-  void VisitNonConcurrentRoots(RootVisitor* visitor, void* arg)
+  void VisitNonConcurrentRoots(RootCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweep system weaks, the system weak is deleted if the visitor return nullptr. Otherwise, the
   // system weak is updated to be the visitor's returned value.
-  void SweepSystemWeaks(RootVisitor* visitor, void* arg)
+  void SweepSystemWeaks(IsMarkedCallback* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns a special method that calls into a trampoline for runtime method resolution
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index d8f408a..73ac034 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -19,6 +19,7 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/utsname.h>
+#include <inttypes.h>
 
 #include "base/logging.h"
 #include "base/mutex.h"
@@ -185,6 +186,41 @@
     os << '\n';
     DumpRegister32(os, "gs",  context.gregs[REG_GS]);
     DumpRegister32(os, "ss",  context.gregs[REG_SS]);
+#elif defined(__linux__) && defined(__x86_64__)
+    DumpRegister64(os, "rax", context.gregs[REG_RAX]);
+    DumpRegister64(os, "rbx", context.gregs[REG_RBX]);
+    DumpRegister64(os, "rcx", context.gregs[REG_RCX]);
+    DumpRegister64(os, "rdx", context.gregs[REG_RDX]);
+    os << '\n';
+
+    DumpRegister64(os, "rdi", context.gregs[REG_RDI]);
+    DumpRegister64(os, "rsi", context.gregs[REG_RSI]);
+    DumpRegister64(os, "rbp", context.gregs[REG_RBP]);
+    DumpRegister64(os, "rsp", context.gregs[REG_RSP]);
+    os << '\n';
+
+    DumpRegister64(os, "r8 ", context.gregs[REG_R8]);
+    DumpRegister64(os, "r9 ", context.gregs[REG_R9]);
+    DumpRegister64(os, "r10", context.gregs[REG_R10]);
+    DumpRegister64(os, "r11", context.gregs[REG_R11]);
+    os << '\n';
+
+    DumpRegister64(os, "r12", context.gregs[REG_R12]);
+    DumpRegister64(os, "r13", context.gregs[REG_R13]);
+    DumpRegister64(os, "r14", context.gregs[REG_R14]);
+    DumpRegister64(os, "r15", context.gregs[REG_R15]);
+    os << '\n';
+
+    DumpRegister64(os, "rip", context.gregs[REG_RIP]);
+    os << "   ";
+    DumpRegister32(os, "eflags", context.gregs[REG_EFL]);
+    DumpX86Flags(os, context.gregs[REG_EFL]);
+    os << '\n';
+
+    DumpRegister32(os, "cs",  (context.gregs[REG_CSGSFS]) & 0x0FFFF);
+    DumpRegister32(os, "gs",  (context.gregs[REG_CSGSFS] >> 16) & 0x0FFFF);
+    DumpRegister32(os, "fs",  (context.gregs[REG_CSGSFS] >> 32) & 0x0FFFF);
+    os << '\n';
 #else
     os << "Unknown architecture/word size/OS in ucontext dump";
 #endif
@@ -194,6 +230,10 @@
     os << StringPrintf(" %6s: 0x%08x", name, value);
   }
 
+  void DumpRegister64(std::ostream& os, const char* name, uint64_t value) {
+    os << StringPrintf(" %6s: 0x%016" PRIx64, name, value);
+  }
+
   void DumpX86Flags(std::ostream& os, uint32_t flags) {
     os << " [";
     if ((flags & (1 << 0)) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c649765..6c3e7ee 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -179,7 +179,7 @@
     JValue result;
     ArgArray arg_array(nullptr, 0);
     arg_array.Append(receiver);
-    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
+    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, "V");
   }
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);
@@ -992,7 +992,8 @@
   }
 }
 
-static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg)
+static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg, uint32_t /*thread_id*/,
+                                          RootType /*root_type*/)
     NO_THREAD_SAFETY_ANALYSIS {
   Thread* self = reinterpret_cast<Thread*>(arg);
   mirror::Object* entered_monitor = object;
@@ -1034,7 +1035,7 @@
 
   // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
   if (jni_env_ != nullptr) {
-    jni_env_->monitors.VisitRoots(MonitorExitVisitor, self);
+    jni_env_->monitors.VisitRoots(MonitorExitVisitor, self, 0, kRootVMInternal);
   }
 }
 
@@ -1144,16 +1145,17 @@
   return managed_stack_.ShadowFramesContain(sirt_entry);
 }
 
-void Thread::SirtVisitRoots(RootVisitor* visitor, void* arg) {
+void Thread::SirtVisitRoots(RootCallback* visitor, void* arg) {
+  uint32_t tid = GetTid();
   for (StackIndirectReferenceTable* cur = top_sirt_; cur; cur = cur->GetLink()) {
     size_t num_refs = cur->NumberOfReferences();
     for (size_t j = 0; j < num_refs; ++j) {
       mirror::Object* object = cur->GetReference(j);
       if (object != nullptr) {
-        const mirror::Object* new_obj = visitor(object, arg);
+        mirror::Object* new_obj = visitor(object, arg, tid, kRootNativeStack);
         DCHECK(new_obj != nullptr);
         if (new_obj != object) {
-          cur->SetReference(j, const_cast<mirror::Object*>(new_obj));
+          cur->SetReference(j, new_obj);
         }
       }
     }
@@ -1530,6 +1532,7 @@
 
   // Choose an appropriate constructor and set up the arguments.
   const char* signature;
+  const char* shorty;
   SirtRef<mirror::String> msg_string(this, nullptr);
   if (msg != nullptr) {
     // Ensure we remember this and the method over the String allocation.
@@ -1539,14 +1542,18 @@
       return;
     }
     if (cause.get() == nullptr) {
+      shorty = "VL";
       signature = "(Ljava/lang/String;)V";
     } else {
+      shorty = "VLL";
       signature = "(Ljava/lang/String;Ljava/lang/Throwable;)V";
     }
   } else {
     if (cause.get() == nullptr) {
+      shorty = "V";
       signature = "()V";
     } else {
+      shorty = "VL";
       signature = "(Ljava/lang/Throwable;)V";
     }
   }
@@ -1570,7 +1577,7 @@
                                          throw_location.GetDexPc());
     SetException(gc_safe_throw_location, exception.get());
   } else {
-    ArgArray args("VLL", 3);
+    ArgArray args(shorty, strlen(shorty));
     args.Append(exception.get());
     if (msg != nullptr) {
       args.Append(msg_string.get());
@@ -1579,7 +1586,7 @@
       args.Append(cause.get());
     }
     JValue result;
-    exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, 'V');
+    exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, shorty);
     if (LIKELY(!IsExceptionPending())) {
       ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
                                            throw_location.GetDexPc());
@@ -1954,31 +1961,17 @@
 
 class RootCallbackVisitor {
  public:
-  RootCallbackVisitor(RootVisitor* visitor, void* arg) : visitor_(visitor), arg_(arg) {}
+  RootCallbackVisitor(RootCallback* callback, void* arg, uint32_t tid)
+     : callback_(callback), arg_(arg), tid_(tid) {}
 
   mirror::Object* operator()(mirror::Object* obj, size_t, const StackVisitor*) const {
-    return visitor_(obj, arg_);
+    return callback_(obj, arg_, tid_, kRootJavaFrame);
   }
 
  private:
-  RootVisitor* visitor_;
-  void* arg_;
-};
-
-class VerifyCallbackVisitor {
- public:
-  VerifyCallbackVisitor(VerifyRootVisitor* visitor, void* arg)
-      : visitor_(visitor),
-        arg_(arg) {
-  }
-
-  void operator()(const mirror::Object* obj, size_t vreg, const StackVisitor* visitor) const {
-    visitor_(obj, arg_, vreg, visitor);
-  }
-
- private:
-  VerifyRootVisitor* const visitor_;
+  RootCallback* const callback_;
   void* const arg_;
+  const uint32_t tid_;
 };
 
 void Thread::SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
@@ -1988,39 +1981,42 @@
   class_loader_override_ = class_loader_override;
 }
 
-void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
+void Thread::VisitRoots(RootCallback* visitor, void* arg) {
+  uint32_t thread_id = GetThreadId();
   if (opeer_ != nullptr) {
-    opeer_ = visitor(opeer_, arg);
+    opeer_ = visitor(opeer_, arg, thread_id, kRootThreadObject);
   }
   if (exception_ != nullptr) {
-    exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg));
+    exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg, thread_id,
+                                                       kRootNativeStack));
   }
   throw_location_.VisitRoots(visitor, arg);
   if (class_loader_override_ != nullptr) {
-    class_loader_override_ = down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg));
+    class_loader_override_ =
+        down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg, thread_id,
+                                                kRootNativeStack));
   }
-  jni_env_->locals.VisitRoots(visitor, arg);
-  jni_env_->monitors.VisitRoots(visitor, arg);
-
+  jni_env_->locals.VisitRoots(visitor, arg, thread_id, kRootJNILocal);
+  jni_env_->monitors.VisitRoots(visitor, arg, thread_id, kRootJNIMonitor);
   SirtVisitRoots(visitor, arg);
-
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
-  RootCallbackVisitor visitorToCallback(visitor, arg);
+  RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context, visitorToCallback);
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
-
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     if (frame.this_object_ != nullptr) {
-      frame.this_object_ = visitor(frame.this_object_, arg);
+      frame.this_object_ = visitor(frame.this_object_, arg, thread_id, kRootJavaFrame);
     }
     DCHECK(frame.method_ != nullptr);
-    frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+    frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg, thread_id,
+                                                          kRootJavaFrame));
   }
 }
 
-static mirror::Object* VerifyRoot(mirror::Object* root, void* arg) {
+static mirror::Object* VerifyRoot(mirror::Object* root, void* arg, uint32_t /*thread_id*/,
+                                  RootType /*root_type*/) {
   DCHECK(root != nullptr);
   DCHECK(arg != nullptr);
   reinterpret_cast<gc::Heap*>(arg)->VerifyObject(root);
@@ -2029,7 +2025,7 @@
 
 void Thread::VerifyStackImpl() {
   UniquePtr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap());
+  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap(), GetTid());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index b7f8bb0..daffc92 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -31,8 +31,8 @@
 #include "globals.h"
 #include "jvalue.h"
 #include "locks.h"
+#include "object_callbacks.h"
 #include "offsets.h"
-#include "root_visitor.h"
 #include "runtime_stats.h"
 #include "stack.h"
 #include "stack_indirect_reference_table.h"
@@ -389,7 +389,7 @@
   static jobjectArray InternalStackTraceToStackTraceElementArray(JNIEnv* env, jobject internal,
       jobjectArray output_array = NULL, int* stack_depth = NULL);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -503,7 +503,8 @@
   // Is the given obj in this thread's stack indirect reference table?
   bool SirtContains(jobject obj) const;
 
-  void SirtVisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SirtVisitRoots(RootCallback* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PushSirt(StackIndirectReferenceTable* sirt) {
     sirt->SetLink(top_sirt_);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8bf099b..25f692d 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -756,28 +756,30 @@
   }
 }
 
-void ThreadList::VisitRoots(RootVisitor* visitor, void* arg) const {
+void ThreadList::VisitRoots(RootCallback* callback, void* arg) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VisitRoots(visitor, arg);
+    thread->VisitRoots(callback, arg);
   }
 }
 
-struct VerifyRootWrapperArg {
-  VerifyRootVisitor* visitor;
-  void* arg;
+class VerifyRootWrapperArg {
+ public:
+  VerifyRootWrapperArg(VerifyRootCallback* callback, void* arg) : callback_(callback), arg_(arg) {
+  }
+  VerifyRootCallback* const callback_;
+  void* const arg_;
 };
 
-static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg) {
+static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg,
+                                                 uint32_t /*thread_id*/, RootType /*root_type*/) {
   VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
-  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
+  wrapperArg->callback_(root, wrapperArg->arg_, 0, NULL);
   return root;
 }
 
-void ThreadList::VerifyRoots(VerifyRootVisitor* visitor, void* arg) const {
-  VerifyRootWrapperArg wrapper;
-  wrapper.visitor = visitor;
-  wrapper.arg = arg;
+void ThreadList::VerifyRoots(VerifyRootCallback* callback, void* arg) const {
+  VerifyRootWrapperArg wrapper(callback, arg);
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
     thread->VisitRoots(VerifyRootWrapperCallback, &wrapper);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 45994ae..e98aed9 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -19,7 +19,7 @@
 
 #include "base/mutex.h"
 #include "jni.h"
-#include "root_visitor.h"
+#include "object_callbacks.h"
 
 #include <bitset>
 #include <list>
@@ -113,10 +113,10 @@
       LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
   void Unregister(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) const
+  void VisitRoots(RootCallback* callback, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyRoots(VerifyRootVisitor* visitor, void* arg) const
+  void VerifyRoots(VerifyRootCallback* callback, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Return a copy of the thread list.
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index 1cc3e74..2a1faff 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -33,13 +33,13 @@
   }
 }
 
-void ThrowLocation::VisitRoots(RootVisitor* visitor, void* arg) {
+void ThrowLocation::VisitRoots(RootCallback* visitor, void* arg) {
   if (this_object_ != nullptr) {
-    this_object_ = visitor(this_object_, arg);
+    this_object_ = visitor(this_object_, arg, 0, kRootVMInternal);
     DCHECK(this_object_ != nullptr);
   }
   if (method_ != nullptr) {
-    method_ = down_cast<mirror::ArtMethod*>(visitor(method_, arg));
+    method_ = down_cast<mirror::ArtMethod*>(visitor(method_, arg, 0, kRootVMInternal));
     DCHECK(method_ != nullptr);
   }
 }
diff --git a/runtime/throw_location.h b/runtime/throw_location.h
index 5da446e..f30aa4e 100644
--- a/runtime/throw_location.h
+++ b/runtime/throw_location.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_THROW_LOCATION_H_
 #define ART_RUNTIME_THROW_LOCATION_H_
 
+#include "object_callbacks.h"
 #include "base/macros.h"
-#include "root_visitor.h"
 
 #include <stdint.h>
 #include <string>
@@ -62,7 +62,7 @@
 
   std::string Dump() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg);
+  void VisitRoots(RootCallback* visitor, void* arg);
 
  private:
   // The 'this' reference of the throwing method.
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 30be36c..ab943a6 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3983,8 +3983,8 @@
   verifier::RegTypeCache::ShutDown();
 }
 
-void MethodVerifier::VisitRoots(RootVisitor* visitor, void* arg) {
-  reg_types_.VisitRoots(visitor, arg);
+void MethodVerifier::VisitRoots(RootCallback* callback, void* arg) {
+  reg_types_.VisitRoots(callback, arg);
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 7c75c9c..031cfec 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -220,7 +220,7 @@
   // Describe VRegs at the given dex pc.
   std::vector<int32_t> DescribeVRegs(uint32_t dex_pc);
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Accessors used by the compiler via CompilerCallback
   const DexFile::CodeItem* CodeItem() const;
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index f394bce..e56e670 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -969,9 +969,9 @@
   }
 }
 
-void RegType::VisitRoots(RootVisitor* visitor, void* arg) {
+void RegType::VisitRoots(RootCallback* callback, void* arg) {
   if (klass_ != nullptr) {
-    klass_ = down_cast<mirror::Class*>(visitor(klass_, arg));
+    klass_ = down_cast<mirror::Class*>(callback(klass_, arg, 0, kRootUnknown));
   }
 }
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 3818375..a23b8c4 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -19,8 +19,8 @@
 
 #include "base/macros.h"
 #include "globals.h"
+#include "object_callbacks.h"
 #include "primitive.h"
-#include "root_visitor.h"
 
 #include "jni.h"
 
@@ -270,7 +270,7 @@
 
   virtual ~RegType() {}
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
   RegType(mirror::Class* klass, const std::string& descriptor, uint16_t cache_id)
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index c8a03d6..5e894ed 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -573,9 +573,9 @@
   }
 }
 
-void RegTypeCache::VisitRoots(RootVisitor* visitor, void* arg) {
+void RegTypeCache::VisitRoots(RootCallback* callback, void* arg) {
   for (RegType* entry : entries_) {
-    entry->VisitRoots(visitor, arg);
+    entry->VisitRoots(callback, arg);
   }
 }
 
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 41bc8c9..4cc7e61 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -20,8 +20,8 @@
 #include "base/casts.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
+#include "object_callbacks.h"
 #include "reg_type.h"
-#include "root_visitor.h"
 #include "runtime.h"
 
 #include <stdint.h>
@@ -146,7 +146,7 @@
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const RegType& RegTypeFromPrimitiveType(Primitive::Type) const;
 
-  void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void VisitRoots(RootCallback* callback, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   void FillPrimitiveAndSmallConstantTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index 8e6b153..7139b7f 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -1,8 +1,9 @@
-Initializing System.out...
-Thread count: 512
-Starting thread 'Thready'
-@ Thread running
-@ Got expected setDaemon exception
-@ Thread bailing
-Thread starter returning
+thread test starting
+testThreadCapacity thread count: 512
+testThreadDaemons starting thread 'TestDaemonThread'
+testThreadDaemons @ Thread running
+testThreadDaemons @ Got expected setDaemon exception
+testThreadDaemons @ Thread bailing
+testThreadDaemons finished
+testSleepZero finished
 thread test done
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 911c739..608b7e0 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -21,50 +21,36 @@
  */
 public class Main {
     public static void main(String[] args) throws Exception {
-        System.out.println("Initializing System.out...");
-
-        MyThread[] threads = new MyThread[512];
-        for (int i = 0; i < 512; i++) {
-            threads[i] = new MyThread();
-        }
-
-        for (MyThread thread : threads) {
-            thread.start();
-        }
-        for (MyThread thread : threads) {
-            thread.join();
-        }
-
-        System.out.println("Thread count: " + MyThread.mCount);
-
-        go();
+        System.out.println("thread test starting");
+        testThreadCapacity();
+        testThreadDaemons();
+        testSleepZero();
         System.out.println("thread test done");
     }
 
-    public static void go() {
-        Thread t = new Thread(null, new ThreadTestSub(), "Thready", 7168);
-
-        t.setDaemon(false);
-
-        System.out.print("Starting thread '" + t.getName() + "'\n");
-        t.start();
-
-        try {
-            t.join();
-        } catch (InterruptedException ex) {
-            ex.printStackTrace();
-        }
-
-        System.out.print("Thread starter returning\n");
-    }
-
     /*
      * Simple thread capacity test.
      */
-    static class MyThread extends Thread {
+    private static void testThreadCapacity() throws Exception {
+        TestCapacityThread[] threads = new TestCapacityThread[512];
+        for (int i = 0; i < 512; i++) {
+            threads[i] = new TestCapacityThread();
+        }
+
+        for (TestCapacityThread thread : threads) {
+            thread.start();
+        }
+        for (TestCapacityThread thread : threads) {
+            thread.join();
+        }
+
+        System.out.println("testThreadCapacity thread count: " + TestCapacityThread.mCount);
+    }
+
+    private static class TestCapacityThread extends Thread {
         static int mCount = 0;
         public void run() {
-            synchronized (MyThread.class) {
+            synchronized (TestCapacityThread.class) {
                 ++mCount;
             }
             try {
@@ -73,29 +59,57 @@
             }
         }
     }
-}
 
-class ThreadTestSub implements Runnable {
-    public void run() {
-        System.out.print("@ Thread running\n");
+    private static void testThreadDaemons() {
+        Thread t = new Thread(null, new TestDaemonThread(), "TestDaemonThread", 7168);
+
+        t.setDaemon(false);
+
+        System.out.print("testThreadDaemons starting thread '" + t.getName() + "'\n");
+        t.start();
 
         try {
-            Thread.currentThread().setDaemon(true);
-            System.out.print("@ FAILED: setDaemon() succeeded\n");
-        } catch (IllegalThreadStateException itse) {
-            System.out.print("@ Got expected setDaemon exception\n");
+            t.join();
+        } catch (InterruptedException ex) {
+            ex.printStackTrace();
         }
 
-        //if (true)
-        //    throw new NullPointerException();
+        System.out.print("testThreadDaemons finished\n");
+    }
+
+    private static class TestDaemonThread implements Runnable {
+        public void run() {
+            System.out.print("testThreadDaemons @ Thread running\n");
+
+            try {
+                Thread.currentThread().setDaemon(true);
+                System.out.print("testThreadDaemons @ FAILED: setDaemon() succeeded\n");
+            } catch (IllegalThreadStateException itse) {
+                System.out.print("testThreadDaemons @ Got expected setDaemon exception\n");
+            }
+
+            try {
+                Thread.sleep(2000);
+            }
+            catch (InterruptedException ie) {
+                System.out.print("testThreadDaemons @ Interrupted!\n");
+            }
+            finally {
+                System.out.print("testThreadDaemons @ Thread bailing\n");
+            }
+        }
+    }
+
+    private static void testSleepZero() throws Exception {
+        Thread.currentThread().interrupt();
         try {
-            Thread.sleep(2000);
+            Thread.sleep(0);
+            throw new AssertionError("unreachable");
+        } catch (InterruptedException e) {
+            if (Thread.currentThread().isInterrupted()) {
+                throw new AssertionError("thread is interrupted");
+            }
         }
-        catch (InterruptedException ie) {
-            System.out.print("@ Interrupted!\n");
-        }
-        finally {
-            System.out.print("@ Thread bailing\n");
-        }
+        System.out.print("testSleepZero finished\n");
     }
 }
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index fb65471..3307e50 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -90,12 +90,25 @@
         foo.wideSetBar4(0,0,0,sum);
         sum += foo.wideGetBar5(1,2,3,4,5);
         foo.wideSetBar5(0,0,0,0,sum);
-        if (foo.wideGetBar0() == 39488) {
+        long result1 = foo.wideGetBar0();
+        long expected1 = 1234L << 5;
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2i(0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3i(0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4i(0,0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5i(0,0,0,0,sum);
+        long result2 = foo.wideGetBar0();
+        long expected2 = 1234L << 9;
+        if (result1 == expected1 && result2 == expected2) {
             System.out.println("wideGetterSetterTest passes");
         }
         else {
             System.out.println("wideGetterSetterTest fails: " +
-                                foo.wideGetBar0() + " (expecting 39488)");
+                                "result1: " + result1 + " (expecting " + expected1 + "), " +
+                                "result2: " + result2 + " (expecting " + expected2 + ")");
         }
     }
 
@@ -8374,6 +8387,18 @@
     public void wideSetBar5(long a1, long a2, long a3, long a4, long a5) {
         lbar = a5;
     }
+    public void wideSetBar2i(int a1, long a2) {
+      lbar = a2;
+    }
+    public void wideSetBar3i(int a1, int a2, long a3) {
+        lbar = a3;
+    }
+    public void wideSetBar4i(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+    }
+    public void wideSetBar5i(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+    }
     public long wideGetBar0() {
         return lbar;
     }
diff --git a/test/Android.mk b/test/Android.mk
index d716f9b..4d47651 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -50,7 +50,8 @@
 	ParallelGC \
 	ReferenceMap \
 	StackWalk \
-	ThreadStress
+	ThreadStress \
+	UnsafeTest
 
 # TODO: Enable when the StackWalk2 tests are passing
 #	StackWalk2 \
diff --git a/test/UnsafeTest/UnsafeTest.java b/test/UnsafeTest/UnsafeTest.java
new file mode 100644
index 0000000..f3d52896
--- /dev/null
+++ b/test/UnsafeTest/UnsafeTest.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class UnsafeTest {
+  static {
+    System.loadLibrary("arttest");
+  }
+
+  private static void check(int actual, int expected, String msg) {
+    if (actual != expected) {
+      System.logE(msg + " : " + actual + " != " + expected);
+      System.exit(-1);
+    }
+  }
+
+  private static Unsafe getUnsafe() throws Exception {
+    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Field f = unsafeClass.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    return (Unsafe) f.get(null);
+  }
+
+  public static void main(String[] args) throws Exception {
+    Unsafe unsafe = getUnsafe();
+    check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
+        "Unsafe.arrayBaseOffset(boolean[])");
+    check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
+        "Unsafe.arrayBaseOffset(byte[])");
+    check(unsafe.arrayBaseOffset(char[].class), vmArrayBaseOffset(char[].class),
+        "Unsafe.arrayBaseOffset(char[])");
+    check(unsafe.arrayBaseOffset(double[].class), vmArrayBaseOffset(double[].class),
+        "Unsafe.arrayBaseOffset(double[])");
+    check(unsafe.arrayBaseOffset(float[].class), vmArrayBaseOffset(float[].class),
+        "Unsafe.arrayBaseOffset(float[])");
+    check(unsafe.arrayBaseOffset(int[].class), vmArrayBaseOffset(int[].class),
+        "Unsafe.arrayBaseOffset(int[])");
+    check(unsafe.arrayBaseOffset(long[].class), vmArrayBaseOffset(long[].class),
+        "Unsafe.arrayBaseOffset(long[])");
+    check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
+        "Unsafe.arrayBaseOffset(Object[])");
+
+    check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
+        "Unsafe.arrayIndexScale(boolean[])");
+    check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
+        "Unsafe.arrayIndexScale(byte[])");
+    check(unsafe.arrayIndexScale(char[].class), vmArrayIndexScale(char[].class),
+        "Unsafe.arrayIndexScale(char[])");
+    check(unsafe.arrayIndexScale(double[].class), vmArrayIndexScale(double[].class),
+        "Unsafe.arrayIndexScale(double[])");
+    check(unsafe.arrayIndexScale(float[].class), vmArrayIndexScale(float[].class),
+        "Unsafe.arrayIndexScale(float[])");
+    check(unsafe.arrayIndexScale(int[].class), vmArrayIndexScale(int[].class),
+        "Unsafe.arrayIndexScale(int[])");
+    check(unsafe.arrayIndexScale(long[].class), vmArrayIndexScale(long[].class),
+        "Unsafe.arrayIndexScale(long[])");
+    check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
+        "Unsafe.arrayIndexScale(Object[])");
+  }
+
+  private static native int vmArrayBaseOffset(Class clazz);
+  private static native int vmArrayIndexScale(Class clazz);
+}
diff --git a/test/UnsafeTest/unsafe_test.cc b/test/UnsafeTest/unsafe_test.cc
new file mode 100644
index 0000000..e36ee14
--- /dev/null
+++ b/test/UnsafeTest/unsafe_test.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "mirror/array.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayBaseOffset(JNIEnv* env, jclass, jobject classObj) {
+  ScopedObjectAccess soa(env);
+  mirror::Class* klass = soa.Decode<mirror::Class*>(classObj);
+  return mirror::Array::DataOffset(
+      Primitive::ComponentSize(klass->GetComponentType()->GetPrimitiveType())).Int32Value();
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_UnsafeTest_vmArrayIndexScale(JNIEnv* env, jclass, jobject classObj) {
+  ScopedObjectAccess soa(env);
+  mirror::Class* klass = soa.Decode<mirror::Class*>(classObj);
+  return Primitive::ComponentSize(klass->GetComponentType()->GetPrimitiveType());
+}
+
+}  // namespace art